From aba1288327cf1329b979c8d8a3ee1cf8942d6922 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Tue, 16 Jun 2026 11:26:36 +0000 Subject: [PATCH 01/52] Validate multi-language Lineage support --- gems/decomplex/lib/decomplex/ast.rb | 292 ++++++----- .../lib/decomplex/false_simplicity.rb | 185 ++++++- .../lib/decomplex/flay_similarity.rb | 89 ++-- gems/decomplex/lib/decomplex/syntax.rb | 456 +++++++++++++----- gems/decomplex/test/syntax_test.rb | 180 +++++++ .../lineage/docs/agents/cross-lang-support.md | 76 +++ gems/lineage/src/extract.rs | 314 +++++++++++- gems/lineage/src/hazard.rs | 206 +++++++- gems/lineage/src/quality.rs | 36 +- .../nil_kill/commands/normalize_command.rb | 22 +- .../lib/nil_kill/runtime/trace_loader.rb | 21 +- .../spec/multi_language_runtime_spec.rb | 70 +++ gems/slopcop/exe/slopcop | 9 +- gems/slopcop/lib/slopcop/constraints.rb | 2 + .../lib/slopcop/constraints/go_provider.rb | 158 ++++++ .../test/constraints_go_provider_test.rb | 93 ++++ 16 files changed, 1886 insertions(+), 323 deletions(-) create mode 100644 gems/lineage/docs/agents/cross-lang-support.md create mode 100644 gems/slopcop/lib/slopcop/constraints/go_provider.rb create mode 100644 gems/slopcop/test/constraints_go_provider_test.rb diff --git a/gems/decomplex/lib/decomplex/ast.rb b/gems/decomplex/lib/decomplex/ast.rb index 38b1d891a..11c4af045 100644 --- a/gems/decomplex/lib/decomplex/ast.rb +++ b/gems/decomplex/lib/decomplex/ast.rb @@ -51,11 +51,11 @@ class TreeSitterNormalizer method function_definition function_declaration method_definition method_declaration function_item singleton_method ].freeze - CLASS_KINDS = %w[class class_definition class_declaration].freeze + CLASS_KINDS = %w[class class_definition class_declaration class_specifier].freeze MODULE_KINDS = %w[module].freeze BLOCK_KINDS = %w[ block body_statement statement_block statement_list class_body - switch_body match_block then block_body + switch_body match_block then block_body control_structure_body function_body 
].freeze IF_KINDS = %w[if if_statement if_modifier unless unless_modifier if_expression conditional].freeze LOOP_KINDS = { @@ -69,17 +69,22 @@ class TreeSitterNormalizer }.freeze CASE_KINDS = %w[ case switch_statement expression_switch_statement switch_expression match_statement match_expression + when_expression + ].freeze + WHEN_KINDS = %w[ + when switch_case case_clause expression_case case_statement switch_section + switch_block_statement_group switch_entry when_entry match_arm ].freeze - WHEN_KINDS = %w[when switch_case case_clause expression_case match_arm].freeze ASSIGNMENT_KINDS = %w[ assignment assignment_expression assignment_statement augmented_assignment ].freeze MEMBER_KINDS = %w[ - call attribute member_expression field selector_expression field_expression expression_list + call attribute member_expression member_access_expression field field_access selector_expression field_expression + navigation_expression directly_assignable_expression expression_list ].freeze CALL_KINDS = %w[call call_expression method_call method_call_expression].freeze IDENTIFIER_KINDS = %w[ - identifier property_identifier field_identifier shorthand_property_identifier + identifier simple_identifier property_identifier field_identifier shorthand_property_identifier ].freeze CONST_KINDS = %w[constant scope_resolution type_identifier scoped_type_identifier].freeze STRING_KINDS = %w[ @@ -107,6 +112,7 @@ class TreeSitterNormalizer def initialize(document) @document = document @local_stack = [] + @normalizing = Set.new end def normalize @@ -123,118 +129,126 @@ def normalize def normalize_node(node) return nil unless ts_node?(node) - return nil if node.kind == "comment" - return normalize_assignment_lhs(node) if assignment_lhs?(node) - return normalize_infix_statement(node) if infix_statement?(node) - return normalize_dotted_expression(node) if dotted_expression?(node) - return normalize_unary_not_statement(node) if unary_not_statement?(node) - - if leading_function_statement?(node) 
- normalize_leading_function_statement(node) - elsif modifier_statement?(node) - normalize_modifier_statement(node) - elsif ternary_statement?(node) - normalize_ternary_statement(node) - elsif statement_call_with_block?(node) - normalize_statement_call_with_block(node) - elsif command_call_statement?(node) - normalize_command_call_statement(node) - elsif FUNCTION_KINDS.include?(node.kind) - normalize_function(node) - elsif class_node?(node) - normalize_class(node) - elsif module_node?(node) - normalize_module(node) - elsif node.kind == "impl_item" - normalize_impl(node) - elsif node.kind == "elsif" - normalize_elsif(node) - elsif IF_KINDS.include?(node.kind) - normalize_if(node) - elsif LOOP_KINDS.key?(node.kind) - normalize_loop(node) - elsif CASE_KINDS.include?(node.kind) || hidden_match?(node) - normalize_case(node) - elsif node.kind == "element_reference" - normalize_element_reference(node) - elsif node.kind == "rescue_modifier" - normalize_rescue_modifier(node) - elsif node.kind == "ensure" - normalize_ensure_clause(node) - elsif node.kind == "begin" - normalize_begin(node) - elsif node.kind == "operator_assignment" - normalize_operator_assignment(node) - elsif ASSIGNMENT_KINDS.include?(node.kind) - normalize_assignment(node) - elsif node.kind == "subshell" - normalize_subshell(node) - elsif node.kind == "block_argument" - normalize_block_argument(node) - elsif node.kind == "pair" - normalize_pair(node) - elsif node.kind == "singleton_class" - normalize_singleton_class(node) - elsif node.kind == "lambda" - normalize_lambda(node) - elsif node.kind == "yield" - normalize_yield(node) - elsif yield_argument_list?(node) - normalize_yield_argument_list(node) - elsif node.kind == "heredoc_beginning" - normalize_heredoc_beginning(node) - elsif node.kind == "chained_string" - normalize_chained_string(node) - elsif node.kind == "interpolation" - normalize_interpolation(node) - elsif unary_minus_expression?(node) - normalize_unary_minus(node) - elsif 
unary_not_expression?(node) - normalize_unary_not(node) - elsif boolean_expression?(node) - normalize_boolean(node) - elsif operator_call_expression?(node) - normalize_operator_call(node) - elsif comparison_expression?(node) - normalize_comparison(node) - elsif CALL_KINDS.include?(node.kind) - normalize_call(node) - elsif member_read_node?(node) - normalize_member_read(node) - elsif BLOCK_KINDS.include?(node.kind) - wrap(:BLOCK, children: normalize_children(node), source: node) - elsif unwrap_node?(node) - normalize_node(node.named_children.first) - elsif RETURN_KINDS.key?(node.kind) - normalize_return(node) - elsif self_node?(node) - wrap(:SELF, children: [], source: node) - elsif instance_variable?(node) - wrap(:IVAR, children: [node.text.to_s], source: node) - elsif global_variable?(node) - normalize_global_variable(node) - elsif const_node?(node) - normalize_const(node) - elsif ruby? && IDENTIFIER_KINDS.include?(node.kind) && node.text.to_s == "yield" - wrap(:YIELD, children: [nil], source: node) - elsif ruby_vcall_identifier?(node) - return wrap(:YIELD, children: [nil], source: node) if node.text.to_s == "yield" - - wrap(:VCALL, children: [node.text.to_s.to_sym], source: node) - elsif vcall_identifier?(node) - wrap(:VCALL, children: [node.text.to_s.to_sym], source: node) - elsif local_identifier?(node) - wrap(:LVAR, children: [node.text.to_s], source: node) - elsif NIL_KINDS.include?(node.kind) - wrap(:NIL, children: [], source: node) - elsif interpolated_string?(node) - normalize_interpolated_string(node) - elsif STRING_KINDS.include?(node.kind) - wrap(:STR, children: [node.text.to_s], source: node) - elsif SYMBOL_KINDS.include?(node.kind) - wrap(:LIT, children: [node.text.to_s.sub(/\A:/, "").to_sym], source: node) - else - wrap(kind_type(node.kind), children: normalize_children(node), source: node) + key = node_key(node) + return nil if @normalizing.include?(key) + + @normalizing << key + begin + return nil if node.kind == "comment" + return 
normalize_assignment_lhs(node) if assignment_lhs?(node) + return normalize_infix_statement(node) if infix_statement?(node) + return normalize_dotted_expression(node) if dotted_expression?(node) + return normalize_unary_not_statement(node) if unary_not_statement?(node) + + if leading_function_statement?(node) + normalize_leading_function_statement(node) + elsif modifier_statement?(node) + normalize_modifier_statement(node) + elsif ternary_statement?(node) + normalize_ternary_statement(node) + elsif statement_call_with_block?(node) + normalize_statement_call_with_block(node) + elsif command_call_statement?(node) + normalize_command_call_statement(node) + elsif FUNCTION_KINDS.include?(node.kind) + normalize_function(node) + elsif class_node?(node) + normalize_class(node) + elsif module_node?(node) + normalize_module(node) + elsif node.kind == "impl_item" + normalize_impl(node) + elsif node.kind == "elsif" + normalize_elsif(node) + elsif IF_KINDS.include?(node.kind) + normalize_if(node) + elsif LOOP_KINDS.key?(node.kind) + normalize_loop(node) + elsif CASE_KINDS.include?(node.kind) || hidden_match?(node) + normalize_case(node) + elsif node.kind == "element_reference" + normalize_element_reference(node) + elsif node.kind == "rescue_modifier" + normalize_rescue_modifier(node) + elsif node.kind == "ensure" + normalize_ensure_clause(node) + elsif node.kind == "begin" + normalize_begin(node) + elsif node.kind == "operator_assignment" + normalize_operator_assignment(node) + elsif ASSIGNMENT_KINDS.include?(node.kind) + normalize_assignment(node) + elsif node.kind == "subshell" + normalize_subshell(node) + elsif node.kind == "block_argument" + normalize_block_argument(node) + elsif node.kind == "pair" + normalize_pair(node) + elsif node.kind == "singleton_class" + normalize_singleton_class(node) + elsif node.kind == "lambda" + normalize_lambda(node) + elsif node.kind == "yield" + normalize_yield(node) + elsif yield_argument_list?(node) + normalize_yield_argument_list(node) + 
elsif node.kind == "heredoc_beginning" + normalize_heredoc_beginning(node) + elsif node.kind == "chained_string" + normalize_chained_string(node) + elsif node.kind == "interpolation" + normalize_interpolation(node) + elsif unary_minus_expression?(node) + normalize_unary_minus(node) + elsif unary_not_expression?(node) + normalize_unary_not(node) + elsif boolean_expression?(node) + normalize_boolean(node) + elsif operator_call_expression?(node) + normalize_operator_call(node) + elsif comparison_expression?(node) + normalize_comparison(node) + elsif CALL_KINDS.include?(node.kind) + normalize_call(node) + elsif member_read_node?(node) + normalize_member_read(node) + elsif BLOCK_KINDS.include?(node.kind) + wrap(:BLOCK, children: normalize_children(node), source: node) + elsif unwrap_node?(node) + normalize_node(node.named_children.first) + elsif RETURN_KINDS.key?(node.kind) + normalize_return(node) + elsif self_node?(node) + wrap(:SELF, children: [], source: node) + elsif instance_variable?(node) + wrap(:IVAR, children: [node.text.to_s], source: node) + elsif global_variable?(node) + normalize_global_variable(node) + elsif const_node?(node) + normalize_const(node) + elsif ruby? 
&& IDENTIFIER_KINDS.include?(node.kind) && node.text.to_s == "yield" + wrap(:YIELD, children: [nil], source: node) + elsif ruby_vcall_identifier?(node) + return wrap(:YIELD, children: [nil], source: node) if node.text.to_s == "yield" + + wrap(:VCALL, children: [node.text.to_s.to_sym], source: node) + elsif vcall_identifier?(node) + wrap(:VCALL, children: [node.text.to_s.to_sym], source: node) + elsif local_identifier?(node) + wrap(:LVAR, children: [node.text.to_s], source: node) + elsif NIL_KINDS.include?(node.kind) + wrap(:NIL, children: [], source: node) + elsif interpolated_string?(node) + normalize_interpolated_string(node) + elsif STRING_KINDS.include?(node.kind) + wrap(:STR, children: [node.text.to_s], source: node) + elsif SYMBOL_KINDS.include?(node.kind) + wrap(:LIT, children: [node.text.to_s.sub(/\A:/, "").to_sym], source: node) + else + wrap(kind_type(node.kind), children: normalize_children(node), source: node) + end + ensure + @normalizing.delete(key) end end @@ -916,7 +930,7 @@ def normalize_body_nodes(nodes, source:) def normalize_patterns(node) patterns = node.named_children.select do |child| - %w[pattern case_pattern match_pattern].include?(child.kind) + %w[pattern case_pattern match_pattern switch_pattern when_condition].include?(child.kind) end patterns = [named_field(node, "value")].compact if patterns.empty? patterns = [node.named_children.find { |child| !BLOCK_KINDS.include?(child.kind) && !statement_node?(child) }].compact if patterns.empty? 
@@ -924,7 +938,7 @@ def normalize_patterns(node) patterns.flat_map do |pattern| if pattern.text.to_s.include?("::") [wrap(:CONST, children: [pattern.text.to_s.to_sym], source: pattern)] - elsif %w[pattern case_pattern match_pattern expression_list].include?(pattern.kind) + elsif %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) pattern.named_children.map { |child| normalize_node(child) }.compact else [normalize_node(pattern)].compact @@ -1418,12 +1432,28 @@ def member_parts(node) recv = named_field(node, "receiver") || named_field(node, "object") || named_field(node, "operand") || named_field(node, "value") || - node.named_children.first + named_field(node, "expression") || + node.named_children.find { |child| child.kind != "navigation_suffix" } mid = named_field(node, "method") || named_field(node, "field") || - named_field(node, "property") || node.named_children.reject { |child| %w[block do_block argument_list arguments].include?(child.kind) }.last + named_field(node, "property") || named_field(node, "suffix") || + node.named_children.find { |child| child.kind == "navigation_suffix" } || + node.named_children.reject { |child| %w[block do_block argument_list arguments].include?(child.kind) }.last return [nil, nil] unless recv && mid && recv != mid - [recv, mid.text.to_s.sub(/=\z/, "")] + [recv, member_name(mid).sub(/=\z/, "")] + end + + def member_name(node) + return "" unless ts_node?(node) + + if node.kind == "navigation_suffix" + suffix = named_field(node, "suffix") || + node.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) } || + node.named_children.last + return suffix&.text.to_s.sub(/\A[.?]+/, "") + end + + node.text.to_s.sub(/\A[.?]+/, "") end def call_arguments(node, function) @@ -2417,13 +2447,19 @@ def parent_named_child?(parent, node) parent.named_children.any? 
{ |child| same_ts_node?(child, node) } end - def same_ts_node?(left, right) - left.kind == right.kind && left.start_byte == right.start_byte && left.end_byte == right.end_byte - rescue StandardError - false - end + def same_ts_node?(left, right) + left.kind == right.kind && left.start_byte == right.start_byte && left.end_byte == right.end_byte + rescue StandardError + false + end + + def node_key(node) + [node.kind, node.start_byte, node.end_byte] + rescue StandardError + node.object_id + end - def bare_identifier_text?(text) + def bare_identifier_text?(text) text.to_s.strip.match?(/\A[A-Za-z_]\w*[!?=]?\z/) end diff --git a/gems/decomplex/lib/decomplex/false_simplicity.rb b/gems/decomplex/lib/decomplex/false_simplicity.rb index ea2ab7059..39d1a4bad 100644 --- a/gems/decomplex/lib/decomplex/false_simplicity.rb +++ b/gems/decomplex/lib/decomplex/false_simplicity.rb @@ -33,18 +33,10 @@ class FalseSimplicity ) EMPTY_PAIRS = {}.freeze - GENERIC_LEXICON = Lexicon.new( - dispatch_mids: %w[eval reflect Reflect Proxy getattr setattr].freeze, - meta_mids: %w[eval exec].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[Console console fs process subprocess socket Deno Bun].freeze, - io_bare: %w[print println open system exec spawn sleep].freeze, - dir_context: [].freeze, - context_pairs: EMPTY_PAIRS, - context_bare: %w[rand random].freeze, - callback_set: %w[transaction synchronize lock with_lock unlock mutex atomic subscribe callback hook].freeze, - core_consts: [].freeze - ).freeze + COMMON_CALLBACK_SET = %w[ + transaction synchronize lock with_lock unlock mutex atomic subscribe + callback hook + ].freeze RUBY_LEXICON = Lexicon.new( dispatch_mids: %w[send __send__ public_send const_get constantize instance_variable_get].freeze, @@ -91,7 +83,7 @@ class FalseSimplicity "random" => %w[random randint randrange choice] }.freeze, context_bare: %w[random randint randrange].freeze, - callback_set: GENERIC_LEXICON.callback_set, + callback_set: COMMON_CALLBACK_SET, 
core_consts: [].freeze ).freeze JS_LEXICON = Lexicon.new( @@ -107,14 +99,175 @@ class FalseSimplicity "performance" => %w[now] }.freeze, context_bare: [].freeze, - callback_set: GENERIC_LEXICON.callback_set, + callback_set: COMMON_CALLBACK_SET, + core_consts: [].freeze + ).freeze + GO_LEXICON = Lexicon.new( + dispatch_mids: %w[Call CallSlice Method MethodByName ValueOf TypeOf].freeze, + meta_mids: %w[Call CallSlice MethodByName New MakeFunc].freeze, + method_obj_mids: %i[method].freeze, + io_consts: %w[os io ioutil fs net http exec syscall].freeze, + io_bare: %w[panic print println recover].freeze, + dir_context: %w[Getwd UserHomeDir].freeze, + context_pairs: { + "time" => %w[Now Since Until], + "rand" => %w[Int Intn Float64 Read] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[Lock Unlock RLock RUnlock Do Go Add Done Wait]).uniq.freeze, + core_consts: [].freeze + ).freeze + RUST_LEXICON = Lexicon.new( + dispatch_mids: %w[downcast downcast_ref downcast_mut call call_mut call_once].freeze, + meta_mids: %w[transmute from_raw_parts from_raw_parts_mut].freeze, + method_obj_mids: %i[method].freeze, + io_consts: %w[std tokio fs env process net io].freeze, + io_bare: %w[panic todo unimplemented unreachable].freeze, + dir_context: %w[current_dir home_dir].freeze, + context_pairs: { + "SystemTime" => %w[now], + "Instant" => %w[now] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[lock read write spawn await]).uniq.freeze, + core_consts: [].freeze + ).freeze + ZIG_LEXICON = Lexicon.new( + dispatch_mids: %w[field fieldParentPtr ptrCast alignCast call].freeze, + meta_mids: %w[typeInfo TypeOf ptrCast intFromPtr ptrFromInt].freeze, + method_obj_mids: %i[method].freeze, + io_consts: %w[std os fs process net Thread Mutex Atomic].freeze, + io_bare: %w[panic unreachable].freeze, + dir_context: [].freeze, + context_pairs: { + "time" => %w[timestamp nanoTimestamp milliTimestamp] + }.freeze, + context_bare: [].freeze, + 
callback_set: (COMMON_CALLBACK_SET + %w[lock unlock spawn wait signal]).uniq.freeze, + core_consts: [].freeze + ).freeze + LUA_LEXICON = Lexicon.new( + dispatch_mids: %w[load loadfile dofile require rawget rawset].freeze, + meta_mids: %w[setmetatable getmetatable debug eval load loadfile].freeze, + method_obj_mids: %i[method].freeze, + io_consts: %w[io os debug package].freeze, + io_bare: %w[print error assert require collectgarbage].freeze, + dir_context: [].freeze, + context_pairs: { + "os" => %w[time clock date getenv], + "math" => %w[random] + }.freeze, + context_bare: [].freeze, + callback_set: COMMON_CALLBACK_SET, + core_consts: [].freeze + ).freeze + C_LEXICON = Lexicon.new( + dispatch_mids: %w[dlsym dlopen GetProcAddress].freeze, + meta_mids: %w[setjmp longjmp va_start va_arg].freeze, + method_obj_mids: %i[method].freeze, + io_consts: %w[FILE DIR pthread mutex atomic].freeze, + io_bare: %w[printf fprintf fopen open read write close system exec abort exit assert].freeze, + dir_context: %w[getcwd getenv].freeze, + context_pairs: EMPTY_PAIRS, + context_bare: %w[rand time clock].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[pthread_mutex_lock pthread_mutex_unlock]).uniq.freeze, + core_consts: [].freeze + ).freeze + CPP_LEXICON = Lexicon.new( + dispatch_mids: %w[dynamic_cast typeid any_cast get_if visit invoke].freeze, + meta_mids: %w[reinterpret_cast const_cast dlsym dlopen].freeze, + method_obj_mids: %i[method].freeze, + io_consts: %w[std filesystem fstream iostream thread mutex atomic].freeze, + io_bare: %w[throw abort exit assert system].freeze, + dir_context: %w[current_path].freeze, + context_pairs: { + "chrono" => %w[now], + "random_device" => %w[operator()] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[lock unlock try_lock wait notify_one notify_all]).uniq.freeze, + core_consts: [].freeze + ).freeze + CSHARP_LEXICON = Lexicon.new( + dispatch_mids: %w[Invoke GetMethod GetProperty GetField Activator 
CreateInstance].freeze, + meta_mids: %w[Invoke GetType Reflection Emit DynamicMethod].freeze, + method_obj_mids: %i[method].freeze, + io_consts: %w[Console File Directory Path Process Socket HttpClient Environment].freeze, + io_bare: %w[throw].freeze, + dir_context: %w[CurrentDirectory GetEnvironmentVariable].freeze, + context_pairs: { + "DateTime" => %w[Now UtcNow Today], + "Guid" => %w[NewGuid], + "Random" => %w[Next NextDouble] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[Lock Monitor Enter Exit Wait Pulse]).uniq.freeze, + core_consts: [].freeze + ).freeze + JAVA_LEXICON = Lexicon.new( + dispatch_mids: %w[invoke getMethod getDeclaredMethod getField getDeclaredField forName].freeze, + meta_mids: %w[invoke setAccessible newInstance Proxy].freeze, + method_obj_mids: %i[method].freeze, + io_consts: %w[System File Files Paths ProcessBuilder Socket HttpClient Thread Lock AtomicReference].freeze, + io_bare: %w[throw].freeze, + dir_context: %w[getProperty getenv].freeze, + context_pairs: { + "System" => %w[currentTimeMillis nanoTime getenv getProperty], + "Instant" => %w[now], + "UUID" => %w[randomUUID], + "Math" => %w[random] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[lock unlock wait notify notifyAll submit execute]).uniq.freeze, + core_consts: [].freeze + ).freeze + SWIFT_LEXICON = Lexicon.new( + dispatch_mids: %w[perform value setValue selector NSClassFromString].freeze, + meta_mids: %w[Mirror unsafeBitCast withUnsafePointer withUnsafeBytes].freeze, + method_obj_mids: %i[method].freeze, + io_consts: %w[FileManager Process URLSession DispatchQueue Thread Lock NSLock].freeze, + io_bare: %w[print fatalError preconditionFailure assertionFailure].freeze, + dir_context: %w[currentDirectoryPath homeDirectoryForCurrentUser].freeze, + context_pairs: { + "Date" => %w[now], + "UUID" => %w[init] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[lock unlock async 
sync]).uniq.freeze, + core_consts: [].freeze + ).freeze + KOTLIN_LEXICON = Lexicon.new( + dispatch_mids: %w[invoke call callBy memberProperties declaredMemberFunctions].freeze, + meta_mids: %w[reflection javaClass Class forName setAccessible].freeze, + method_obj_mids: %i[method].freeze, + io_consts: %w[System File Files Paths ProcessBuilder Socket HttpClient Thread Mutex AtomicReference].freeze, + io_bare: %w[println print error check require TODO].freeze, + dir_context: %w[getProperty getenv].freeze, + context_pairs: { + "System" => %w[currentTimeMillis nanoTime getenv getProperty], + "Instant" => %w[now], + "UUID" => %w[randomUUID], + "Random" => %w[nextInt nextLong nextDouble] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[lock unlock synchronized launch async await]).uniq.freeze, core_consts: [].freeze ).freeze LANGUAGE_LEXICONS = { ruby: RUBY_LEXICON, python: PYTHON_LEXICON, javascript: JS_LEXICON, - typescript: JS_LEXICON + typescript: JS_LEXICON, + go: GO_LEXICON, + rust: RUST_LEXICON, + zig: ZIG_LEXICON, + lua: LUA_LEXICON, + c: C_LEXICON, + cpp: CPP_LEXICON, + csharp: CSHARP_LEXICON, + java: JAVA_LEXICON, + swift: SWIFT_LEXICON, + kotlin: KOTLIN_LEXICON }.freeze # Compatibility aliases for tests and downstream code that inspect @@ -157,7 +310,7 @@ def initialize(file, lines, language: :ruby, lexicon: nil) end def self.lexicon_for(language) - LANGUAGE_LEXICONS.fetch(language.to_sym, GENERIC_LEXICON) + LANGUAGE_LEXICONS.fetch(language.to_sym) end def walk(node, defs, cls) diff --git a/gems/decomplex/lib/decomplex/flay_similarity.rb b/gems/decomplex/lib/decomplex/flay_similarity.rb index 64a169985..4d8d298bd 100644 --- a/gems/decomplex/lib/decomplex/flay_similarity.rb +++ b/gems/decomplex/lib/decomplex/flay_similarity.rb @@ -290,40 +290,49 @@ def body_node(node) node.named_children.find { |child| BODY_KINDS.include?(child.kind) } end - def fingerprint(node) - return ["", 0] unless ts_node?(node) - return ["", 0] if node.kind 
== "comment" - return fingerprint_call(node) if CALL_KINDS.include?(node.kind) && call_message(node) + def fingerprint(node, active = nil) + return ["", 0] unless ts_node?(node) + active ||= Set.new + key = node_key(node) + return ["", 0] if active.include?(key) - if node.child_count.zero? - token = terminal_token(node) - return ["", 0] if token.empty? + active << key + begin + return ["", 0] if node.kind == "comment" + return fingerprint_call(node, active) if CALL_KINDS.include?(node.kind) && call_message(node) + + if node.child_count.zero? + token = terminal_token(node) + return ["", 0] if token.empty? return [token, 1] end - child_parts = [] - mass = 1 - node.children.each do |child| - child_fp, child_mass = fingerprint(child) - next if child_fp.empty? + child_parts = [] + mass = 1 + node.children.each do |child| + child_fp, child_mass = fingerprint(child, active) + next if child_fp.empty? - child_parts << child_fp - mass += child_mass + child_parts << child_fp + mass += child_mass end - return [terminal_token(node), 1] if child_parts.empty? + return [terminal_token(node), 1] if child_parts.empty? - ["#{node.kind}(#{child_parts.join(' ')})", mass] - end + ["#{node.kind}(#{child_parts.join(' ')})", mass] + ensure + active.delete(key) + end + end - def fingerprint_call(node) - message = call_message(node) - child_parts = [] - mass = 1 - node.children.each do |child| - child_fp, child_mass = fingerprint(child) - next if child_fp.empty? + def fingerprint_call(node, active) + message = call_message(node) + child_parts = [] + mass = 1 + node.children.each do |child| + child_fp, child_mass = fingerprint(child, active) + next if child_fp.empty? 
child_parts << child_fp mass += child_mass @@ -414,12 +423,22 @@ def method_span_for(file, line_no) MethodSpan.new(name: "(top-level)", first_line: line_no, last_line: line_no) end - def walk(node, &block) - return unless ts_node?(node) + def walk(node, &block) + return unless ts_node?(node) - yield node - node.children.each { |child| walk(child, &block) } - end + pending = [node] + seen = Set.new + until pending.empty? + current = pending.pop + next unless ts_node?(current) + key = node_key(current) + next if seen.include?(key) + + seen << key + yield current + current.children.reverse_each { |child| pending << child } + end + end def named_field(node, name) node.child_by_field_name(name) @@ -427,9 +446,15 @@ def named_field(node, name) nil end - def ts_node?(node) - node && node.respond_to?(:kind) && node.respond_to?(:children) - end + def ts_node?(node) + node && node.respond_to?(:kind) && node.respond_to?(:children) + end + + def node_key(node) + [node.kind, node.start_byte, node.end_byte] + rescue StandardError + node.object_id + end def span(node) [node.start_point.row + 1, node.start_point.column, diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 44ca62834..77439c0f6 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -57,23 +57,6 @@ def call_name?(source, names) end end - GENERIC_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\b(?:nil|null|none|undefined)\b/i].freeze, - type_guard_patterns: [ - /\b(?:isinstance|typeof|typeid|instanceof)\b/, - /(?:\?\.|&\.)/, - /@typeInfo\b/, - /\bkind\s*(?:==|!=)/ - ].freeze, - diagnostic_patterns: [ - /\b(?:throw|panic|abort|unreachable)\b/, - /\breturn\s+error[.\w]*/ - ].freeze, - trivial_patterns: [ - /\A(?:nil|null|None|undefined|true|false|0|1|break|continue|unreachable)\s*;?\z/, - /\Areturn\s+(?:nil|null|None|undefined|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze RUBY_LEXICON = LanguageLexicon.new( 
nil_literal_patterns: [/\bnil\b/].freeze, type_guard_patterns: [ @@ -153,9 +136,9 @@ def call_name?(source, names) /\Areturn\s+(?:None|true|false|0|1)\s*;?\z/ ].freeze ).freeze - ZIG_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bnull\b/].freeze, - type_guard_patterns: [ + ZIG_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnull\b/].freeze, + type_guard_patterns: [ /\bnull\b/, /@typeInfo\b/, /\bif\s*\([^)]*\)\s*\|/ @@ -167,18 +150,140 @@ def call_name?(source, names) ].freeze, trivial_patterns: [ /\A(?:null|true|false|0|1|break|continue|unreachable)\s*;?\z/, - /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - LANGUAGE_LEXICONS = { - ruby: RUBY_LEXICON, - python: PYTHON_LEXICON, - javascript: JAVASCRIPT_LEXICON, - typescript: JAVASCRIPT_LEXICON, - go: GO_LEXICON, - rust: RUST_LEXICON, - zig: ZIG_LEXICON - }.freeze + /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + LUA_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnil\b/].freeze, + type_guard_patterns: [ + /\btype\s*\(/, + /\bnil\b/, + /\b(?:pcall|xpcall)\s*\(/ + ].freeze, + diagnostic_patterns: [ + /\berror\s*\(/, + /\bassert\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:nil|true|false|0|1|break)\s*;?\z/, + /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + C_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bNULL\b/].freeze, + type_guard_patterns: [ + /\bNULL\b/, + /\bsizeof\s*\(/, + /\b_Generic\s*\(/ + ].freeze, + diagnostic_patterns: [ + /\b(?:assert|abort|exit)\s*\(/, + /\breturn\s+errno\b/ + ].freeze, + trivial_patterns: [ + /\A(?:NULL|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:NULL|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + CPP_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\b(?:nullptr|NULL)\b/].freeze, + type_guard_patterns: [ + /\b(?:nullptr|NULL)\b/, + /\b(?:dynamic_cast|typeid)\s*[<(]/, + /\bstd::(?:get_if|holds_alternative)\s*[<(]/ + ].freeze, + diagnostic_patterns: [ + 
/\bthrow\b/, + /\b(?:assert|abort|exit)\s*\(/, + /\bstd::terminate\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:nullptr|NULL|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:nullptr|NULL|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + CSHARP_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnull\b/].freeze, + type_guard_patterns: [ + /\bnull\b/, + /(?:\?\.|\?\?)/, + /\b(?:is|as|typeof)\b/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\b(?:Debug\.Assert|Trace\.Assert|Environment\.Exit)\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:null|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + JAVA_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnull\b/].freeze, + type_guard_patterns: [ + /\bnull\b/, + /\binstanceof\b/, + /\bObjects\.(?:isNull|nonNull|requireNonNull)\s*\(/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\bassert\b/, + /\bSystem\.exit\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:null|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + SWIFT_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnil\b/].freeze, + type_guard_patterns: [ + /\bnil\b/, + /(?:\?\.|\?\?)/, + /\b(?:if|guard)\s+let\b/, + /\b(?:as\?|is)(?:\s|$)/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\b(?:fatalError|preconditionFailure|assertionFailure|assert|precondition)\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:nil|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + KOTLIN_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnull\b/].freeze, + type_guard_patterns: [ + /\bnull\b/, + /(?:\?\.|\?\?)/, + /\b(?:is|as\?)(?:\s|$)/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\b(?:error|require|check|assert|TODO)\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:null|true|false|0|1|break|continue)\s*;?\z/, + 
/\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + LANGUAGE_LEXICONS = { + ruby: RUBY_LEXICON, + python: PYTHON_LEXICON, + javascript: JAVASCRIPT_LEXICON, + typescript: JAVASCRIPT_LEXICON, + go: GO_LEXICON, + rust: RUST_LEXICON, + zig: ZIG_LEXICON, + lua: LUA_LEXICON, + c: C_LEXICON, + cpp: CPP_LEXICON, + csharp: CSHARP_LEXICON, + java: JAVA_LEXICON, + swift: SWIFT_LEXICON, + kotlin: KOTLIN_LEXICON + }.freeze module_function @@ -223,26 +328,36 @@ def tree_sitter? %w[tree_sitter treesitter].include?(parser) end - def language_for(file) - case File.extname(file).downcase + def language_for(file) + forced = ENV["DECOMPLEX_FORCE_LANGUAGE"].to_s.strip + return forced.tr("-", "_").to_sym unless forced.empty? + + case File.extname(file).downcase when ".rb" then :ruby when ".py", ".pyi" then :python when ".js", ".jsx", ".mjs", ".cjs" then :javascript - when ".ts", ".tsx" then :typescript - when ".go" then :go - when ".rs" then :rust - when ".zig" then :zig - else :ruby - end - end + when ".ts", ".tsx" then :typescript + when ".go" then :go + when ".rs" then :rust + when ".zig" then :zig + when ".lua" then :lua + when ".c", ".h" then :c + when ".cc", ".cpp", ".cxx", ".hh", ".hpp", ".hxx" then :cpp + when ".cs" then :csharp + when ".java" then :java + when ".swift" then :swift + when ".kt", ".kts" then :kotlin + else :ruby + end + end def supported_exts(parser: self.parser) - case parser.to_s.tr("-", "_") - when "", "tree_sitter", "treesitter" - %w[.rb .py .pyi .js .jsx .mjs .cjs .ts .tsx .go .rs .zig] - else - [] - end + case parser.to_s.tr("-", "_") + when "", "tree_sitter", "treesitter" + %w[.rb .py .pyi .js .jsx .mjs .cjs .ts .tsx .go .rs .zig .lua .c .h .cc .cpp .cxx .hh .hpp .hxx .cs .java .swift .kt .kts] + else + [] + end end def supported_source?(file, parser: self.parser) @@ -251,7 +366,7 @@ def supported_source?(file, parser: self.parser) def language_lexicon(language) key = language.to_s.empty? ? 
nil : language.to_sym - LANGUAGE_LEXICONS.fetch(key, GENERIC_LEXICON) + LANGUAGE_LEXICONS.fetch(key) end class Document @@ -378,17 +493,30 @@ class TreeSitterAdapter BRANCH_KINDS = %w[if unless if_statement if_modifier unless_modifier if_expression while until while_statement for for_statement case switch_statement expression_switch_statement switch_expression - match_statement match_expression].freeze + match_statement match_expression when_expression].freeze NOISE_MESSAGES = %w[! != == === < <= > >= [] []= to_s inspect class].freeze LANGUAGE_PACKAGES = { ruby: "tree-sitter-ruby", python: "tree-sitter-python", javascript: "tree-sitter-javascript", typescript: "tree-sitter-typescript", - go: "tree-sitter-go", - rust: "tree-sitter-rust", - zig: "@tree-sitter-grammars/tree-sitter-zig" - }.freeze + go: "tree-sitter-go", + rust: "tree-sitter-rust", + zig: "@tree-sitter-grammars/tree-sitter-zig", + lua: "@tree-sitter-grammars/tree-sitter-lua", + c: "tree-sitter-c", + cpp: "tree-sitter-cpp", + csharp: "tree-sitter-c-sharp", + java: "tree-sitter-java", + swift: "tree-sitter-swift", + kotlin: "tree-sitter-kotlin" + }.freeze + LANGUAGE_GRAMMAR_NAMES = { + csharp: ["c-sharp", "csharp"] + }.freeze + TREE_SITTER_LANGUAGE_NAMES = { + csharp: "c_sharp" + }.freeze def parse(file, language: nil) lang = (language || Syntax.language_for(file)).to_sym @@ -514,12 +642,12 @@ def type_aliases(lines) private - def parser_for(language) - require_tree_sitter - lang_name = language.to_s - register_language(lang_name, grammar_path(language)) - ::TreeSitter::Parser.new.tap { |parser| parser.language = lang_name } - end + def parser_for(language) + require_tree_sitter + lang_name = TREE_SITTER_LANGUAGE_NAMES.fetch(language, language.to_s) + register_language(lang_name, grammar_path(language)) + ::TreeSitter::Parser.new.tap { |parser| parser.language = lang_name } + end def require_tree_sitter gem "tree_sitter", "~> 0.1" @@ -549,22 +677,29 @@ def grammar_path(language) "to a parser shared 
library (.so/.dylib/.node). Checked: #{candidates.join(', ')}" end - def grammar_candidates(language) - pkg = LANGUAGE_PACKAGES.fetch(language) - names = ["#{language}.so", "tree-sitter-#{language}.so", - "libtree-sitter-#{language}.so", "#{language}.node", - "tree-sitter-#{language}.node", - "@tree-sitter-grammars+tree-sitter-#{language}.node"] - roots = [ - File.expand_path("../../vendor/tree-sitter", __dir__), - File.expand_path("../../vendor/tree-sitter/#{language}", __dir__), + def grammar_candidates(language) + pkg = LANGUAGE_PACKAGES.fetch(language) + stems = LANGUAGE_GRAMMAR_NAMES.fetch(language, [language.to_s]) + names = stems.flat_map do |stem| + ["#{stem}.so", "tree-sitter-#{stem}.so", + "libtree-sitter-#{stem}.so", "#{stem}.node", + "tree-sitter-#{stem}.node", + "#{stem}_binding.node", + "tree_sitter_#{stem.tr('-', '_')}_binding.node", + "@tree-sitter-grammars+tree-sitter-#{stem}.node"] + end + roots = [ + File.expand_path("../../vendor/tree-sitter", __dir__), + File.expand_path("../../vendor/tree-sitter/#{language}", __dir__), File.expand_path("../../node_modules/#{pkg}", __dir__), File.expand_path("../../../../node_modules/#{pkg}", __dir__), File.expand_path("../../../../../node_modules/#{pkg}", __dir__) - ] - all_prebuilds = roots.flat_map do |root| - Dir.glob(File.join(root, "prebuilds", "*", "*tree-sitter-#{language}.node")) - end + ] + all_prebuilds = roots.flat_map do |root| + stems.flat_map do |stem| + Dir.glob(File.join(root, "prebuilds", "*", "*tree-sitter-#{stem}.node")) + end + end prebuilds = platform_prebuilds(all_prebuilds) roots.product(names).map { |root, name| File.join(root, name) } + prebuilds end @@ -592,13 +727,24 @@ def host_arch end end - def walk(node, stack, &block) - return unless ts_node?(node) + def walk(node, stack, &block) + return unless ts_node?(node) - stack = push_context(stack, node) - yield node, stack - node.children.each { |child| walk(child, stack, &block) } - end + pending = [[node, stack]] + seen = Set.new + 
until pending.empty? + current, current_stack = pending.pop + next unless ts_node?(current) + key = node_key(current) + next if seen.include?(key) + + seen << key + + next_stack = push_context(current_stack, current) + yield current, next_stack + current.children.reverse_each { |child| pending << [child, next_stack] } + end + end def push_context(stack, node) next_stack = push_owner_context(stack, node) @@ -661,9 +807,11 @@ def function_name(node) case node.kind when "body_statement" hidden_ruby_method_name(node) - when "method", "function_definition", "function_declaration", - "method_definition", "function_item" - named_field(node, "name")&.text || first_named_text(node, %w[identifier constant property_identifier]) + when "method", "function_definition", "function_declaration", + "method_definition", "function_item" + named_field(node, "name")&.text || + declarator_name(named_field(node, "declarator")) || + first_named_text(node, %w[identifier constant property_identifier]) when "singleton_method" name = named_field(node, "name")&.text || node.named_children.reverse.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) }&.text @@ -707,11 +855,11 @@ def function_params(node) def parameter_name(param) return nil unless ts_node?(param) - return param.text if %w[identifier shorthand_property_identifier_pattern].include?(param.kind) + return param.text if %w[identifier simple_identifier shorthand_property_identifier_pattern].include?(param.kind) name = named_field(param, "name") || param.named_children.find do |child| - %w[identifier field_identifier property_identifier].include?(child.kind) + %w[identifier simple_identifier field_identifier property_identifier].include?(child.kind) end text = name&.text.to_s return nil if text.empty? 
|| text == "_" @@ -793,7 +941,7 @@ def record_decision_site(document, node, stack, out) case node.kind when "case", "switch_statement", "expression_switch_statement", "switch_expression", - "match_statement", "match_expression" + "match_statement", "match_expression", "when_expression" return if ruby_predicate_less_case?(node) patterns = case_patterns(node) @@ -949,10 +1097,14 @@ def case_arm_patterns(child) patterns = child.named_children.select { |node| %w[pattern case_pattern match_pattern].include?(node.kind) } patterns = [named_field(child, "pattern") || child.named_children.first].compact if patterns.empty? ruby_when_pattern_texts(patterns) - when "switch_case", "case_clause", "expression_case" + when "switch_case", "case_clause", "expression_case", "case_statement", "switch_section", + "switch_block_statement_group", "switch_entry", "when_entry" return [] if child.text.to_s.lstrip.start_with?("else") - value = named_field(child, "value") || child.named_children.first + value = named_field(child, "value") || named_field(child, "pattern") || + child.named_children.find { |candidate| candidate.kind == "when_condition" } || + child.named_children.find { |candidate| candidate.kind == "switch_pattern" } || + child.named_children.first value && value.kind !~ /statement|block/ ? 
[normalize_text(value.text)] : [] else [] @@ -1007,7 +1159,7 @@ def case_arms(node) child = stack.shift next unless ts_node?(child) - if %w[when switch_case case_clause expression_case match_arm].include?(child.kind) + if %w[when switch_case case_clause expression_case case_statement switch_section switch_block_statement_group switch_entry when_entry match_arm].include?(child.kind) arms << child elsif !%w[method function_definition function_declaration method_definition method_declaration function_item class class_definition @@ -1027,9 +1179,10 @@ def decision_predicate(node) def decision_subject(node) named_field(node, "value") || named_field(node, "subject") || + node.named_children.find { |child| child.kind == "when_subject" } || named_field(node, "condition") || node.named_children.find do |child| - !%w[when switch_case case_clause expression_case match_arm else then comment].include?(child.kind) + !%w[when switch_case case_clause expression_case case_statement switch_section switch_block_statement_group switch_entry when_entry match_arm else then comment].include?(child.kind) end end @@ -1250,7 +1403,7 @@ def record_branch_arm(document, node, stack, out) when "while", "until", "while_statement", "for", "for_statement" record_loop_arm(document, node, stack, out) when "case", "body_statement", "switch_statement", "expression_switch_statement", "switch_expression", - "match_statement", "match_expression" + "match_statement", "match_expression", "when_expression" return if node.kind == "body_statement" && !hidden_case?(node) record_case_arms(document, node, stack, out) @@ -1481,12 +1634,23 @@ def rhs_param_names(node, params) found & params end - def collect_identifiers(node, out) - return unless ts_node?(node) + def collect_identifiers(node, out) + return unless ts_node?(node) + + pending = [node] + seen = Set.new + until pending.empty? 
+ current = pending.pop + next unless ts_node?(current) + key = node_key(current) + next if seen.include?(key) + + seen << key + out << current.text if current.kind == "identifier" + current.children.reverse_each { |child| pending << child } + end + end - out << node.text if node.kind == "identifier" - node.children.each { |child| collect_identifiers(child, out) } - end def owner_for_node(document, node, stack: nil) receiver_owner = receiver_owner_name(node) @@ -1527,8 +1691,8 @@ def owner_name_from_declaration(document, node) end case node.kind - when "class", "class_definition", "class_declaration", "module" - named_field(node, "name")&.text || first_named_text(node, %w[constant identifier type_identifier]) + when "class", "class_definition", "class_declaration", "class_specifier", "module" + named_field(node, "name")&.text || first_named_text(node, %w[constant identifier type_identifier]) when "impl_item", "impl_block" impl_owner_name(node) when "struct_item", "struct_spec", "type_spec", "type_declaration" @@ -1542,7 +1706,7 @@ def owner_kind(node) return hidden_ruby_owner_kind(node) if hidden_ruby_owner_declaration?(node) case node.kind - when "class", "class_definition", "class_declaration" then :class + when "class", "class_definition", "class_declaration", "class_specifier" then :class when "module" then :module when "impl_item", "impl_block" then :impl when "struct_declaration", "struct_item", "struct_spec" then :struct @@ -1648,8 +1812,8 @@ def call_target(document, node) ruby_bare_call_target(document, node) when "call_expression", "method_invocation", "invocation_expression" generic_call_target(node) - when "attribute", "selector_expression", "field", "member_expression", - "field_expression", "expression_list" + when "attribute", "selector_expression", "field", "field_access", "member_expression", + "member_access_expression", "field_expression", "expression_list" adjacent_argument_call_target(node) end end @@ -1736,14 +1900,18 @@ def 
target_from_callee(callee) if field_like_node?(callee) object = named_field(callee, "object") || named_field(callee, "receiver") || named_field(callee, "operand") || named_field(callee, "value") || - callee.named_children.first + named_field(callee, "expression") || + callee.named_children.find { |child| child.kind != "navigation_suffix" } field = named_field(callee, "field") || named_field(callee, "property") || + named_field(callee, "suffix") || + callee.named_children.find { |child| child.kind == "navigation_suffix" } || callee.named_children.last - return nil unless object && field + field_text = member_field_text(field) + return nil unless object && field_text { receiver: normalize_text(object.text).sub(/\A\*/, ""), - message: field.text + message: field_text } elsif %w[identifier field_identifier property_identifier constant type_identifier].include?(callee.kind) { @@ -1831,22 +1999,29 @@ def state_read_target(node) return nil if named_field(node, "arguments") { receiver: normalize_text(receiver.text), field: method.text } - when "field", "selector_expression", "member_expression", "attribute", - "field_expression", "expression_list" + when "field", "field_access", "selector_expression", "member_expression", "member_access_expression", "attribute", + "field_expression", "navigation_expression", "directly_assignable_expression", "expression_list" return nil if node.kind == "expression_list" && !(named_field(node, "operand") && named_field(node, "field")) object = named_field(node, "object") || named_field(node, "receiver") || - named_field(node, "operand") || named_field(node, "value") - field = named_field(node, "field") || named_field(node, "property") || node.named_children.last + named_field(node, "expression") || + named_field(node, "operand") || named_field(node, "value") || + named_field(node, "argument") || + node.named_children.find { |child| child.kind != "navigation_suffix" } + field = named_field(node, "field") || named_field(node, "property") || + 
named_field(node, "name") || named_field(node, "suffix") || + node.named_children.find { |child| child.kind == "navigation_suffix" } || + node.named_children.last if node.kind == "field_expression" && node.text.to_s.start_with?(".") field = node.named_children.find { |child| child.kind == "identifier" } || field return { receiver: ".literal", field: field.text } if field end - return nil unless object && field + field_text = member_field_text(field) + return nil unless object && field_text return nil if namespace_receiver?(object.text) - return nil if NOISE_MESSAGES.include?(field.text) + return nil if NOISE_MESSAGES.include?(field_text) - { receiver: normalize_text(object.text), field: field.text } + { receiver: normalize_text(object.text), field: field_text } when "instance_variable", "global_variable" { receiver: "self", field: node.text } end @@ -1863,22 +2038,29 @@ def state_target(lhs) return nil unless receiver && method { receiver: normalize_text(receiver.text), field: method.text.sub(/=\z/, "") } - when "field", "selector_expression", "member_expression", "attribute", - "field_expression", "expression_list" + when "field", "field_access", "selector_expression", "member_expression", "member_access_expression", "attribute", + "field_expression", "navigation_expression", "directly_assignable_expression", "expression_list" if lhs.kind == "expression_list" && !(named_field(lhs, "operand") && named_field(lhs, "field")) return state_target(lhs.named_children.first) end object = named_field(lhs, "object") || named_field(lhs, "receiver") || - named_field(lhs, "operand") || named_field(lhs, "value") - field = named_field(lhs, "field") || named_field(lhs, "property") || lhs.named_children.last + named_field(lhs, "expression") || + named_field(lhs, "operand") || named_field(lhs, "value") || + named_field(lhs, "argument") || + lhs.named_children.find { |child| child.kind != "navigation_suffix" } + field = named_field(lhs, "field") || named_field(lhs, "property") || + 
named_field(lhs, "name") || named_field(lhs, "suffix") || + lhs.named_children.find { |child| child.kind == "navigation_suffix" } || + lhs.named_children.last if lhs.kind == "field_expression" && lhs.text.to_s.start_with?(".") field = lhs.named_children.find { |child| child.kind == "identifier" } || field return { receiver: ".literal", field: field.text.sub(/=\z/, "") } if field end - return nil unless object && field + field_text = member_field_text(field) + return nil unless object && field_text - { receiver: normalize_text(object.text), field: field.text.sub(/=\z/, "") } + { receiver: normalize_text(object.text), field: field_text.sub(/=\z/, "") } when "instance_variable", "global_variable" { receiver: "self", field: lhs.text } end @@ -1934,8 +2116,24 @@ def parent_node(node) end def field_like_node?(node) - %w[field selector_expression member_expression attribute field_expression - expression_list scoped_identifier].include?(node.kind) + %w[field field_access selector_expression member_expression member_access_expression attribute field_expression + navigation_expression directly_assignable_expression expression_list scoped_identifier].include?(node.kind) + end + + def member_field_text(field) + return nil unless ts_node?(field) + + if field.kind == "navigation_suffix" + suffix = named_field(field, "suffix") || + field.named_children.find { |child| %w[identifier simple_identifier field_identifier property_identifier].include?(child.kind) } || + field.named_children.last + text = suffix&.text.to_s + return nil if text.empty? + + return text.sub(/\A[.?]+/, "") + end + + field.text.to_s.sub(/\A[.?]+/, "") end def normalize_type_owner(text) @@ -1946,11 +2144,31 @@ def normalize_type_owner(text) end def first_named_text(node, kinds) - child = node.named_children.find { |c| kinds.include?(c.kind) } + expanded = kinds.include?("identifier") ? 
kinds + %w[simple_identifier] : kinds + child = node.named_children.find { |c| expanded.include?(c.kind) } child&.text end - def inline_def_argument_list?(node) + def declarator_name(node) + return nil unless ts_node?(node) + + pending = [node] + seen = Set.new + until pending.empty? + current = pending.pop + next unless ts_node?(current) + key = node_key(current) + next if seen.include?(key) + + seen << key + return current.text if %w[identifier simple_identifier field_identifier property_identifier].include?(current.kind) + + current.named_children.reverse_each { |child| pending << child } + end + nil + end + + def inline_def_argument_list?(node) ts_node?(node) && node.kind == "argument_list" && node.children.first&.kind.to_s == "def" end diff --git a/gems/decomplex/test/syntax_test.rb b/gems/decomplex/test/syntax_test.rb index 5531ea77b..ceb68f9a4 100644 --- a/gems/decomplex/test/syntax_test.rb +++ b/gems/decomplex/test/syntax_test.rb @@ -90,6 +90,36 @@ def test_tree_sitter_grammar_candidates_keep_only_current_platform_prebuilds assert_equal [current], adapter.send(:platform_prebuilds, [other, current]) end + def test_language_profiles_have_language_specific_lexicons + examples = { + lua: ["script.lua", "value == nil", "error('bad')"], + c: ["src/main.c", "ptr == NULL", "abort()"], + cpp: ["src/main.cpp", "value == nullptr", "throw Error{}"], + csharp: ["src/Program.cs", "value is string", "throw new Exception()"], + java: ["src/Main.java", "value instanceof String", "throw new RuntimeException()"], + swift: ["Sources/App.swift", "if let value = maybe", "fatalError()"], + kotlin: ["src/Main.kt", "value as? 
String", "require(value != null)"] + } + + examples.each do |language, (path, type_guard, diagnostic)| + lexicon = Decomplex::Syntax.language_lexicon(language) + + assert_equal language, Decomplex::Syntax.language_for(path) + assert_instance_of Decomplex::Syntax::LanguageLexicon, lexicon, language + assert lexicon.type_guard?(type_guard), language + assert lexicon.diagnostic?(diagnostic), language + end + end + + def test_force_language_override_handles_ambiguous_headers + assert_equal :c, Decomplex::Syntax.language_for("include/demo.h") + + with_env("DECOMPLEX_FORCE_LANGUAGE", "cpp") do + assert_equal :cpp, Decomplex::Syntax.language_for("include/demo.h") + assert_equal :cpp, Decomplex::Syntax.language_for("src/demo.c") + end + end + def test_tree_sitter_ruby_adapter_extracts_portable_facts_when_grammar_is_available grammar = ENV["DECOMPLEX_TS_RUBY_PATH"] skip "set DECOMPLEX_TS_RUBY_PATH to run Tree-sitter adapter smoke test" unless grammar && File.file?(grammar) @@ -261,6 +291,156 @@ def call(self): end end + def test_tree_sitter_c_adapter_extracts_functions_branches_and_pointer_state + grammar = ENV["DECOMPLEX_TS_C_PATH"] + skip "set DECOMPLEX_TS_C_PATH to run C structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~C, ".c") do |path| + typedef struct Node { int storage; int ready; int enabled; int kind; } Node; + static int classify(Node* node) { + node->storage = 1; + if (node->ready && node->enabled) return 1; + switch (node->kind) { case 1: return 1; case 2: return 2; default: return 0; } + } + C + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :c) + + assert_includes doc.function_defs.map(&:name), "classify" + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, + ["node", "storage", "classify"] + assert_includes doc.decision_sites.map(&:kind), :conjunction + assert_includes doc.decision_sites.map(&:kind), :case_dispatch + end + end + + def 
test_tree_sitter_cpp_adapter_extracts_class_methods_and_pointer_state + grammar = ENV["DECOMPLEX_TS_CPP_PATH"] + skip "set DECOMPLEX_TS_CPP_PATH to run C++ structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~CPP, ".cpp") do |path| + class Parser { + public: + int parse(Node* node) { + node->storage = 1; + if (node == nullptr || node->ready) return 1; + switch (node->kind) { case 1: return 1; case 2: return 2; default: return 0; } + } + }; + CPP + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :cpp) + + assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Parser", :class] + assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Parser", "parse"] + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, + ["node", "storage", "parse"] + assert_includes doc.decision_sites.map(&:kind), :case_dispatch + end + end + + def test_tree_sitter_csharp_adapter_extracts_class_methods_and_member_state + grammar = ENV["DECOMPLEX_TS_CSHARP_PATH"] + skip "set DECOMPLEX_TS_CSHARP_PATH to run C# structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~CS, ".cs") do |path| + public sealed class Parser { + private int _storage; + public int Parse(Node node) { + this._storage = 1; + if (node == null || node.Ready) return 1; + switch (node.Kind) { case 1: return 1; case 2: return 2; default: return 0; } + } + } + CS + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :csharp) + + assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Parser", :class] + assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Parser", "Parse"] + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, + ["this", "_storage", "Parse"] + assert_includes doc.decision_sites.map(&:kind), :case_dispatch + end + end + + def 
test_tree_sitter_java_adapter_extracts_class_methods_and_member_state + grammar = ENV["DECOMPLEX_TS_JAVA_PATH"] + skip "set DECOMPLEX_TS_JAVA_PATH to run Java structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~JAVA, ".java") do |path| + public final class Parser { + private int storage; + public int parse(Node node) { + this.storage = 1; + if (node == null || node.ready()) return 1; + switch (node.kind()) { case 1: return 1; case 2: return 2; default: return 0; } + } + } + JAVA + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :java) + + assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Parser", :class] + assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Parser", "parse"] + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, + ["this", "storage", "parse"] + assert_includes doc.decision_sites.map(&:kind), :case_dispatch + end + end + + def test_tree_sitter_swift_adapter_extracts_class_methods_and_member_state + grammar = ENV["DECOMPLEX_TS_SWIFT_PATH"] + skip "set DECOMPLEX_TS_SWIFT_PATH to run Swift structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~SWIFT, ".swift") do |path| + final class Parser { + private var storage: Int = 0 + func parse(_ node: Node) -> Int { + self.storage = 1 + if node == nil || node.ready { return 1 } + switch node.kind { + case .one: return 1 + case .two: return 2 + default: return 0 + } + } + } + SWIFT + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :swift) + + assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Parser", :class] + assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Parser", "parse"] + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, + ["self", "storage", "parse"] + assert_includes doc.decision_sites.map(&:kind), :case_dispatch + end + end + + def 
test_tree_sitter_kotlin_adapter_extracts_class_methods_and_member_state + grammar = ENV["DECOMPLEX_TS_KOTLIN_PATH"] + skip "set DECOMPLEX_TS_KOTLIN_PATH to run Kotlin structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~KOTLIN, ".kt") do |path| + class Parser { + var storage: Int = 0 + fun parse(node: Node): Int { + this.storage = 1 + if (node == null || node.ready) return 1 + return when (node.kind) { + Kind.ONE -> 1 + Kind.TWO -> 2 + else -> 0 + } + } + } + KOTLIN + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :kotlin) + + assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Parser", :class] + assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Parser", "parse"] + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, + ["this", "storage", "parse"] + assert_includes doc.decision_sites.map(&:kind), :case_dispatch + end + end + def test_tree_sitter_zig_adapter_extracts_structural_facts_when_grammar_is_available grammar = ENV["DECOMPLEX_TS_ZIG_PATH"] skip "set DECOMPLEX_TS_ZIG_PATH to run Zig structural facts test" unless grammar && File.file?(grammar) diff --git a/gems/lineage/docs/agents/cross-lang-support.md b/gems/lineage/docs/agents/cross-lang-support.md new file mode 100644 index 000000000..e667afbc4 --- /dev/null +++ b/gems/lineage/docs/agents/cross-lang-support.md @@ -0,0 +1,76 @@ +# Cross-Language Support Validation + +This document tracks the first practical validation pass for building Lineage databases from non-CLEAR repositories and ingesting analyzer, lint, coverage, hazard, and runtime evidence. + +`gems/lineage/docs/agents/plugins.md` describes the plugin architecture and broad language targets. It does not prescribe exact repositories, so this pass used representative active OSS projects with enough real code to exercise the adapters. 
+ +## Goal + +Create one `lineage.db` per target repository, ingest the best available evidence, start a Lineage UI server for each on `0.0.0.0`, and spot check that the UI can review the project with cross-language data. + +## Validation Matrix + +| Language | Repository | Local Clone | Database | UI Port | Status | +| --- | --- | --- | --- | --- | --- | +| Python | `https://github.com/Textualize/rich` | `/tmp/lineage-rich` | `/tmp/lineage-rich/lineage.db` | `8081` | Complete | +| TypeScript | `https://github.com/colinhacks/zod` | `/tmp/lineage-zod` | `/tmp/lineage-zod/lineage.db` | `8082` | Complete | +| Go | `https://github.com/junegunn/fzf` | `/tmp/lineage-fzf` | `/tmp/lineage-fzf/lineage.db` | `8083` | Complete | +| Lua | `https://github.com/luarocks/luarocks` | `/tmp/lineage-lua-luarocks` | `/tmp/lineage-lua-luarocks/lineage.db` | `8084` | Complete, no coverage | +| C | `https://github.com/libuv/libuv` | `/tmp/lineage-c-libuv` | `/tmp/lineage-c-libuv/lineage.db` | `8085` | Complete, no coverage | +| C++ | `https://github.com/fmtlib/fmt` | `/tmp/lineage-cpp-fmt` | `/tmp/lineage-cpp-fmt/lineage.db` | `8086` | Complete, no coverage | +| C# | `https://github.com/serilog/serilog` | `/tmp/lineage-csharp-serilog` | `/tmp/lineage-csharp-serilog/lineage.db` | `8087` | Complete, no coverage | +| Java | `https://github.com/google/gson` | `/tmp/lineage-java-gson` | `/tmp/lineage-java-gson/lineage.db` | `8088` | Complete, no coverage | +| Swift | `https://github.com/apple/swift-argument-parser` | `/tmp/lineage-swift-argument-parser` | `/tmp/lineage-swift-argument-parser/lineage.db` | `8089` | Complete, no coverage | +| Kotlin | `https://github.com/square/okio` | `/tmp/lineage-kotlin-okio` | `/tmp/lineage-kotlin-okio/lineage.db` | `8090` | Complete, no coverage | + +All UI servers were restarted with detached sessions and smoke checked through `curl` on ports `8081` through `8090`. 
+ +## Evidence Targets + +Each repository received as much of this evidence as the current tools could produce without repository-specific hacks: + +- `lineage build`: Git history, logical units, churn, and ownership. +- Decomplex SARIF: structural complexity findings. +- SlopCop SARIF: coverage gaps and constraint findings. +- Boobytrap SARIF: bug-risk findings derived from churn, complexity, and coverage. +- Nil-kill SARIF: optionality, union, hidden enum, and primitive pressure findings where the language adapter supports them. +- Espalier SARIF: architectural pressure findings where the language adapter supports them. +- Lint SARIF: native lint output converted or emitted as SARIF where the repository already had a reasonable local toolchain. +- Coverage: native coverage output ingested through Lineage-supported formats when the toolchain was available. +- Runtime traces: Sentry-style stack trace ingestion for Python smoke coverage. +- Hazards: Go concurrency hazards for `fzf`. + +## Current Counts + +| Language | Logical Units | SARIF Artifacts | SARIF Findings | Quality Events | Coverage Line Events | Hazards | Runtime Events | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| Python / Rich | 2,152 | 6 | 6,270 | 1,022 | 7,792 | 0 | 1 | +| TypeScript / Zod | 2,437 | 6 | 8,112 | 1,365 | 8,908 | 0 | 0 | +| Go / fzf | 1,421 | 7 | 13,316 | 608 | 16,422 | 312 | 0 | +| Lua / LuaRocks | 1,043 | 6 | 5,056 | 0 | 0 | 0 | 0 | +| C / libuv | 3,920 | 6 | 21,895 | 0 | 0 | 0 | 0 | +| C++ / fmt | 6,014 | 6 | 2,982 | 0 | 0 | 0 | 0 | +| C# / Serilog | 615 | 6 | 1,281 | 0 | 0 | 0 | 0 | +| Java / Gson | 4,921 | 6 | 2,624 | 0 | 0 | 0 | 0 | +| Swift / Argument Parser | 1,938 | 6 | 835 | 0 | 0 | 0 | 0 | +| Kotlin / Okio | 3,357 | 6 | 1,900 | 0 | 0 | 0 | 0 | + +## Adapter Work Completed + +- Replaced generic language placeholders with explicit Decomplex lexicons for Lua, C, C++, C#, Java, Swift, and Kotlin. 
+- Added real Tree-sitter syntax support and tests for C, C++, C#, Java, Swift, and Kotlin structural facts. +- Added Swift member access and `switch_entry` support. +- Added Kotlin `when_expression` and `when_entry` support. +- Added grammar candidate support for packages that ship `tree_sitter_*_binding.node`, needed by `tree-sitter-kotlin`. +- Added Go concurrency hazard detection through SlopCop/Lineage. +- Fixed Lineage source extraction and coverage ingestion issues found during TypeScript/Go validation. +- Fixed Nil-kill static-only normalization so non-Ruby languages do not accidentally depend on stale runtime traces. + +## Environment Gaps + +- Lua coverage/lint was limited by missing local LuaRocks/Busted tooling. +- C and C++ coverage was not generated in this pass; static analyzer, syntax lint, and SARIF ingestion were validated. +- C#, Java, Swift, and Kotlin native build/lint/coverage were limited by missing `dotnet`, Java, Swift, and Kotlin toolchains in this environment. +- TypeScript and Go runtime tracing are still out of scope for this pass. + +These are environment/toolchain gaps, not Lineage ingestion blockers. The DBs and UIs exist for all requested languages. 
diff --git a/gems/lineage/src/extract.rs b/gems/lineage/src/extract.rs index e6f15ae40..0ee457604 100644 --- a/gems/lineage/src/extract.rs +++ b/gems/lineage/src/extract.rs @@ -3,8 +3,10 @@ use std::collections::BTreeSet; use std::collections::HashMap; use tree_sitter::{Language, Node, Parser}; -pub const DEFAULT_CODE_EXTENSIONS: &[&str] = - &["rb", "zig", "py", "js", "lua", "c", "go", "rs", "S"]; +pub const DEFAULT_CODE_EXTENSIONS: &[&str] = &[ + "rb", "zig", "py", "js", "jsx", "mjs", "cjs", "ts", "tsx", "lua", "c", "h", "cc", "cpp", + "cxx", "hh", "hpp", "hxx", "cs", "java", "swift", "kt", "kts", "go", "rs", "S", +]; const DEFAULT_IGNORED_COMPONENTS: &[&str] = &[ ".git", ".zig-cache", @@ -231,9 +233,15 @@ fn detect_candidate(line: &str, line_number: u32, extension: Option<&str>) -> Op match extension { Some("rb") | Some("py") => detect_ruby_python(trimmed, line_number), - Some("js") => detect_javascript(trimmed, line_number), + Some("js") | Some("jsx") | Some("mjs") | Some("cjs") | Some("ts") | Some("tsx") => { + detect_javascript_typescript(trimmed, line_number) + } Some("lua") => detect_lua(trimmed, line_number), - Some("c") => detect_c(trimmed, line_number), + Some("c") | Some("h") | Some("cc") | Some("cpp") | Some("cxx") | Some("hh") + | Some("hpp") | Some("hxx") => detect_c_family(trimmed, line_number), + Some("cs") | Some("java") => detect_csharp_java(trimmed, line_number), + Some("swift") => detect_swift(trimmed, line_number), + Some("kt") | Some("kts") => detect_kotlin(trimmed, line_number), Some("go") => detect_go(trimmed, line_number), Some("zig") => detect_rust_or_zig(trimmed, line_number), Some("S") => detect_assembly(trimmed, line_number), @@ -528,14 +536,8 @@ fn ruby_python_def_rest(line: &str) -> Option<&str> { } } -fn detect_javascript(line: &str, line_number: u32) -> Option { - let line = line - .strip_prefix("export default ") - .unwrap_or(line) - .strip_prefix("export ") - .unwrap_or(line) - .strip_prefix("async ") - .unwrap_or(line); +fn 
detect_javascript_typescript(line: &str, line_number: u32) -> Option { + let line = strip_javascript_modifiers(line); if let Some(rest) = line.strip_prefix("function ") { return named_candidate(rest, UnitKind::Function, line, line_number); @@ -543,9 +545,79 @@ fn detect_javascript(line: &str, line_number: u32) -> Option { if let Some(rest) = line.strip_prefix("class ") { return named_candidate(rest, UnitKind::Class, line, line_number); } + if let Some(rest) = line.strip_prefix("interface ") { + return named_candidate(rest, UnitKind::Class, line, line_number); + } + if let Some(rest) = line.strip_prefix("type ") { + return named_candidate(rest, UnitKind::Class, line, line_number); + } + if let Some(name) = javascript_const_callable_name(line) { + return Some(Candidate { + name: name.to_string(), + kind: UnitKind::Function, + signature: line.trim().to_string(), + line: line_number, + end_line: None, + }); + } + None +} + +fn strip_javascript_modifiers(mut line: &str) -> &str { + loop { + let next = line + .strip_prefix("export default ") + .or_else(|| line.strip_prefix("export ")) + .or_else(|| line.strip_prefix("declare ")) + .or_else(|| line.strip_prefix("abstract ")) + .or_else(|| line.strip_prefix("async ")) + .unwrap_or(line); + if next == line { + return line; + } + line = next; + } +} + +fn javascript_const_callable_name(line: &str) -> Option<&str> { + let rest = line + .strip_prefix("const ") + .or_else(|| line.strip_prefix("let ")) + .or_else(|| line.strip_prefix("var "))?; + let name = javascript_identifier(rest)?; + let after_name = rest[name.len()..].trim_start(); + if after_name.starts_with('=') && after_name.contains("=>") { + return Some(name); + } + if after_name.starts_with(':') { + let type_annotation = after_name.split('=').next().unwrap_or(after_name); + if type_annotation.contains("=>") || type_annotation.contains('(') { + return Some(name); + } + } None } +fn javascript_identifier(input: &str) -> Option<&str> { + let input = input.trim_start(); 
+ let end = input + .char_indices() + .find_map(|(index, ch)| { + if ch.is_alphanumeric() || ch == '_' || ch == '$' { + None + } else { + Some(index) + } + }) + .unwrap_or(input.len()); + let ident = &input[..end]; + if ident.is_empty() { + None + } else { + Some(ident) + } +} + fn detect_lua(line: &str, line_number: u32) -> Option { let rest = line .strip_prefix("local function ") @@ -553,14 +625,20 @@ fn detect_lua(line: &str, line_number: u32) -> Option { named_candidate(rest, UnitKind::Function, line, line_number) } -fn detect_c(line: &str, line_number: u32) -> Option { +fn detect_c_family(line: &str, line_number: u32) -> Option { + if let Some(rest) = c_family_type_rest(line) { + return named_candidate(rest, UnitKind::Class, line, line_number); + } if line.ends_with(';') || !line.contains('(') || !line.contains(')') || !line.contains('{') { return None; } let before_paren = line.split_once('(')?.0.trim_end(); let name = before_paren.split_whitespace().last()?; - if matches!(name, "if" | "for" | "while" | "switch" | "return" | "sizeof") { + if matches!( + name, + "if" | "for" | "while" | "switch" | "return" | "sizeof" | "catch" + ) { return None; } @@ -573,6 +651,128 @@ fn detect_c(line: &str, line_number: u32) -> Option { }) } +fn c_family_type_rest(line: &str) -> Option<&str> { + let line = strip_c_family_modifiers(line); + line.strip_prefix("class ") + .or_else(|| line.strip_prefix("struct ")) + .or_else(|| line.strip_prefix("enum ")) + .or_else(|| line.strip_prefix("namespace ")) +} + +fn strip_c_family_modifiers(mut line: &str) -> &str { + loop { + let next = line + .strip_prefix("template ") + .or_else(|| line.strip_prefix("export ")) + .or_else(|| line.strip_prefix("public ")) + .or_else(|| line.strip_prefix("private ")) + .or_else(|| line.strip_prefix("protected ")) + .or_else(|| line.strip_prefix("internal ")) + .or_else(|| line.strip_prefix("static ")) + .or_else(|| line.strip_prefix("inline ")) + .or_else(|| line.strip_prefix("constexpr ")) + 
.or_else(|| line.strip_prefix("sealed ")) + .or_else(|| line.strip_prefix("abstract ")) + .or_else(|| line.strip_prefix("partial ")) + .or_else(|| line.strip_prefix("readonly ")) + .unwrap_or(line); + if next == line { + return line; + } + line = next; + } +} + +fn detect_csharp_java(line: &str, line_number: u32) -> Option { + let line = strip_c_family_modifiers(line); + if let Some(rest) = line + .strip_prefix("class ") + .or_else(|| line.strip_prefix("interface ")) + .or_else(|| line.strip_prefix("struct ")) + .or_else(|| line.strip_prefix("enum ")) + .or_else(|| line.strip_prefix("record ")) + { + return named_candidate(rest, UnitKind::Class, line, line_number); + } + detect_c_family(line, line_number) +} + +fn detect_swift(line: &str, line_number: u32) -> Option { + let line = strip_swift_modifiers(line); + if let Some(rest) = line + .strip_prefix("class ") + .or_else(|| line.strip_prefix("struct ")) + .or_else(|| line.strip_prefix("enum ")) + .or_else(|| line.strip_prefix("protocol ")) + .or_else(|| line.strip_prefix("actor ")) + { + return named_candidate(rest, UnitKind::Class, line, line_number); + } + if let Some(rest) = line.strip_prefix("func ") { + return named_candidate(rest, UnitKind::Function, line, line_number); + } + None +} + +fn strip_swift_modifiers(mut line: &str) -> &str { + loop { + let next = line + .strip_prefix("public ") + .or_else(|| line.strip_prefix("private ")) + .or_else(|| line.strip_prefix("fileprivate ")) + .or_else(|| line.strip_prefix("internal ")) + .or_else(|| line.strip_prefix("open ")) + .or_else(|| line.strip_prefix("final ")) + .or_else(|| line.strip_prefix("static ")) + .or_else(|| line.strip_prefix("mutating ")) + .or_else(|| line.strip_prefix("async ")) + .unwrap_or(line); + if next == line { + return line; + } + line = next; + } +} + +fn detect_kotlin(line: &str, line_number: u32) -> Option { + let line = strip_kotlin_modifiers(line); + if let Some(rest) = line + .strip_prefix("class ") + .or_else(|| 
line.strip_prefix("interface ")) + .or_else(|| line.strip_prefix("object ")) + .or_else(|| line.strip_prefix("enum class ")) + .or_else(|| line.strip_prefix("data class ")) + .or_else(|| line.strip_prefix("sealed class ")) + { + return named_candidate(rest, UnitKind::Class, line, line_number); + } + if let Some(rest) = line.strip_prefix("fun ") { + return named_candidate(rest, UnitKind::Function, line, line_number); + } + None +} + +fn strip_kotlin_modifiers(mut line: &str) -> &str { + loop { + let next = line + .strip_prefix("public ") + .or_else(|| line.strip_prefix("private ")) + .or_else(|| line.strip_prefix("protected ")) + .or_else(|| line.strip_prefix("internal ")) + .or_else(|| line.strip_prefix("open ")) + .or_else(|| line.strip_prefix("final ")) + .or_else(|| line.strip_prefix("abstract ")) + .or_else(|| line.strip_prefix("suspend ")) + .or_else(|| line.strip_prefix("inline ")) + .or_else(|| line.strip_prefix("override ")) + .unwrap_or(line); + if next == line { + return line; + } + line = next; + } +} + fn detect_go(line: &str, line_number: u32) -> Option { let rest = line.strip_prefix("func ")?; if rest.starts_with('(') { @@ -697,6 +897,45 @@ mod tests { assert_eq!(extractor.extract_units(&zig)[0].name, "run"); } + #[test] + fn extracts_typescript_symbols_with_heuristics() { + let file = BlobFile { + path: "packages/zod/src/demo.ts".into(), + contents: r#" +export interface ParseContext { + async?: boolean; +} + +export type Result = { value: T }; + +export abstract class Parser { + abstract run(value: unknown): Result; +} + +export function parse(value: unknown): Result { + return { value }; +} + +export const safeParse: (value: unknown) => Result = (value) => { + return { value }; +}; +"# + .into(), + }; + + let units = HeuristicExtractor::default().extract_units(&file); + let names: Vec<_> = units + .iter() + .map(|unit| (unit.kind, unit.name.as_str())) + .collect(); + + assert!(names.contains(&(UnitKind::Class, "ParseContext"))); + 
assert!(names.contains(&(UnitKind::Class, "Result"))); + assert!(names.contains(&(UnitKind::Class, "Parser"))); + assert!(names.contains(&(UnitKind::Function, "parse"))); + assert!(names.contains(&(UnitKind::Function, "safeParse"))); + } + #[test] fn extracts_rust_symbols_with_tree_sitter() { let file = BlobFile { @@ -811,6 +1050,13 @@ pub fn StringMap(comptime Value: type) type { assert!(filter.supports_path("zig/main.zig")); assert!(filter.supports_path("src/vm.S")); assert!(filter.supports_path("src/main.c")); + assert!(filter.supports_path("src/main.h")); + assert!(filter.supports_path("src/main.cpp")); + assert!(filter.supports_path("src/main.hpp")); + assert!(filter.supports_path("src/Program.cs")); + assert!(filter.supports_path("src/Main.java")); + assert!(filter.supports_path("Sources/App.swift")); + assert!(filter.supports_path("src/main.kt")); assert!(filter.supports_path("gems/lineage/src/ui.rs")); assert!(filter.supports_path("script/tool.lua")); assert!(!filter.supports_path("benchmarks/x/bench.profile/transpiled.zig")); @@ -862,6 +1108,46 @@ pub fn StringMap(comptime Value: type) type { assert_eq!(extractor.extract_units(&asm)[0].name, "boot"); } + #[test] + fn extracts_c_family_and_managed_language_units() { + let extractor = HeuristicExtractor::default(); + let cpp = BlobFile { + path: "include/demo.hpp".into(), + contents: "class Parser {\n};\nstatic int parse_value(int x) { return x; }\n".into(), + }; + let csharp = BlobFile { + path: "src/Program.cs".into(), + contents: "public sealed class Program {}\nprivate static int Run(int x) { return x; }\n".into(), + }; + let java = BlobFile { + path: "src/Main.java".into(), + contents: "public interface Handler {}\npublic int handle(int x) { return x; }\n".into(), + }; + let swift = BlobFile { + path: "Sources/App.swift".into(), + contents: "public struct App {}\npublic func run(_ x: Int) -> Int { x }\n".into(), + }; + let kotlin = BlobFile { + path: "src/main.kt".into(), + contents: "data class Box(val 
value: Int)\nsuspend fun run(value: Int): Int = value\n".into(), + }; + + let cpp_names: Vec<_> = extractor + .extract_units(&cpp) + .into_iter() + .map(|unit| (unit.kind, unit.name)) + .collect(); + assert!(cpp_names.contains(&(UnitKind::Class, "Parser".to_string()))); + assert!(cpp_names.contains(&(UnitKind::Function, "parse_value".to_string()))); + + assert_eq!(extractor.extract_units(&csharp)[0].name, "Program"); + assert_eq!(extractor.extract_units(&java)[0].name, "Handler"); + assert_eq!(extractor.extract_units(&swift)[0].name, "App"); + assert_eq!(extractor.extract_units(&swift)[1].name, "run"); + assert_eq!(extractor.extract_units(&kotlin)[0].name, "Box"); + assert_eq!(extractor.extract_units(&kotlin)[1].name, "run"); + } + #[test] fn gives_same_named_units_distinct_ordinals_and_ids() { let file = BlobFile { diff --git a/gems/lineage/src/hazard.rs b/gems/lineage/src/hazard.rs index 56d4f5f7b..90bc4ea9b 100644 --- a/gems/lineage/src/hazard.rs +++ b/gems/lineage/src/hazard.rs @@ -32,6 +32,7 @@ pub fn ingest_hazards( ) -> Result { match provider { "zig" => ingest_zig_hazards(storage, repo.as_ref(), commit, timestamp), + "go" => ingest_go_hazards(storage, repo.as_ref(), commit, timestamp), other => anyhow::bail!("unsupported hazard provider {other:?}"), } } @@ -41,6 +42,27 @@ fn ingest_zig_hazards( repo: &Path, commit: &str, timestamp: Option, +) -> Result { + ingest_language_hazards(storage, repo, commit, timestamp, "zig", zig_source_files, scan_zig_sites) +} + +fn ingest_go_hazards( + storage: &Storage, + repo: &Path, + commit: &str, + timestamp: Option, +) -> Result { + ingest_language_hazards(storage, repo, commit, timestamp, "go", go_source_files, scan_go_sites) +} + +fn ingest_language_hazards( + storage: &Storage, + repo: &Path, + commit: &str, + timestamp: Option, + language: &str, + source_files: fn(&Path) -> Result>, + scan_sites: fn(&str, &str) -> Vec, ) -> Result { let repo = repo .canonicalize() @@ -48,7 +70,7 @@ fn ingest_zig_hazards( let 
timestamp = timestamp .or_else(|| storage.commit_timestamp(commit).ok().flatten()) .unwrap_or_else(now_timestamp); - let files = zig_source_files(&repo)?; + let files = source_files(&repo)?; let extractor = HeuristicExtractor::default(); let mut stats = HazardIngestStats { scanned_files: files.len(), @@ -57,7 +79,7 @@ fn ingest_zig_hazards( }; storage.begin_transaction()?; - storage.deactivate_active_hazards("zig")?; + storage.deactivate_active_hazards(language)?; for path in files { let abs = repo.join(&path); let contents = fs::read_to_string(&abs) @@ -67,7 +89,7 @@ fn ingest_zig_hazards( contents: contents.clone(), }; let units = extractor.extract_units(&blob); - for site in scan_zig_sites(&path, &contents) { + for site in scan_sites(&path, &contents) { stats.hazards += 1; let unit = unit_for_site(&blob, &units, site.line); let resolved_id = storage @@ -78,7 +100,7 @@ fn ingest_zig_hazards( } storage.insert_hazard_event(&HazardEvent { unit_id: resolved_id, - language: "zig".into(), + language: language.into(), hazard_type: site.hazard_type.clone(), required_evidence: site.required_evidence.clone(), path: site.path.clone(), @@ -88,7 +110,7 @@ fn ingest_zig_hazards( detected_at_hash: commit.to_string(), is_active: true, payload_json: json!({ - "provider": "zig", + "provider": language, "source": site.source, "timestamp": timestamp }) @@ -111,6 +133,34 @@ fn zig_source_files(repo: &Path) -> Result> { Ok(files) } +fn go_source_files(repo: &Path) -> Result> { + let mut files = Vec::new(); + collect_go_files(repo, Path::new(""), &mut files)?; + files.sort(); + files.dedup(); + Ok(files) +} + +fn collect_go_files(repo: &Path, rel_dir: &Path, out: &mut Vec) -> Result<()> { + let abs = repo.join(rel_dir); + if !abs.is_dir() { + return Ok(()); + } + for entry in fs::read_dir(&abs)? 
{ + let entry = entry?; + let path = entry.path(); + let rel = rel_path(repo, &path)?; + if path.is_dir() { + if !excluded_go_dir(&rel) { + collect_go_files(repo, Path::new(&rel), out)?; + } + } else if rel.ends_with(".go") && !excluded_go_file(&rel) { + out.push(rel); + } + } + Ok(()) +} + fn collect_zig_files(repo: &Path, rel_dir: &Path, out: &mut Vec) -> Result<()> { let abs = repo.join(rel_dir); if !abs.is_dir() { @@ -129,6 +179,19 @@ fn collect_zig_files(repo: &Path, rel_dir: &Path, out: &mut Vec) -> Resu Ok(()) } +fn excluded_go_dir(path: &str) -> bool { + let name = path.rsplit('/').next().unwrap_or(path); + matches!(name, ".git" | "vendor" | "testdata" | "node_modules" | "tmp" | "dist") + || name.starts_with('.') +} + +fn excluded_go_file(path: &str) -> bool { + let Some(name) = path.rsplit('/').next() else { + return true; + }; + name.ends_with("_test.go") +} + fn excluded_zig_file(path: &str) -> bool { let Some(name) = path.rsplit('/').next() else { return true; @@ -201,6 +264,34 @@ fn scan_zig_sites(path: &str, contents: &str) -> Vec { sites } +fn scan_go_sites(path: &str, contents: &str) -> Vec { + let mut sites = Vec::new(); + let mut in_block_comment = false; + for (index, line) in contents.lines().enumerate() { + let line_no = (index + 1) as u32; + let code = strip_go_comment(line, &mut in_block_comment); + if code.trim().is_empty() { + continue; + } + if is_go_goroutine_site(&code) { + sites.push(site(path, line_no, line, "go_race_goroutine", "race")); + } + if is_go_atomic_site(&code) { + sites.push(site(path, line_no, line, "go_race_atomic", "race")); + } + if is_go_lock_site(&code) { + sites.push(site(path, line_no, line, "go_race_lock", "race")); + } + if is_go_waitgroup_site(&code) { + sites.push(site(path, line_no, line, "go_concurrency_waitgroup", "concurrency")); + } + if is_go_channel_site(&code) { + sites.push(site(path, line_no, line, "go_concurrency_channel", "concurrency")); + } + } + sites +} + fn site( path: &str, line: u32, @@ -217,6 
+308,42 @@ fn site( } } +fn is_go_goroutine_site(code: &str) -> bool { + code.trim_start().starts_with("go ") || code.contains("; go ") +} + +fn is_go_atomic_site(code: &str) -> bool { + code.contains("atomic.") +} + +fn is_go_lock_site(code: &str) -> bool { + [ + "sync.Mutex", + "sync.RWMutex", + "sync.Map", + "sync.Once", + "sync.Cond", + ".Lock(", + ".Unlock(", + ".RLock(", + ".RUnlock(", + ] + .iter() + .any(|needle| code.contains(needle)) +} + +fn is_go_waitgroup_site(code: &str) -> bool { + ["sync.WaitGroup", ".Add(", ".Done(", ".Wait("] + .iter() + .any(|needle| code.contains(needle)) +} + +fn is_go_channel_site(code: &str) -> bool { + code.contains("make(chan") + || code.contains("select {") + || code.contains("<-") +} + fn is_atomic_site(code: &str) -> bool { code.contains("@atomic") || code.contains("@cmpxchg") @@ -342,6 +469,42 @@ fn strip_zig_comment(line: &str) -> &str { line.split_once("//").map(|(code, _)| code).unwrap_or(line) } +fn strip_go_comment(line: &str, in_block_comment: &mut bool) -> String { + let mut out = String::new(); + let mut rest = line; + loop { + if *in_block_comment { + let Some((_, after)) = rest.split_once("*/") else { + return out; + }; + *in_block_comment = false; + rest = after; + continue; + } + let block = rest.find("/*"); + let line_comment = rest.find("//"); + match (block, line_comment) { + (Some(block), Some(comment)) if comment < block => { + out.push_str(&rest[..comment]); + return out; + } + (Some(block), _) => { + out.push_str(&rest[..block]); + rest = &rest[block + 2..]; + *in_block_comment = true; + } + (_, Some(comment)) => { + out.push_str(&rest[..comment]); + return out; + } + (None, None) => { + out.push_str(rest); + return out; + } + } + } +} + fn unit_for_site(blob: &BlobFile, units: &[LogicalUnit], line: u32) -> LogicalUnit { units .iter() @@ -398,4 +561,37 @@ mod tests { assert_eq!(stats.hazards, 2); assert_eq!(storage.count_rows("unit_hazards").unwrap(), 2); } + + #[test] + fn 
ingests_go_concurrency_hazards_for_current_snapshot() { + let dir = tempdir().unwrap(); + fs::write( + dir.path().join("worker.go"), + "package demo\n\nimport \"sync/atomic\"\n\nfunc run(ch chan int) {\n go func() { ch <- 1 }()\n value := atomic.LoadInt64(&counter)\n _ = value\n}\n", + ) + .unwrap(); + fs::write( + dir.path().join("worker_test.go"), + "package demo\n\nfunc TestRun() { go run(nil) }\n", + ) + .unwrap(); + let storage = Storage::open_memory().unwrap(); + + let stats = ingest_hazards(&storage, dir.path(), "go", "abc", Some(10)).unwrap(); + + assert_eq!(stats.scanned_files, 1); + assert_eq!(stats.hazards, 3); + assert_eq!(storage.count_rows("unit_hazards").unwrap(), 3); + } + + #[test] + fn go_hazard_scan_ignores_comments() { + let sites = scan_go_sites( + "demo.go", + "package demo\n\nfunc run() {\n // go func() {}()\n /* atomic.AddInt64(&x, 1) */\n ch <- 1\n}\n", + ); + + assert_eq!(sites.len(), 1); + assert_eq!(sites[0].hazard_type, "go_concurrency_channel"); + } } diff --git a/gems/lineage/src/quality.rs b/gems/lineage/src/quality.rs index 4697eb6b0..41afe0f39 100644 --- a/gems/lineage/src/quality.rs +++ b/gems/lineage/src/quality.rs @@ -238,8 +238,8 @@ fn record_from_codecov_node(node: &Value) -> Option { let line_coverage = node .get("totals") .and_then(|totals| totals.get("coverage")) - .and_then(Value::as_f64) - .or_else(|| node.get("coverage").and_then(Value::as_f64)); + .and_then(finite_json_f64) + .or_else(|| node.get("coverage").and_then(finite_json_f64)); Some(CoverageRecord { path, @@ -359,7 +359,12 @@ fn record_from_generic_node(node: &Value) -> Option { } fn metric_value(node: &Value, keys: &[&str]) -> Option { - keys.iter().find_map(|key| node.get(*key).and_then(Value::as_f64)) + keys.iter() + .find_map(|key| node.get(*key).and_then(finite_json_f64)) +} + +fn finite_json_f64(value: &Value) -> Option { + value.as_f64().filter(|number| number.is_finite()) } fn line_hits_from_generic_node(node: &Value) -> Vec { @@ -436,6 +441,7 @@ fn 
parse_cobertura_records(input: &str) -> Result> { class .attribute("line-rate") .and_then(|value| value.parse::().ok()) + .filter(|value| value.is_finite()) .map(|value| value * 100.0) } else { let covered = line_hits.iter().filter(|hit| hit.hits > 0).count(); @@ -538,6 +544,9 @@ fn record_metric( let Some(new_value) = value else { return Ok(0); }; + if !new_value.is_finite() { + return Ok(0); + } let recorded = storage.record_quality_metric(&QualityEvent { unit_id: unit_id.to_string(), commit_hash: commit_hash.to_string(), @@ -643,6 +652,27 @@ mod tests { assert_eq!(records[0].line_hits[1], CoverageLineHit { line: 2, hits: 0 }); } + #[test] + fn ignores_non_finite_cobertura_line_rate() { + let payload = r#" + + + + + + + + + "#; + + let records = parse_coverage_input(payload, "cobertura").unwrap(); + + assert_eq!(records.len(), 1); + assert_eq!(records[0].path, "src/generated.go"); + assert_eq!(records[0].line_coverage, None); + assert!(records[0].line_hits.is_empty()); + } + #[test] fn parses_simplecov_resultset_line_hits() { let value = json!({ diff --git a/gems/nil-kill/lib/nil_kill/commands/normalize_command.rb b/gems/nil-kill/lib/nil_kill/commands/normalize_command.rb index 5d48ae0c3..eee2556c7 100644 --- a/gems/nil-kill/lib/nil_kill/commands/normalize_command.rb +++ b/gems/nil-kill/lib/nil_kill/commands/normalize_command.rb @@ -12,10 +12,10 @@ def run static_path = option("--static") || abort("normalize requires --static PATH") output = option("--output") || File.join(TMP_DIR, "evidence.json") analyze = !@argv.delete("--no-analyze") - traces = options("--traces") - traces = [RUNTIME_DIR] if traces.empty? + explicit_traces = options("--traces") static = JSON.parse(File.read(static_path)) root = File.expand_path(option("--root") || static["root"] || ROOT) + traces = explicit_traces.empty? ? 
default_trace_paths(static) : explicit_traces bundle = Runtime::Normalizer.new(root: root).normalize(static: static, trace_paths: traces, analyze: analyze) FileUtils.mkdir_p(File.dirname(output)) File.write(output, JSON.pretty_generate(bundle)) @@ -41,6 +41,24 @@ def options(name) def option(name) options(name).last end + + def default_trace_paths(static) + languages = static_languages(static) + return [RUNTIME_DIR] if languages.empty? || languages == ["ruby"] + + [] + end + + def static_languages(static) + canonical = Schema::EvidenceBundle.canonical_static(static) + languages = [] + languages.concat(Array(static["languages"])) if static.is_a?(Hash) + languages.concat(Hash(canonical["language_capabilities"]).keys) + Array(canonical["files"]).each { |file| languages << file["language"] } + Array(canonical["methods"]).each { |method| languages << (method["language"] || method["lang"]) } + Array(canonical["fields"]).each { |field| languages << field["language"] } + languages.map(&:to_s).reject(&:empty?).uniq.sort + end end end end diff --git a/gems/nil-kill/lib/nil_kill/runtime/trace_loader.rb b/gems/nil-kill/lib/nil_kill/runtime/trace_loader.rb index 615e85ef2..16ddb5b8d 100644 --- a/gems/nil-kill/lib/nil_kill/runtime/trace_loader.rb +++ b/gems/nil-kill/lib/nil_kill/runtime/trace_loader.rb @@ -4,23 +4,30 @@ module NilKill module Runtime class TraceLoader + MAX_DIAGNOSTICS_PER_FILE_CODE = 20 + def initialize(paths) @paths = Array(paths).flatten.compact end def each_event event_files.each do |file| + diagnostics = Hash.new(0) File.foreach(file).with_index(1) do |line, line_no| next if line.strip.empty? 
event = JSON.parse(line) unless event.is_a?(Hash) && event["event"] - yield nil, diagnostic(file, line_no, "not_raw_trace_event", "JSONL row is not a Raw Runtime Trace Event v1 object") + yield_limited_diagnostic(diagnostics, file, line_no, "not_raw_trace_event", + "JSONL row is not a Raw Runtime Trace Event v1 object") { |diagnostic| yield nil, diagnostic } + break if diagnostics["not_raw_trace_event"] > MAX_DIAGNOSTICS_PER_FILE_CODE next end yield event, nil rescue JSON::ParserError => e - yield nil, diagnostic(file, line_no, "invalid_json", e.message) + yield_limited_diagnostic(diagnostics, file, line_no, "invalid_json", e.message) do |diagnostic| + yield nil, diagnostic + end end end end @@ -37,6 +44,16 @@ def event_files private + def yield_limited_diagnostic(counts, file, line_no, code, message) + counts[code] += 1 + if counts[code] <= MAX_DIAGNOSTICS_PER_FILE_CODE + yield diagnostic(file, line_no, code, message) + elsif counts[code] == MAX_DIAGNOSTICS_PER_FILE_CODE + 1 + yield diagnostic(file, line_no, "#{code}_suppressed", + "suppressed additional #{code} diagnostics for this trace file") + end + end + def diagnostic(file, line_no, code, message) { "severity" => "warning", diff --git a/gems/nil-kill/spec/multi_language_runtime_spec.rb b/gems/nil-kill/spec/multi_language_runtime_spec.rb index c36da4fdc..e866d3ab1 100644 --- a/gems/nil-kill/spec/multi_language_runtime_spec.rb +++ b/gems/nil-kill/spec/multi_language_runtime_spec.rb @@ -327,6 +327,76 @@ def call(self, value): end end + it "does not default non-Ruby static normalization to stale legacy runtime files" do + Dir.mktmpdir("nil-kill-python-no-default-traces", NilKill::ROOT) do |dir| + static_path = File.join(dir, "static.json") + output_path = File.join(dir, "evidence.json") + FileUtils.mkdir_p(NilKill::RUNTIME_DIR) + File.write(File.join(NilKill::RUNTIME_DIR, "collections-stale.jsonl"), + 30.times.map { JSON.generate("legacy" => true) }.join("\n") + "\n") + File.write(static_path, 
JSON.pretty_generate( + "files" => [{"path" => "pkg/user.py", "language" => "python", "digest" => "sha256:test"}], + "methods" => [], + "fields" => [], + "language_capabilities" => {"python" => NilKill::Languages.capability_for("python")} + )) + + NilKill::Commands::NormalizeCommand.new(["--static", static_path, "--output", output_path, "--no-analyze"]).run + evidence = JSON.parse(File.read(output_path)) + + expect(evidence["languages"]).to eq(["python"]) + expect(evidence["diagnostics"]).to eq([]) + expect(evidence.dig("metadata", "trace_files")).to eq([]) + end + end + + it "keeps Ruby-only normalization defaulting to the legacy runtime directory" do + Dir.mktmpdir("nil-kill-ruby-default-traces", NilKill::ROOT) do |dir| + static_path = File.join(dir, "static.json") + output_path = File.join(dir, "evidence.json") + FileUtils.mkdir_p(NilKill::RUNTIME_DIR) + File.write(File.join(NilKill::RUNTIME_DIR, "events.jsonl"), JSON.generate( + "schema_version" => 1, + "event" => "process_start", + "language" => "ruby", + "run_id" => "run-1", + "pid" => 1, + "thread_id" => "main", + "timestamp_ns" => 1, + "path" => "src/demo.rb", + "line" => 1, + "payload" => {} + ) + "\n") + File.write(static_path, JSON.pretty_generate( + "files" => [{"path" => "src/demo.rb", "language" => "ruby", "digest" => "sha256:test"}], + "methods" => [], + "fields" => [] + )) + + NilKill::Commands::NormalizeCommand.new(["--static", static_path, "--output", output_path, "--no-analyze"]).run + evidence = JSON.parse(File.read(output_path)) + + expect(evidence["languages"]).to eq(["ruby"]) + expect(evidence.dig("runtime", "runs")).to include(a_hash_including("run_id" => "run-1")) + expect(evidence.dig("metadata", "trace_files")).not_to be_empty + end + end + + it "caps incompatible JSONL diagnostics per trace file" do + Dir.mktmpdir("nil-kill-bad-traces", NilKill::ROOT) do |dir| + trace_path = File.join(dir, "collections-stale.jsonl") + File.write(trace_path, 40.times.map { JSON.generate("legacy" => true) 
}.join("\n") + "\n") + + diagnostics = [] + NilKill::Runtime::TraceLoader.new([trace_path]).each_event do |_event, diagnostic| + diagnostics << diagnostic if diagnostic + end + + expect(diagnostics.count { |diagnostic| diagnostic["code"] == "not_raw_trace_event" }).to eq(20) + expect(diagnostics).to include(a_hash_including("code" => "not_raw_trace_event_suppressed")) + end + end + it "keeps legacy Ruby runtime loading behind the normalizer boundary" do Dir.mktmpdir("nil-kill-legacy-runtime", NilKill::ROOT) do |dir| source = File.join(dir, "sample.rb") diff --git a/gems/slopcop/exe/slopcop b/gems/slopcop/exe/slopcop index a96eb5656..64bc15b8a 100755 --- a/gems/slopcop/exe/slopcop +++ b/gems/slopcop/exe/slopcop @@ -21,6 +21,7 @@ def usage slopcop constraints [--repo=.] --base=origin/master [--head=HEAD] \\ [--coverage=loom:zig/zig-out/coverage-loom/merged/kcov-merged/cobertura.xml] \\ [--coverage=vopr:zig/zig-out/coverage-vopr/merged/kcov-merged/cobertura.xml] \\ + [--language=zig|go] \\ [--sarif=slopcop-constraints.sarif] [--json=constraints.sarif] \\ [--markdown=constraints.md] [--strict] @@ -29,7 +30,7 @@ def usage kcov Cobertura XML, kcov codecov JSON, coverage.py JSON, or Nil-Kill branch coverage JSON constraints --coverage - typed coverage input. Currently supported: loom:PATH and vopr:PATH. + typed coverage input. Common types: loom:PATH, vopr:PATH, race:PATH, concurrency:PATH. Findings are advisory warnings unless --strict is supplied. dark-arms Lineage-ready SARIF JSON overlay containing all classified dark arms. 
@@ -168,6 +169,7 @@ def run_constraints(args) base: nil, head: "HEAD", coverage: [], + languages: ["zig"], sarif: nil, json: nil, markdown: nil, @@ -182,6 +184,8 @@ def run_constraints(args) when /\A--head=(.+)/ then opts[:head] = Regexp.last_match(1) when /\A--coverage=(.+)/ then opts[:coverage] << Regexp.last_match(1) when "--coverage" then opts[:coverage] << args.shift.to_s + when /\A--language=(.+)/ then opts[:languages] = Regexp.last_match(1).split(",") + when "--language" then opts[:languages] = args.shift.to_s.split(",") when /\A--sarif=(.+)/ then opts[:sarif] = Regexp.last_match(1) when /\A--json=(.+)/ then opts[:json] = Regexp.last_match(1) when /\A--markdown=(.+)/ then opts[:markdown] = Regexp.last_match(1) @@ -195,7 +199,8 @@ def run_constraints(args) repo: opts[:repo], base: opts[:base], head: opts[:head], - coverage_specs: opts[:coverage] + coverage_specs: opts[:coverage], + languages: opts[:languages] ) write_output(opts[:sarif], audit.to_sarif) if opts[:sarif] diff --git a/gems/slopcop/lib/slopcop/constraints.rb b/gems/slopcop/lib/slopcop/constraints.rb index ec6fcdf5d..0ba0fdf12 100644 --- a/gems/slopcop/lib/slopcop/constraints.rb +++ b/gems/slopcop/lib/slopcop/constraints.rb @@ -4,6 +4,7 @@ require_relative "constraints/diff" require_relative "constraints/evidence" require_relative "constraints/finding" +require_relative "constraints/go_provider" require_relative "constraints/sarif" require_relative "constraints/zig_provider" @@ -13,6 +14,7 @@ module Constraints def providers { + "go" => GoProvider, "zig" => ZigProvider } end diff --git a/gems/slopcop/lib/slopcop/constraints/go_provider.rb b/gems/slopcop/lib/slopcop/constraints/go_provider.rb new file mode 100644 index 000000000..78f6aa1dd --- /dev/null +++ b/gems/slopcop/lib/slopcop/constraints/go_provider.rb @@ -0,0 +1,158 @@ +# frozen_string_literal: true + +require_relative "finding" + +module SlopCop + module Constraints + module GoProvider + module_function + + def rules + [ + { + "id" => 
"slopcop-go-race-uncovered", + "name" => "Go race coverage missing", + "shortDescription" => { "text" => "Go shared-concurrency site lacks race coverage evidence" }, + "fullDescription" => { + "text" => "A changed Go goroutine, atomic, lock, or sync primitive was not reached by race coverage." + }, + "defaultConfiguration" => { "level" => "warning" } + }, + { + "id" => "slopcop-go-concurrency-uncovered", + "name" => "Go concurrency coverage missing", + "shortDescription" => { "text" => "Go channel/wait site lacks concurrency coverage evidence" }, + "fullDescription" => { + "text" => "A changed Go channel or wait-group site was not reached by concurrency coverage." + }, + "defaultConfiguration" => { "level" => "warning" } + } + ] + end + + def findings(repo:, additions:, evidence:) + repo = File.expand_path(repo) + additions.each_with_object([]) do |(path, lines), out| + next unless source_path?(path) + + lines.each do |line| + source = source_line(repo, path, line) + next if source.empty? + + scan_line(path, line, source).each do |hazard| + next if covered?(evidence, hazard) + + out << Finding.new( + path: path, + line: line, + rule_id: rule_id_for(hazard[:required_evidence]), + message: "changed #{hazard[:label]} has no #{hazard[:required_evidence]} coverage evidence", + source: source.strip, + hazard_type: hazard[:hazard_type], + required_evidence: hazard[:required_evidence], + severity: "warning" + ) + end + end + end + end + + def source_path?(path) + path.end_with?(".go") && !path.end_with?("_test.go") && !path.split("/").include?("vendor") + end + + def scan_hazards(repo:, paths: nil) + repo = File.expand_path(repo) + files = if paths && !Array(paths).empty? 
+ Array(paths).select { |path| source_path?(path) } + else + Dir.chdir(repo) { Dir["**/*.go"] }.select { |path| source_path?(path) } + end + files.flat_map do |path| + File.readlines(File.join(repo, path)).each_with_index.flat_map do |source, index| + scan_line(path, index + 1, source).map do |hazard| + hazard.merge(path: path, line: index + 1, source: source.strip) + end + end + end.sort_by { |site| [site[:path], site[:line], site[:hazard_type]] } + end + + def scan_line(path, line, source) + code = strip_comment(source) + return [] if code.strip.empty? + + hazards = [] + hazards << hazard(path, line, "go_race_goroutine", "race", "goroutine launch") if goroutine_site?(code) + hazards << hazard(path, line, "go_race_atomic", "race", "atomic operation") if atomic_site?(code) + hazards << hazard(path, line, "go_race_lock", "race", "lock/sync primitive") if lock_site?(code) + hazards << hazard(path, line, "go_concurrency_waitgroup", "concurrency", "wait group operation") if waitgroup_site?(code) + hazards << hazard(path, line, "go_concurrency_channel", "concurrency", "channel operation") if channel_site?(code) + hazards + end + + def covered?(evidence, hazard) + evidence_type = hazard[:required_evidence] + return false unless evidence.known_type?(evidence_type) + + evidence.line_covered?(evidence_type, hazard[:path], hazard[:line]) + end + + def rule_id_for(required_evidence) + required_evidence == "race" ? 
"slopcop-go-race-uncovered" : "slopcop-go-concurrency-uncovered" + end + + def hazard(path, line, hazard_type, required_evidence, label) + { + path: path, + line: line, + hazard_type: hazard_type, + required_evidence: required_evidence, + label: label + } + end + + def goroutine_site?(code) + code.lstrip.start_with?("go ") || code.include?("; go ") + end + + def atomic_site?(code) + code.include?("atomic.") + end + + def lock_site?(code) + [ + "sync.Mutex", + "sync.RWMutex", + "sync.Map", + "sync.Once", + "sync.Cond", + ".Lock(", + ".Unlock(", + ".RLock(", + ".RUnlock(" + ].any? { |needle| code.include?(needle) } + end + + def waitgroup_site?(code) + ["sync.WaitGroup", ".Add(", ".Done(", ".Wait("].any? { |needle| code.include?(needle) } + end + + def channel_site?(code) + code.include?("make(chan") || + code.include?("select {") || + code.include?("<-") + end + + def strip_comment(source) + source.split("//", 2).first.to_s + end + + def source_line(repo, path, line) + file = File.join(repo, path) + return "" unless File.file?(file) + + File.readlines(file)[line.to_i - 1].to_s.rstrip + end + end + end +end diff --git a/gems/slopcop/test/constraints_go_provider_test.rb b/gems/slopcop/test/constraints_go_provider_test.rb new file mode 100644 index 000000000..e659be35e --- /dev/null +++ b/gems/slopcop/test/constraints_go_provider_test.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +require "json" +require "minitest/autorun" +require "tmpdir" +require "fileutils" + +require_relative "../lib/slopcop" + +class ConstraintsGoProviderTest < Minitest::Test + def test_go_provider_is_registered + assert_same SlopCop::Constraints::GoProvider, SlopCop::Constraints.providers.fetch("go") + end + + def test_scans_go_concurrency_hazards + Dir.mktmpdir do |dir| + File.write(File.join(dir, "worker.go"), <<~GO) + package demo + + import "sync/atomic" + + func run(ch chan int) { + go func() { ch <- 1 }() + _ = atomic.LoadInt64(&counter) + // go ignored() + } + GO + + hazards = 
SlopCop::Constraints::GoProvider.scan_hazards(repo: dir) + + hazard_types = hazards.map { |hazard| hazard[:hazard_type] } + assert_includes hazard_types, "go_race_goroutine" + assert_includes hazard_types, "go_concurrency_channel" + assert_includes hazard_types, "go_race_atomic" + refute hazards.any? { |hazard| hazard[:source].include?("ignored") } + end + end + + def test_findings_are_suppressed_by_matching_coverage_evidence + Dir.mktmpdir do |dir| + File.write(File.join(dir, "worker.go"), <<~GO) + package demo + + func run(ch chan int) { + go func() { ch <- 1 }() + } + GO + coverage = File.join(dir, "coverage.json") + File.write( + coverage, + JSON.dump( + coverage: { + "worker.go" => { + "4" => 1 + } + } + ) + ) + evidence = SlopCop::Constraints::Evidence.from_specs(["race:#{coverage}", "concurrency:#{coverage}"], repo: dir) + + findings = SlopCop::Constraints::GoProvider.findings( + repo: dir, + additions: { "worker.go" => [4] }, + evidence: evidence + ) + + assert_empty findings + end + end + + def test_uncovered_changed_go_hazard_gets_finding + Dir.mktmpdir do |dir| + File.write(File.join(dir, "worker.go"), <<~GO) + package demo + + func run(ch chan int) { + ch <- 1 + } + GO + evidence = SlopCop::Constraints::Evidence.from_specs([], repo: dir) + + findings = SlopCop::Constraints::GoProvider.findings( + repo: dir, + additions: { "worker.go" => [4] }, + evidence: evidence + ) + + assert_equal 1, findings.length + assert_equal "slopcop-go-concurrency-uncovered", findings.first.rule_id + assert_equal "go_concurrency_channel", findings.first.hazard_type + end + end +end From 791119a6ea1711b8524c1c62e1504c80d5b63d3e Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Tue, 16 Jun 2026 12:08:49 +0000 Subject: [PATCH 02/52] Validate language support quality Improve Decomplex SARIF messages, suppress generated Lua compatibility-prelude noise, and preserve Go name-type struct field types through static evidence. 
Document the multi-language Lineage validation pass and the current quality level for Python, TypeScript, Go, Lua, C, C++, C#, Java, Swift, and Kotlin. Co-authored-by: Codex --- gems/decomplex/lib/decomplex/report.rb | 84 +++++++ gems/decomplex/lib/decomplex/syntax.rb | 14 ++ gems/decomplex/test/report_test.rb | 28 +++ gems/decomplex/test/syntax_test.rb | 43 ++++ .../docs/agents/lang-support-quality.md | 208 ++++++++++++++++++ gems/nil-kill/lib/nil_kill/report.rb | 2 +- .../spec/multi_language_runtime_spec.rb | 57 +++++ 7 files changed, 435 insertions(+), 1 deletion(-) create mode 100644 gems/lineage/docs/agents/lang-support-quality.md diff --git a/gems/decomplex/lib/decomplex/report.rb b/gems/decomplex/lib/decomplex/report.rb index 6d10b6d86..3a86e6400 100644 --- a/gems/decomplex/lib/decomplex/report.rb +++ b/gems/decomplex/lib/decomplex/report.rb @@ -432,12 +432,96 @@ def sarif_rule_id(title) end def sarif_message(title, finding, location) + detail = sarif_message_detail(title, finding) + return "#{title}: #{detail}" unless detail.to_s.empty? 
+ subject = location[:method] || finding[:method] || finding[:name] || finding[:field] || finding[:contract] || finding[:owner] || finding[:token] || finding[:kind] [title, subject].compact.join(": ") end + def sarif_message_detail(title, finding) + case title + when "Decision Pressure" + "`#{finding[:contract]}` creates #{finding[:decisions]} eliminable guard decision(s) across " \ + "#{finding[:methods]} method(s)" + when "Redundant Nil Guards" + "`#{finding[:local]}` is nil-guarded by `#{finding[:guard]}` after proof `#{finding[:proof]}`" + when "Missing Abstractions" + "guard tuple `#{Array(finding[:members]).join(' | ')}` repeats in #{finding[:support]} site(s) " \ + "with scatter=#{finding[:scatter]}" + when "State-Based Branch Density" + refs = Array(finding[:state_refs]).first(8).join(" | ") + "#{finding[:decisions]} state-based branch decision(s) over `#{refs}`; " \ + "example predicate `#{finding[:predicate]}`" + when "Temporal Ordering Pressure" + "`#{finding[:owner]}` exposes mutable lifecycle pressure score=#{finding[:score]} " \ + "(public=#{finding[:public_methods]}, state_methods=#{finding[:state_methods]}, " \ + "writers=#{finding[:writers]})" + when "Neglected Conditions", "Neglected Path Conditions" + "missing condition `#{finding[:missing]}` from `#{Array(finding[:pattern] || finding[:guards]).join(' | ')}` " \ + "(support=#{finding[:support]})" + when "Oversized Predicates" + "#{finding[:count]} condition atoms in predicate `#{finding[:predicate]}`" + when "Neglected Updates" + "writes `.#{finding[:has]}` but not co-written `.#{finding[:missing]}` on receiver `#{finding[:recv]}` " \ + "(support=#{finding[:support]})" + when "Semantic Predicate Aliases", "Exact Predicate Aliases" + "predicate aliases `#{Array(finding[:names]).join(' = ')}` for `#{finding[:canon] || finding[:body]}`" + when "Reification Misses" + "predicate `#{finding[:predicate]}` is reinvented inline as `#{finding[:raw]}`" + when "Broken Protocols" + "does `#{finding[:has]}` 
without co-called `#{finding[:missing]}` " \ + "(support=#{finding[:support]}, confidence=#{finding[:confidence]})" + when "Implicit Control Flow" + sarif_implicit_control_flow_detail(finding) + when "Weighted Inlined Cognitive Complexity" + "inlined=#{finding[:inlined]} (local=#{finding[:local]}, hidden=#{finding[:hidden]}, " \ + "depth=#{finding[:depth]}); chain `#{Array(finding[:call_chain]).join(' -> ')}`" + when "Locality Drag" + "`#{finding[:variable]}` is initialized at line #{finding[:defined_at]} but first used at line " \ + "#{finding[:used_at]} after #{finding[:unrelated_statements]} unrelated statement(s)" + when "Function LCOM" + mode = finding[:mode] == :late_join ? "late_join" : "disjoint" + "#{mode} local data-flow: score=#{finding[:score]}, components=#{finding[:components]}, " \ + "locals=#{finding[:locals]}, statements=#{finding[:statements]}" + when "Operational Discontinuity", "Operational Discontinuity (High Confidence)" + "score=#{finding[:score]}, reset_boundaries=#{finding[:resets]}, dead=#{finding[:dead_total]}, " \ + "new=#{finding[:new_total]}, confidence=#{finding[:confidence] || :review}" + when "False Simplicity" + "[#{finding[:kind]}] `#{finding[:detail]}` support=#{finding[:support]}, scatter=#{finding[:scatter]}" + when "Fat Unions" + "union `#{Array(finding[:variant_set]).join(' | ')}` has #{Array(finding[:common]).size} common and " \ + "#{Array(finding[:variant]).size} variant member(s), scatter=#{finding[:scatter]}" + when "Derived-State Staleness" + "`#{finding[:derived]}` derived from `#{finding[:source]}` at line #{finding[:derived_at]}; " \ + "`#{finding[:source]}` reassigned at line #{finding[:source_reassigned_at]} but " \ + "`#{finding[:derived]}` is not recomputed" + when "Inconsistent Rename Clones" + "clone of #{finding[:ref_at]}: reference variable `#{finding[:ref_name]}` diverges as " \ + "#{Array(finding[:divergent]).inspect}" + when "Structural Similarity (Type-2/3)" + "[#{finding[:clone_type]}] 
mass=#{finding[:mass]} node=`#{finding[:node]}` across " \ + "#{Array(finding[:sites]).size} site(s)" + else + nil + end + end + + def sarif_implicit_control_flow_detail(finding) + protocol = Array(finding[:protocol]).join(" -> ") + dependency = Array(finding[:dependency]).join("|") + states = Array(finding[:states]).join(" | ") + if finding[:kind] == :order_drift + observed = Array(finding[:observed]).join(" -> ") + return "[order_drift] observed `#{observed}` against protocol `#{protocol}` " \ + "(#{dependency} state=`#{states}`)" + end + + "[protocol_pressure] protocol `#{protocol}` (#{dependency} state=`#{states}`), support=#{finding[:support]}" + end + def sarif_locations_for_finding(finding) spans = finding[:spans] if spans.is_a?(Hash) && !spans.empty? diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 77439c0f6..24edd5eeb 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -934,6 +934,8 @@ def control_context(node) end def record_decision_site(document, node, stack, out) + return if generated_lua_compat_prelude?(document, node) + if boolean_container?(node) && boolean_and?(node) record_conjunction_decision(document, node, stack, out) return @@ -1394,6 +1396,8 @@ def record_branch_decision(document, node, stack, out, immutable_readers:, immut end def record_branch_arm(document, node, stack, out) + return if generated_lua_compat_prelude?(document, node) + if if_node?(node) record_if_arms(document, node, stack, out) return @@ -1983,11 +1987,21 @@ def declared_type_text(node, name_node) normalize_text(match[1]) elsif (match = text.match(/\A\s*(?:pub\s+)?(?:const|var)\s+\w+\s*:\s*([^=;\n]+)/)) normalize_text(match[1]) + elsif (match = after_name.match(/\A\s+([^=;,\n]+)/)) + normalize_text(match[1]) end rescue StandardError nil end + def generated_lua_compat_prelude?(document, node) + return false unless document.language == :lua + return false unless line(node) == 1 + 
+ first_line = document.lines.first.to_s + first_line.include?("_tl_compat") && first_line.include?("compat53.module") + end + def state_read_target(node) case node.kind when "call" diff --git a/gems/decomplex/test/report_test.rb b/gems/decomplex/test/report_test.rb index 3b0b3b298..9108bc917 100644 --- a/gems/decomplex/test/report_test.rb +++ b/gems/decomplex/test/report_test.rb @@ -73,6 +73,34 @@ def test_sarif_result_locations_use_report_finding_locations assert result.fetch("partialFingerprints").fetch("decomplexFinding") end + def test_sarif_message_includes_detector_specific_derived_state_context + r = Decomplex::Report.allocate + message = r.send(:sarif_message, "Derived-State Staleness", { + derived: "style", + source: "options", + derived_at: 12, + source_reassigned_at: 30 + }, {}) + + assert_includes message, "`style` derived from `options` at line 12" + assert_includes message, "`options` reassigned at line 30" + assert_includes message, "`style` is not recomputed" + end + + def test_sarif_message_includes_detector_specific_protocol_context + r = Decomplex::Report.allocate + message = r.send(:sarif_message, "Broken Protocols", { + has: "lock", + missing: "unlock", + support: 8, + confidence: 0.89 + }, {}) + + assert_includes message, "does `lock` without co-called `unlock`" + assert_includes message, "support=8" + assert_includes message, "confidence=0.89" + end + def test_markdown_orders_sections_by_signal_tier_not_volume md = report.to_markdown prio = md[/## Project Prioritization.*?\n\n(.*?)\n\n/m, 1].to_s diff --git a/gems/decomplex/test/syntax_test.rb b/gems/decomplex/test/syntax_test.rb index ceb68f9a4..383ad10a9 100644 --- a/gems/decomplex/test/syntax_test.rb +++ b/gems/decomplex/test/syntax_test.rb @@ -120,6 +120,49 @@ def test_force_language_override_handles_ambiguous_headers end end + def test_tree_sitter_lua_adapter_ignores_generated_teal_compat_prelude + grammar = ENV["DECOMPLEX_TS_LUA_PATH"] + skip "set DECOMPLEX_TS_LUA_PATH to run Lua 
structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~LUA, ".lua") do |path| + local _tl_compat; if (tonumber((_VERSION or ""):match("[%d.]*$")) or 0) < 5.3 then local pcall, require = pcall, require; local ok, compat53 = pcall(require, "compat53.module"); if ok then compat53.module(_ENV) end end + function real(a, b) + if a and b then + return true + end + end + LUA + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :lua) + + assert_empty doc.decision_sites.select { |decision| decision.line == 1 } + assert_empty doc.branch_arms.select { |arm| arm.line == 1 } + assert_includes doc.decision_sites.map { |decision| [decision.line, decision.kind, decision.members] }, + [3, :conjunction, %w[a b]] + end + end + + def test_tree_sitter_go_adapter_extracts_name_type_struct_fields + grammar = ENV["DECOMPLEX_TS_GO_PATH"] + skip "set DECOMPLEX_TS_GO_PATH to run Go structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~GO, ".go") do |path| + package util + + type Slab struct { + I16 []int16 + Count int + } + GO + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :go) + + assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Slab", :owner] + assert_includes doc.state_declarations.map { |state| [state.owner, state.field, state.type] }, + ["Slab", "I16", "[]int16"] + assert_includes doc.state_declarations.map { |state| [state.owner, state.field, state.type] }, + ["Slab", "Count", "int"] + end + end + def test_tree_sitter_ruby_adapter_extracts_portable_facts_when_grammar_is_available grammar = ENV["DECOMPLEX_TS_RUBY_PATH"] skip "set DECOMPLEX_TS_RUBY_PATH to run Tree-sitter adapter smoke test" unless grammar && File.file?(grammar) diff --git a/gems/lineage/docs/agents/lang-support-quality.md b/gems/lineage/docs/agents/lang-support-quality.md new file mode 100644 index 000000000..69483ffc6 --- /dev/null +++ b/gems/lineage/docs/agents/lang-support-quality.md @@ -0,0 +1,208 @@ +# 
Multi-Language Support Quality Pass + +This pass spot checked the validation DBs created for Python, TypeScript, Go, Lua, C, C++, C#, Java, Swift, and Kotlin. The goal was not to prove feature parity with Ruby, but to verify that Lineage can ingest and display useful SARIF/coverage/risk evidence for each language, and to fix clear cross-language false positives found during review. + +## Quality Checklist + +- Lineage DB exists and UI serves the repository. +- SARIF artifacts ingest into `sarif_findings` with stable paths and line anchors. +- Decomplex findings include enough detector-specific context to be actionable. +- Nil-kill static pressure findings do not flag obviously typed or non-null constructs as loose contracts. +- SlopCop and Boobytrap produce useful output when coverage/churn exists, and degrade clearly when coverage is absent. +- Espalier emits architecture facts where class/function ownership extraction is mature. +- Native lint SARIF is ingested when the local toolchain can produce it. +- Runtime or hazard evidence is present for languages where support currently exists. + +## Fixes From This Pass + +- Decomplex SARIF messages now include detector-specific payloads for the major findings. For example, Rich `console.py` now shows `Derived-State Staleness: max_height derived from size at line 995; size reassigned at line 996 but max_height is not recomputed` instead of only naming the method. +- Decomplex suppresses generated Lua/Teal `_tl_compat` compatibility prelude branches. LuaRocks no longer reports line-1 generated prelude missing-abstraction findings. +- Decomplex extracts Go `name type` struct field declarations, so fields like `I16 []int16` keep their type. +- Nil-kill no longer treats Python `-> None` as nullable pressure by itself. `str | None`, `None | str`, `Optional[...]`, `null`, and `undefined` still count. 
+- Nil-kill Go static evidence now preserves typed struct fields through to SARIF; fzf no longer reports `Slab#I16` as an untyped field. + +Regression tests added: + +- `gems/decomplex/test/syntax_test.rb`: Lua generated prelude suppression and Go name-type struct fields. +- `gems/decomplex/test/report_test.rb`: actionable SARIF messages for derived-state staleness and broken protocols. +- `gems/nil-kill/spec/multi_language_runtime_spec.rb`: Python `-> None` nullable handling and Go typed struct field evidence. + +## Current Validation DBs + +All UI servers responded with HTTP 200 on ports `8081` through `8090` after SARIF reingest and UI summary refresh. + +| Language | Repo | Port | Logical Units | SARIF Artifacts | SARIF Findings | Coverage Lines | Quality Events | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | +| Python | Rich | 8081 | 2,152 | 6 | 6,905 | 7,792 | 1,022 | +| TypeScript | Zod | 8082 | 2,437 | 6 | 8,246 | 8,908 | 1,365 | +| Go | fzf | 8083 | 1,421 | 7 | 13,219 | 16,422 | 608 | +| Lua | LuaRocks | 8084 | 1,043 | 6 | 5,731 | 0 | 0 | +| C | libuv | 8085 | 3,920 | 6 | 30,310 | 0 | 0 | +| C++ | fmt | 8086 | 6,014 | 6 | 5,120 | 0 | 0 | +| C# | Serilog | 8087 | 615 | 6 | 1,524 | 0 | 0 | +| Java | Gson | 8088 | 4,921 | 6 | 3,542 | 0 | 0 | +| Swift | Argument Parser | 8089 | 1,938 | 6 | 1,129 | 0 | 0 | +| Kotlin | Okio | 8090 | 3,357 | 6 | 2,243 | 0 | 0 | + +Swift and Kotlin SARIF reingest skipped two non-SARIF JSON evidence files in each `tmp/lineage-sarif` directory. That is expected because the ingest command accepts directories and ignores JSON files that are not SARIF documents. + +## Language Findings + +### Python / Rich + +Status: good. + +The strongest path is covered: Lineage DB, Decomplex, Nil-kill, Espalier, SlopCop, Boobytrap, native lint, coverage, quality events, and one runtime stack-trace smoke event all ingest. 
Rich is the best multi-language validation target after CLEAR Ruby because it has meaningful Python type annotations and coverage. + +Spot checks: + +- Decomplex state-branch and derived-state findings now include state refs, predicates, and stale variable/source details. +- Nil-kill nullable signatures now avoid false positives for plain `-> None`, while still flagging real nullable params/returns. +- SlopCop and Boobytrap findings are anchored to real coverage/churn data. +- Native lint SARIF from Black is visible and path-anchored. + +Remaining caveat: test/example files are included in the validation DB. That is useful for ingestion coverage, but production review should use source-role filtering in Lineage. + +### TypeScript / Zod + +Status: good, with test-file noise. + +TypeScript SARIF ingestion, coverage, Decomplex, Nil-kill, Espalier, SlopCop, and Boobytrap all produce anchored findings. Decomplex points at real large schema/parser functions and TypeScript annotations feed Nil-kill static pressure. + +Spot checks: + +- Decomplex state-branch density on Zod parser paths includes concrete `_def`/schema refs and predicates. +- Nil-kill flags `unknown`/`any`-style slots without requiring runtime tracing. +- SlopCop and Boobytrap coverage/churn rows ingest correctly. + +Remaining caveat: broken-protocol and Boobytrap rows in test suites are noisy. This is mostly a source-role/ranking issue, not a TypeScript parser failure. + +### Go / fzf + +Status: good. + +Go has the best non-Ruby systems-language story in this pass. Lineage ingests coverage, SlopCop coverage gaps, Boobytrap risk, Decomplex, Nil-kill static facts, and Go concurrency hazard SARIF. + +Spot checks: + +- SlopCop Go constraint SARIF flags channel and lock/sync hazards lacking race coverage. +- Decomplex identifies large terminal/control-flow functions with convergence across several detectors. 
+- Nil-kill now preserves typed struct fields such as `Slab.I16 []int16`, removing a concrete false positive. + +Remaining caveat: Go hazard support is currently concurrency-focused. Other safety categories need explicit language rules if we want broader Go systems checks. + +### Lua / LuaRocks + +Status: usable static ingestion, experimental analysis quality. + +Lineage DB and SARIF ingestion work. Decomplex produces useful Lua findings after generated Teal prelude suppression. Nil-kill and Espalier are sparse, which matches the current maturity of Lua ownership/type extraction. + +Spot checks: + +- Generated `_tl_compat` prelude line-1 missing-abstraction findings are gone. +- Real Lua findings remain, e.g. repeated guard tuples and state-branch predicates. +- SlopCop/Boobytrap rows exist but are static/no-coverage quality because no Lua coverage was available. + +Remaining caveat: Lua needs better function ownership and module/type conventions before Espalier and Nil-kill can be more than light static signals. + +### C / libuv + +Status: strong SARIF ingestion, experimental analysis quality. + +Lineage handles the large libuv DB and ingests Decomplex, SlopCop, Boobytrap, Nil-kill, Espalier, and syntax-lint SARIF. Decomplex results are plentiful and anchored. + +Spot checks: + +- Decomplex state-branch density points at real C state/predicate-heavy files like `src/win/pipe.c`. +- SlopCop/Boobytrap can rank paths, but no coverage was generated in this environment. +- Native syntax lint catches environment/header availability issues. Those are useful as toolchain diagnostics, not code-quality verdicts. + +Remaining caveat: C has no coverage here, and C header/platform conditionals create noisy lint results unless the native build environment is configured. + +### C++ / fmt + +Status: strong SARIF ingestion, experimental analysis quality. + +Lineage ingests fmt SARIF and the UI handles template-heavy headers. 
Decomplex and Nil-kill produce anchored findings; Espalier has limited but nonzero ownership extraction. + +Spot checks: + +- Decomplex findings are anchored in headers and bundled tests. +- Nil-kill nullable findings around pointer/time APIs are plausible. +- Native C++ syntax lint found module/toolchain issues. + +Remaining caveat: bundled third-party/test code is included, so production review needs source-role filtering. C++ templates and macros need more language-specific tuning before high confidence architecture claims. + +### C# / Serilog + +Status: usable static ingestion, moderate Decomplex signal. + +SARIF ingestion works and Decomplex points at real branch-heavy formatting/parsing code. Nil-kill nullable signature findings map well to C# nullable-style APIs. + +Spot checks: + +- Decomplex state-branch findings include concrete property names and predicates. +- Nil-kill nullable signature findings are plausible in Serilog configuration APIs. +- SlopCop/Boobytrap are static/no-coverage quality because coverage was unavailable. + +Remaining caveat: Espalier emitted no findings in this validation pass, so C# architecture extraction needs more work before it can be relied on. + +### Java / Gson + +Status: usable static ingestion, moderate Decomplex/Espalier signal. + +Java SARIF ingestion works. Decomplex, Nil-kill, Espalier, SlopCop, and Boobytrap all produce anchored findings, with Decomplex pointing at real parser/adapter complexity. + +Spot checks: + +- Decomplex state-branch density in `TypeAdapters` and `JsonReader` has meaningful refs/predicates. +- Espalier emits read-only function facts for immutable-style value methods. +- Nil-kill untyped fields/methods are plausible where generic/reflection-heavy Java code defeats simple extraction. + +Remaining caveat: no Java coverage or native lint was available in this environment, so risk ranking lacks coverage-backed confidence. 
+ +### Swift / Argument Parser + +Status: usable static ingestion, experimental analysis quality. + +Lineage DB and SARIF ingestion work. Decomplex and Espalier produce anchored Swift findings; Nil-kill static evidence ingests. SlopCop is empty because no coverage was generated. + +Spot checks: + +- Decomplex state-branch density in completion generation includes Swift option/subcommand predicates. +- Espalier has limited read-only function extraction. +- Nil-kill static untyped signatures are present where generic Swift inference is not yet mature. + +Remaining caveat: Swift needs coverage ingestion and better function/owner extraction before architecture metrics should be treated as strong signal. + +### Kotlin / Okio + +Status: usable static ingestion, moderate Decomplex signal. + +Kotlin DB and SARIF ingestion work. Decomplex has useful findings in buffer/filesystem code, and Espalier emits a small set of function facts. Nil-kill static findings are anchored. + +Spot checks: + +- Decomplex state-branch density in `Buffer.kt` includes concrete buffer/segment refs and predicates. +- Espalier identifies some read-only/impure functions. +- Nil-kill untyped signatures point at equality/select APIs where extraction needs stronger Kotlin typing rules. + +Remaining caveat: no coverage was generated, SlopCop is empty, and Kotlin parser extraction needs more language-specific tuning before architecture metrics are high confidence. + +## Cross-Cutting Assessment + +The common ingestion path is solid: all ten DBs load, SARIF artifacts persist, UI summaries refresh, and servers respond. Decomplex is the most broadly useful analyzer across all languages because Tree-sitter extraction gives it enough syntax to anchor complexity findings. + +The biggest quality divider is coverage. Python, TypeScript, and Go have coverage-backed SlopCop/Boobytrap signal; the other seven languages currently have static-only or churn-only risk, which should be presented as lower confidence. 
+ +Nil-kill is useful for Python, TypeScript, Go, C#, Java, Swift, and Kotlin static pressure, but language-specific type extraction determines signal quality. The Go struct-field and Python `-> None` fixes show the right pattern: false positives should be fixed in the shared syntax/provider layer with regression tests, not tuned per repository. + +Espalier is useful where class/function ownership extraction is mature. It is sparse for Lua, C, C#, and Swift/Kotlin compared with Ruby/TypeScript/Go/Java. Treat missing Espalier signal in those languages as adapter immaturity, not proof of good architecture. + +## Recommended Next Work + +- Add source-role filtering in Lineage views and ranking so `src`/production findings can be reviewed separately from tests, examples, vendored code, and generated code. +- Add explicit generated/vendor detection to the shared source filter for common language artifacts. +- Improve C/C++ native build-aware lint/coverage collection; static parser output alone is not enough for high-confidence systems-language review. +- Add coverage ingestion recipes for Lua, C#, Java, Swift, and Kotlin validation repos. +- Continue adding language-specific ownership/type extraction only when a spot check finds a concrete false positive or missing high-value signal. 
diff --git a/gems/nil-kill/lib/nil_kill/report.rb b/gems/nil-kill/lib/nil_kill/report.rb index 9dffa10a8..b81f96d25 100644 --- a/gems/nil-kill/lib/nil_kill/report.rb +++ b/gems/nil-kill/lib/nil_kill/report.rb @@ -336,9 +336,9 @@ def static_nullable_signature?(signature) text = signature.to_s text.match?(/\bT\.nilable\b/) || text.match?(/\bOptional\s*\[/) || - text.match?(/\bNone\b/) || text.match?(/\bnull\b/) || text.match?(/\bundefined\b/) || + text.match?(/\bNone\s*\|/) || text.match?(/\|\s*(?:None|null|undefined)\b/) end diff --git a/gems/nil-kill/spec/multi_language_runtime_spec.rb b/gems/nil-kill/spec/multi_language_runtime_spec.rb index e866d3ab1..3dbdbbe39 100644 --- a/gems/nil-kill/spec/multi_language_runtime_spec.rb +++ b/gems/nil-kill/spec/multi_language_runtime_spec.rb @@ -119,6 +119,31 @@ def fetch(self, value: str | None) -> str | None: ... end end + it "does not report Python None-only returns as nullable signatures" do + report = NilKill::Report.allocate + + void_method = { + "language" => "python", + "path" => "src/worker.py", + "owner" => "Worker", + "name" => "call", + "kind" => "method", + "line" => 10, + "signature" => "def call(self, value: str) -> None:", + } + maybe_method = void_method.merge( + "name" => "fetch", + "line" => 20, + "signature" => "def fetch(self, value: str | None) -> str | None:", + ) + + void_findings = report.send(:static_method_findings, void_method) + maybe_findings = report.send(:static_method_findings, maybe_method) + + expect(void_findings.map { |finding| finding["kind"] }).not_to include("nullable_signature") + expect(maybe_findings.map { |finding| finding["kind"] }).to include("nullable_signature") + end + it "uses TypeScript provider annotations when building Tree-sitter static evidence" do grammar = ENV["DECOMPLEX_TS_TYPESCRIPT_PATH"] skip "set DECOMPLEX_TS_TYPESCRIPT_PATH to run TypeScript Tree-sitter static evidence test" unless grammar && File.file?(grammar) @@ -185,6 +210,38 @@ class Worker { end end + it 
"keeps Go name-type struct fields typed in static evidence" do + grammar = ENV["DECOMPLEX_TS_GO_PATH"] + skip "set DECOMPLEX_TS_GO_PATH to run Go Tree-sitter static evidence test" unless grammar && File.file?(grammar) + + Dir.mktmpdir("nil-kill-go-static", NilKill::ROOT) do |dir| + src = File.join(dir, "src") + FileUtils.mkdir_p(src) + File.write(File.join(src, "slab.go"), <<~GO) + package util + + type Slab struct { + I16 []int16 + Count int + } + GO + + evidence = NilKill::StaticEvidence.build([src], root: dir) + fields = evidence.fetch("fields") + report = NilKill::Report.allocate + + expect(evidence.dig("facts", "state_types", "Slab\u0000I16")).to eq("[]int16") + expect(evidence.dig("facts", "state_types", "Slab\u0000Count")).to eq("int") + expect(fields).to include(a_hash_including( + "language" => "go", + "name" => "I16", + "declared_type" => "[]int16" + )) + expect(report.send(:static_field_finding, fields.find { |field| field["name"] == "I16" })).to be_nil + expect(report.send(:static_field_finding, fields.find { |field| field["name"] == "Count" })).to be_nil + end + end + it "exposes provider capabilities from trace-spec" do spec = NilKill::Commands::TraceSpecCommand.new([]).spec languages = spec.fetch("language_capabilities").to_h { |cap| [cap.fetch("language"), cap] } From 27afcdd53b6f7dd6b9348a1ad77ebc0a2189aa19 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Tue, 16 Jun 2026 12:50:51 +0000 Subject: [PATCH 03/52] Improve multi-language architecture signals --- gems/decomplex/lib/decomplex/report.rb | 18 +- gems/decomplex/lib/decomplex/syntax.rb | 436 +++++++++++++++++++++-- gems/decomplex/test/report_test.rb | 29 ++ gems/espalier/test/ast_extractor_test.rb | 165 ++++++++- gems/nil-kill/lib/nil_kill/report.rb | 106 +++++- gems/nil-kill/spec/nil_kill_spec.rb | 75 ++++ 6 files changed, 795 insertions(+), 34 deletions(-) diff --git a/gems/decomplex/lib/decomplex/report.rb b/gems/decomplex/lib/decomplex/report.rb index 3a86e6400..53bd92902 100644 --- 
a/gems/decomplex/lib/decomplex/report.rb +++ b/gems/decomplex/lib/decomplex/report.rb @@ -371,8 +371,7 @@ def to_json(*_args) private def sarif_rules - SECTIONS.reject { |title, *_| CONVERGENCE_EXCLUDED_SECTIONS.include?(title) } - .map do |title, _ivar, tier, desc| + sarif_sections_data(include_findings: false).map do |title, tier, _findings, desc| Decomplex::Sarif.rule( id: sarif_rule_id(title), name: title, @@ -397,7 +396,7 @@ def ranked_sarif_results(results) end def sarif_results(include_finding_payload: true) - sections_data.flat_map do |title, tier, findings| + sarif_sections_data.flat_map do |title, tier, findings, _desc| Array(findings).flat_map do |finding| sarif_locations_for_finding(finding).map do |location| properties = { @@ -448,6 +447,12 @@ def sarif_message_detail(title, finding) "#{finding[:methods]} method(s)" when "Redundant Nil Guards" "`#{finding[:local]}` is nil-guarded by `#{finding[:guard]}` after proof `#{finding[:proof]}`" + when "State Heatmap" + writers = Array(finding[:top_writers]).first(3).join(" | ") + readers = Array(finding[:top_readers]).first(3).join(" | ") + "state `#{finding[:field]}` has pressure=#{finding[:pressure]}, messiness=#{finding[:messiness]} " \ + "(writes=#{finding[:writes]}, reads=#{finding[:reads]}, re-derived=#{finding[:re_derivations]}, " \ + "scatter=#{finding[:scatter]}); writers #{writers}; readers #{readers}" when "Missing Abstractions" "guard tuple `#{Array(finding[:members]).join(' | ')}` repeats in #{finding[:support]} site(s) " \ "with scatter=#{finding[:scatter]}" @@ -561,6 +566,13 @@ def parse_sarif_loc(loc) } end + def sarif_sections_data(include_findings: true) + SECTIONS.map do |title, ivar, tier, desc| + findings = include_findings ? instance_variable_get(ivar) : nil + [title, tier, findings, desc] + end + end + def zero_based_column_to_sarif(value) return nil if value.nil? 
diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 24edd5eeb..207c204e9 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -511,12 +511,13 @@ class TreeSitterAdapter swift: "tree-sitter-swift", kotlin: "tree-sitter-kotlin" }.freeze - LANGUAGE_GRAMMAR_NAMES = { - csharp: ["c-sharp", "csharp"] - }.freeze - TREE_SITTER_LANGUAGE_NAMES = { - csharp: "c_sharp" - }.freeze + LANGUAGE_GRAMMAR_NAMES = { + csharp: ["c-sharp", "csharp"] + }.freeze + TREE_SITTER_LANGUAGE_NAMES = { + csharp: "c_sharp" + }.freeze + FIRST_ARGUMENT_RECEIVER_LANGUAGES = %i[c].freeze def parse(file, language: nil) lang = (language || Syntax.language_for(file)).to_sym @@ -600,7 +601,7 @@ def structural_facts(document) state_reads: [], state_writes: [] } - walk(document.root, [{ file_owner: file_owner(document.file) }]) do |node, stack| + walk(document.root, [{ file_owner: file_owner(document.file), language: document.language }]) do |node, stack| record_function_def(document, node, stack, out[:function_defs]) record_owner_def(document, node, stack, out[:owner_defs]) record_call_site(document, node, stack, out[:call_sites]) @@ -609,6 +610,7 @@ def structural_facts(document) record_state_read(document, node, stack, out[:state_reads]) record_state_write(document, node, stack, out[:state_writes]) end + record_implicit_state_accesses(document, out) out[:function_defs].uniq! { |fn| [fn.file, fn.owner, fn.name, fn.line] } out[:owner_defs].uniq! { |owner| [owner.file, owner.name, owner.kind] } out[:call_sites].uniq! { |call| [call.file, call.owner, call.function, call.line, call.receiver, call.message] } @@ -781,6 +783,11 @@ def current_owner_from_stack(stack) entry && entry[:owner] end + def current_language(stack) + entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:language] } + entry && entry[:language] + end + def conditional_context?(stack) stack.any? 
{ |item| item.is_a?(Hash) && %i[conditional iterates].include?(item[:control]) } end @@ -795,12 +802,14 @@ def function_context(node, stack) function: function_name(node), owner: function_owner_name(node, stack), params: function_params(node), - receiver: function_receiver_name(node) + receiver: function_receiver_name(node, stack) } end def function_owner_name(node, stack) - receiver_owner_name(node) || current_owner_from_stack(stack) + receiver_owner_name(node) || + current_owner_from_stack(stack) || + receiver_convention_owner_name(node, language: current_language(stack)) end def function_name(node) @@ -829,10 +838,97 @@ def function_kind(node, stack) :function end - def visibility_for(node) + def visibility_for(document, node) return ruby_inline_def_visibility(node) if inline_def_argument_list?(node) - return :public if node.children.any? { |child| child.text == "pub" } + case document.language + when :ruby + ruby_method_visibility(node) + when :python + python_visibility(node) + when :go + exported_name_visibility(function_name(node)) + when :rust + modifier_visibility(node) || :private + when :typescript, :javascript + modifier_visibility(node) || typescript_visibility(node) + when :cpp + modifier_visibility(node) || cpp_visibility(node) + when :csharp + modifier_visibility(node) || :private + when :c + c_visibility(node) + else + modifier_visibility(node) + end + end + + def ruby_method_visibility(node) + modifier_visibility(node) + end + + def python_visibility(node) + name = function_name(node).to_s + return :private if name.start_with?("_") && !name.start_with?("__") + + :public + end + + def exported_name_visibility(name) + text = name.to_s + return nil if text.empty? + + text.match?(/\A[A-Z]/) ? :public : :private + end + + def typescript_visibility(node) + return :private if function_name(node).to_s.start_with?("#") + + :public + end + + def modifier_visibility(node) + return :private if node.children.any? 
{ |child| child.text == "private" } + return :protected if node.children.any? { |child| child.text == "protected" } + return :public if node.children.any? { |child| %w[public pub].include?(child.text) } + + nil + end + + def cpp_visibility(node) + visibility = previous_cpp_access_specifier(node) + return visibility if visibility + + owner = nearest_owner_declaration(node) + return :public if owner&.kind == "struct_specifier" + + :private + end + + def c_visibility(node) + node.children.any? { |child| child.text == "static" } ? :private : :public + end + + def previous_cpp_access_specifier(node) + sibling = prev_sibling(node) + while sibling + return sibling.text.to_sym if sibling.kind == "access_specifier" && + %w[public private protected].include?(sibling.text) + + sibling = prev_sibling(sibling) + end + nil + end + + def nearest_owner_declaration(node) + parent = parent_node(node) + seen = Set.new + while parent && !seen.include?(node_key(parent)) + seen << node_key(parent) + return parent if %w[class_specifier struct_specifier class class_definition class_declaration].include?(parent.kind) + + parent = parent_node(parent) + end nil end @@ -1032,7 +1128,7 @@ def record_function_def(document, node, stack, out) line: line(node), span: span(node), body: node, - visibility: visibility_for(node), + visibility: visibility_for(document, node), params: function_params(node), signature: function_signature(document, node), kind: function_kind(node, stack) @@ -1326,6 +1422,137 @@ def record_state_read(document, node, stack, out) ) end + def record_implicit_state_accesses(document, out) + return unless %i[cpp csharp].include?(document.language) + + declared = declared_state_index(out[:state_declarations]) + return if declared.empty? 
+ + locals = local_declaration_index(document) + params = function_param_index(out[:function_defs]) + walk(document.root, [{ file_owner: file_owner(document.file), language: document.language }]) do |node, stack| + next unless implicit_state_identifier?(node) + + owner = current_owner(document, stack) + function = current_function(stack) + next if function == "(top-level)" + + field = node.text.to_s + next unless declared[owner].include?(field) + next if params[[owner, function]].include?(field) + next if locals[[owner, function]].include?(field) + next if identifier_declaration_site?(node) + next if member_message_identifier?(node) + + if implicit_assignment_lhs?(node) + out[:state_writes] << StateWrite.new( + field: field, + receiver: "self", + file: document.file, + function: function, + line: line(node), + span: span(node), + owner: owner + ) + else + out[:state_reads] << StateRead.new( + field: field, + receiver: "self", + file: document.file, + function: function, + line: line(node), + span: span(node), + owner: owner + ) + end + end + end + + def declared_state_index(declarations) + declarations.each_with_object(Hash.new { |h, k| h[k] = Set.new }) do |decl, index| + index[decl.owner.to_s].add(decl.field.to_s) + end + end + + def function_param_index(functions) + functions.each_with_object(Hash.new { |h, k| h[k] = Set.new }) do |fn, index| + index[[fn.owner.to_s, fn.name.to_s]].merge(Array(fn.params).map(&:to_s)) + end + end + + def local_declaration_index(document) + index = Hash.new { |h, k| h[k] = Set.new } + walk(document.root, [{ file_owner: file_owner(document.file), language: document.language }]) do |node, stack| + next unless local_variable_declarator?(node) + + owner = current_owner(document, stack) + function = current_function(stack) + next if function == "(top-level)" + + local_name_node(node)&.then { |name| index[[owner, function]].add(name.text.to_s) } + end + index + end + + def local_variable_declarator?(node) + return false unless 
ts_node?(node) + return false unless %w[variable_declarator init_declarator].include?(node.kind) + + !inside_kind?(node, %w[field_declaration property_declaration public_field_definition]) + end + + def local_name_node(node) + named_field(node, "name") || + node.named_children.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) } + end + + def implicit_state_identifier?(node) + ts_node?(node) && %w[identifier field_identifier property_identifier].include?(node.kind) + end + + def identifier_declaration_site?(node) + parent = parent_node(node) + return false unless parent + return true if %w[parameter_declaration parameter variable_declarator init_declarator function_declarator + method_declaration function_definition class_specifier class].include?(parent.kind) + return true if inside_kind?(node, %w[field_declaration property_declaration public_field_definition]) + + false + end + + def member_message_identifier?(node) + parent = parent_node(node) + return false unless parent && field_like_node?(parent) + + field = named_field(parent, "field") || named_field(parent, "property") || + named_field(parent, "name") || parent.named_children.last + field == node + end + + def implicit_assignment_lhs?(node) + parent = parent_node(node) + return false unless parent + + if %w[assignment_expression assignment assignment_statement augmented_assignment operator_assignment].include?(parent.kind) + lhs = named_field(parent, "left") || parent.named_children.first + return lhs == node + end + + assignment_lhs?(node) + end + + def inside_kind?(node, kinds) + parent = parent_node(node) + seen = Set.new + while parent && !seen.include?(node_key(parent)) + seen << node_key(parent) + return true if kinds.include?(parent.kind) + + parent = parent_node(parent) + end + false + end + def record_state_param_origin(document, node, stack, out) lhs = nil rhs = nil @@ -1659,6 +1886,8 @@ def collect_identifiers(node, out) def owner_for_node(document, node, 
stack: nil) receiver_owner = receiver_owner_name(node) return receiver_owner if receiver_owner + convention_owner = receiver_convention_owner_name(node, language: document&.language) + return convention_owner if convention_owner stacked_owner = current_owner_from_stack(Array(stack)) return stacked_owner if stacked_owner @@ -1699,7 +1928,7 @@ def owner_name_from_declaration(document, node) named_field(node, "name")&.text || first_named_text(node, %w[constant identifier type_identifier]) when "impl_item", "impl_block" impl_owner_name(node) - when "struct_item", "struct_spec", "type_spec", "type_declaration" + when "struct_item", "struct_spec", "struct_specifier", "type_spec", "type_declaration" named_field(node, "name")&.text || first_named_text(node, %w[type_identifier identifier]) when "struct_declaration", "union_declaration", "enum_declaration" bound_container_name(node) || returned_container_owner(node) || anonymous_owner_name(document, node) @@ -1713,7 +1942,7 @@ def owner_kind(node) when "class", "class_definition", "class_declaration", "class_specifier" then :class when "module" then :module when "impl_item", "impl_block" then :impl - when "struct_declaration", "struct_item", "struct_spec" then :struct + when "struct_declaration", "struct_item", "struct_spec", "struct_specifier" then :struct when "union_declaration" then :union when "enum_declaration" then :enum else :owner @@ -1731,9 +1960,10 @@ def receiver_owner_name(node) receiver_type && normalize_type_owner(receiver_type.text) end - def function_receiver_name(node) + def function_receiver_name(node, stack) receiver_param = method_receiver_param_node(node) - receiver_param&.text + receiver_param&.text || + receiver_convention_param_name(node, language: current_language(stack)) end def method_receiver_type_node(node) @@ -1759,6 +1989,60 @@ def method_receiver_declaration(node) receiver_params&.named_children&.find { |child| child.kind == "parameter_declaration" } end + def 
receiver_convention_owner_name(node, language:) + return nil unless first_argument_receiver_language?(language) + return nil unless node.kind == "function_definition" + + receiver = first_argument_receiver_parameter(node) + return nil unless receiver + + type = normalize_type_owner(receiver[:type]) + name = function_name(node).to_s + return nil if type.empty? || name.empty? + + prefix = snake_case_type_name(type) + name.start_with?("#{prefix}_") ? type : nil + end + + def receiver_convention_param_name(node, language:) + return nil unless first_argument_receiver_language?(language) + + first_argument_receiver_parameter(node)&.fetch(:name, nil) + end + + def first_argument_receiver_parameter(node) + params = named_field(named_field(node, "declarator"), "parameters") || + named_field(node, "parameters") || + node.named_children.find { |child| child.kind == "parameter_list" } || + named_field(node, "declarator")&.named_children&.find { |child| child.kind == "parameter_list" } + first = params&.named_children&.find { |child| child.kind == "parameter_declaration" } + return nil unless first + + type_node = first.named_children.find do |child| + %w[type_identifier primitive_type qualified_identifier scoped_type_identifier].include?(child.kind) + end + name_node = first.named_children.reverse.find do |child| + %w[identifier field_identifier].include?(child.kind) + end + name_node ||= declarator_name(first) + return nil unless type_node && name_node + + name = ts_node?(name_node) ? 
name_node.text : name_node.to_s + { type: type_node.text, name: name } + end + + def first_argument_receiver_language?(language) + FIRST_ARGUMENT_RECEIVER_LANGUAGES.include?(language&.to_sym) + end + + def snake_case_type_name(type) + type.to_s + .split("::").last + .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') + .gsub(/([a-z\d])([A-Z])/, '\1_\2') + .downcase + end + def bound_container_name(node) parent = parent_node(node) seen_nodes = Set.new @@ -1815,7 +2099,7 @@ def call_target(document, node) when "identifier" ruby_bare_call_target(document, node) when "call_expression", "method_invocation", "invocation_expression" - generic_call_target(node) + generic_call_target(document, node) when "attribute", "selector_expression", "field", "field_access", "member_expression", "member_access_expression", "field_expression", "expression_list" adjacent_argument_call_target(node) @@ -1882,16 +2166,38 @@ def ruby_simple_call_text?(text) text.to_s.strip.match?(/\A[a-z_]\w*[!?=]?\z/) end - def generic_call_target(node) + def generic_call_target(document, node) callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first return nil unless callee return nil if callee.kind == "builtin_function" || callee.text.to_s.start_with?("@") - target_from_callee(callee).merge(arguments: []) + target = target_from_callee(callee).merge(arguments: []) + first_argument_receiver_call_target(document, node, target) || target rescue NoMethodError nil end + def first_argument_receiver_call_target(document, node, target) + return nil unless first_argument_receiver_language?(document.language) + return nil unless target[:receiver] == "self" + + first_arg = call_argument_nodes(node).first + arg_target = state_read_target(first_arg) + return nil unless arg_target + + { + receiver: "#{arg_target[:receiver]}.#{arg_target[:field]}", + message: target[:message], + arguments: target[:arguments] + } + end + + def call_argument_nodes(node) + args = named_field(node, "arguments") 
|| + node.named_children.find { |child| child.kind == "argument_list" } + Array(args&.named_children) + end + def adjacent_argument_call_target(node) return nil unless next_sibling(node)&.kind == "argument_list" @@ -1954,8 +2260,8 @@ def noise_call?(target) def state_declaration(node) case node.kind - when "assignment" - ruby_t_let_state_declaration(node) + when "assignment", "assignment_expression", "assignment_statement" + ruby_t_let_state_declaration(node) || assignment_state_declaration(node) when "container_field" zig_container_field_declaration(node) when "property_declaration", "public_field_definition", "field_definition", "field_declaration" @@ -1973,13 +2279,47 @@ def zig_container_field_declaration(node) end def generic_field_declaration(node) - name = named_field(node, "name") || - node.named_children.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) } + name = field_declaration_name_node(node) return nil unless name { field: name.text, type: declared_type_text(node, name) } end + def field_declaration_name_node(node) + named_field(node, "name") || + variable_declarator_name(node) || + node.named_children.find { |child| %w[field_identifier property_identifier].include?(child.kind) } || + node.named_children.reverse.find { |child| child.kind == "identifier" } + end + + def variable_declarator_name(node) + pending = node.named_children.dup + seen = Set.new + until pending.empty? 
+ current = pending.shift + next unless ts_node?(current) + key = node_key(current) + next if seen.include?(key) + + seen << key + if %w[variable_declarator pointer_declarator declarator].include?(current.kind) + direct_name = named_field(current, "name") || + current.named_children.find do |child| + %w[identifier field_identifier property_identifier].include?(child.kind) + end + return direct_name if direct_name + return current if current.kind == "variable_declarator" && current.text.match?(/\A[A-Za-z_]\w*\z/) + elsif current.kind == "init_declarator" + return named_field(current, "name") || + current.named_children.find do |child| + %w[identifier field_identifier property_identifier].include?(child.kind) + end + end + pending.concat(current.named_children) + end + nil + end + def declared_type_text(node, name_node) text = node.text.to_s after_name = text[(name_node.end_byte - node.start_byte)..].to_s @@ -1989,11 +2329,52 @@ def declared_type_text(node, name_node) normalize_text(match[1]) elsif (match = after_name.match(/\A\s+([^=;,\n]+)/)) normalize_text(match[1]) + elsif (type = declared_type_before_name(text, node, name_node)) + type end rescue StandardError nil end + def declared_type_before_name(text, node, name_node) + before_name = text[0...(name_node.start_byte - node.start_byte)].to_s + before_name = before_name.gsub(/\b(?:public|private|protected|internal|static|readonly|const|pub|mut|var|let)\b/, " ") + before_name = before_name.gsub(/[;,{].*\z/m, " ") + before_name = normalize_text(before_name) + return nil if before_name.empty? + + tokens = before_name.split(/\s+/).reject { |token| token.match?(/\A[*&]+\z/) } + candidate = tokens.last.to_s.delete_suffix("*").delete_suffix("&") + return nil if candidate.empty? 
+ + candidate + end + + def assignment_state_declaration(node) + lhs = named_field(node, "left") || node.named_children.first + rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] + target = state_target(lhs) + return nil unless target + return nil unless %w[self this].include?(target[:receiver].to_s) + + type = inferred_assignment_type(rhs) + return nil unless type + + { field: target[:field], type: type } + end + + def inferred_assignment_type(node) + return nil unless ts_node?(node) + + text = normalize_text(node.text) + patterns = [ + /\Anew\s+([A-Z][A-Za-z0-9_:]*)\s*(?:[({<]|$)/, + /\A([A-Z][A-Za-z0-9_:]*)\s*(?:[({<]|$)/ + ] + match = patterns.filter_map { |pattern| text.match(pattern) }.first + match && match[1] + end + def generated_lua_compat_prelude?(document, node) return false unless document.language == :lua return false unless line(node) == 1 @@ -2288,9 +2669,14 @@ def ruby_argument_texts(node) def normalize_target_receiver(target, stack) receiver = target[:receiver].to_s current_receiver = current_receiver_name(stack) - return target unless current_receiver && receiver == current_receiver + return target unless current_receiver + return target.merge(receiver: "self") if receiver == current_receiver + + if receiver.start_with?("#{current_receiver}.") + return target.merge(receiver: "self.#{receiver.delete_prefix("#{current_receiver}.")}") + end - target.merge(receiver: "self") + target end def current_receiver_name(stack) diff --git a/gems/decomplex/test/report_test.rb b/gems/decomplex/test/report_test.rb index 9108bc917..bae30b880 100644 --- a/gems/decomplex/test/report_test.rb +++ b/gems/decomplex/test/report_test.rb @@ -101,6 +101,35 @@ def test_sarif_message_includes_detector_specific_protocol_context assert_includes message, "confidence=0.89" end + def test_sarif_includes_actionable_state_heatmap_context + f = Tempfile.new(["rep_state_sarif", ".rb"]) + f.write(<<~RB) + class BillingService + def set_user(user); 
@user = user; end + def set_cart(cart); @cart = cart; end + def process + charge(@user) if @cart + audit(@user) + end + end + RB + f.close + + sarif = JSON.parse(Decomplex::Report.new([f.path]).to_sarif) + result = sarif.fetch("runs").first.fetch("results").find do |entry| + entry.fetch("ruleId") == "decomplex.state-heatmap" + end + + refute_nil result + message = result.fetch("message").fetch("text") + assert_includes message, "state `" + assert_includes message, "writes=" + assert_includes message, "reads=" + assert_includes message, "writers" + ensure + f&.unlink + end + def test_markdown_orders_sections_by_signal_tier_not_volume md = report.to_markdown prio = md[/## Project Prioritization.*?\n\n(.*?)\n\n/m, 1].to_s diff --git a/gems/espalier/test/ast_extractor_test.rb b/gems/espalier/test/ast_extractor_test.rb index d86087924..4fcc2e45e 100644 --- a/gems/espalier/test/ast_extractor_test.rb +++ b/gems/espalier/test/ast_extractor_test.rb @@ -12,7 +12,10 @@ class AstExtractorTest < Minitest::Test typescript: "DECOMPLEX_TS_TYPESCRIPT_PATH", go: "DECOMPLEX_TS_GO_PATH", rust: "DECOMPLEX_TS_RUST_PATH", - zig: "DECOMPLEX_TS_ZIG_PATH" + zig: "DECOMPLEX_TS_ZIG_PATH", + c: "DECOMPLEX_TS_C_PATH", + cpp: "DECOMPLEX_TS_CPP_PATH", + csharp: "DECOMPLEX_TS_CSHARP_PATH" }.freeze def parse_ruby(code) @@ -298,4 +301,164 @@ class Unit { refute_nil run[:span], language end end + + def test_extracts_architecture_parity_facts_across_supported_tree_sitter_languages + profiles = { + python: [ + ".py", + <<~PY, + class Worker: + def work(self): + pass + class Unit: + def __init__(self, value): + self.value = value + self.other = Worker() + def run(self): + self.value = self.value + 1 + self.other.work() + self._bump() + def _bump(self): + pass + PY + "Unit", + "run", + "_bump", + "other", + nil, + "self.other" + ], + typescript: [ + ".ts", + <<~TS, + class Worker { work(): void {} } + class Unit { + value: number; + private other: Worker; + constructor(value: number) { this.value = 
value; this.other = new Worker(); } + public run(): void { this.value = this.value + 1; this.other.work(); this.bump(); } + private bump(): void {} + } + TS + "Unit", + "run", + "bump", + "other", + "Worker", + "this.other" + ], + go: [ + ".go", + <<~GO, + package p + type Worker struct{} + func (w *Worker) Work() {} + type Unit struct { value int; other *Worker } + func (u *Unit) Run() { u.value = u.value + 1; u.other.Work(); u.bump() } + func (u *Unit) bump() {} + GO + "Unit", + "Run", + "bump", + "other", + "*Worker", + "self.other" + ], + rust: [ + ".rs", + <<~RS, + struct Worker {} + impl Worker { fn work(&self) {} } + struct Unit { value: i32, other: Worker } + impl Unit { + pub fn run(&mut self) { self.value = self.value + 1; self.other.work(); self.bump(); } + fn bump(&self) {} + } + RS + "Unit", + "run", + "bump", + "other", + "Worker", + "self.other" + ], + c: [ + ".c", + <<~C, + typedef struct Worker { int ready; } Worker; + typedef struct Unit { int value; Worker *other; } Unit; + void worker_work(Worker *worker) {} + static void unit_bump(Unit *unit) {} + void unit_run(Unit *unit) { unit->value = unit->value + 1; worker_work(unit->other); unit_bump(unit); } + C + "Unit", + "unit_run", + "unit_bump", + "other", + "Worker", + "self.other" + ], + cpp: [ + ".cpp", + <<~CPP, + class Worker { public: void work() {} }; + class Unit { + int value; + Worker other; + public: + void run(){ value = value + 1; other.work(); bump(); } + private: + void bump(){} + }; + CPP + "Unit", + "run", + "bump", + "other", + "Worker", + "other" + ], + csharp: [ + ".cs", + <<~CS, + class Worker { public void Work() {} } + class Unit { + private int value; + private Worker other = new Worker(); + public void Run(){ value = value + 1; other.Work(); Bump(); } + private void Bump(){} + } + CS + "Unit", + "Run", + "Bump", + "other", + "Worker", + "other" + ] + } + + available = profiles.select do |language, _profile| + grammar = ENV[GRAMMAR_ENVS.fetch(language)] + grammar && 
File.file?(grammar) + end + skip "set Tree-sitter grammar paths to run architecture parity extractor test" if available.empty? + + available.each do |language, (ext, source, owner_name, run_name, helper_name, state_name, state_type, receiver)| + mods = parse_source(source, ext) + mod = mods.find { |candidate| candidate[:name] == owner_name } + refute_nil mod, language + assert_includes mod[:states], state_name, language + assert_equal state_type, mod[:ivar_types][state_name] if state_type + + vis = mod[:methods].to_h { |method| [method[:name], method[:visibility]] } + assert_equal :public, vis[run_name], language + assert_equal :private, vis[helper_name], language + + run = mod[:methods].find { |method| method[:name] == run_name } + assert_includes run[:effects][:writes], "value", language + assert_includes run[:effects][:reads], state_name, language + assert_includes run[:delegations].map { |call| call[:receiver] }, receiver, language + end + end end diff --git a/gems/nil-kill/lib/nil_kill/report.rb b/gems/nil-kill/lib/nil_kill/report.rb index b81f96d25..e0c3cd336 100644 --- a/gems/nil-kill/lib/nil_kill/report.rb +++ b/gems/nil-kill/lib/nil_kill/report.rb @@ -241,13 +241,21 @@ def sarif_rules(evidence) short_description: "Nil-Kill static analysis signal" ) end - action_rules + diagnostic_rules + static_rules + pressure_rules = sarif_pressure_findings(evidence).map { |finding| finding.fetch("kind") }.uniq.map do |kind| + Decomplex::Sarif.rule( + id: "nil-kill.pressure.#{Decomplex::Sarif.slug(kind)}", + name: "Pressure: #{kind.tr("_", " ")}", + short_description: "Nil-Kill pressure signal" + ) + end + action_rules + diagnostic_rules + static_rules + pressure_rules end def sarif_results(evidence) sarif_actions(evidence).map { |action| sarif_action_result(action, evidence) } + sarif_diagnostics(evidence).map { |diagnostic| sarif_diagnostic_result(diagnostic) } + - sarif_static_findings(evidence).map { |finding| sarif_static_result(finding) } + 
sarif_static_findings(evidence).map { |finding| sarif_static_result(finding) } + + sarif_pressure_findings(evidence).map { |finding| sarif_pressure_result(finding) } end def sarif_actions(evidence) @@ -283,7 +291,8 @@ def static_method_findings(method) findings << { "kind" => "untyped_signature", "level" => "warning", - "message" => "static signature includes an untyped or unknown type for #{static_member_label(method)}", + "message" => "untyped signature pressure: #{static_member_label(method)} has `#{signature}`; " \ + "replace Any/T.untyped/unknown with the narrowest contract to stop downstream type guards", "path" => method["path"], "line" => method["line"], "static_kind" => method["kind"] || "method", @@ -297,7 +306,8 @@ def static_method_findings(method) findings << { "kind" => "nullable_signature", "level" => "note", - "message" => "static signature includes a nullable type for #{static_member_label(method)}", + "message" => "nilability pressure: #{static_member_label(method)} has `#{signature}`; " \ + "confirm absence is meaningful, otherwise tighten the contract or use an empty collection/value", "path" => method["path"], "line" => method["line"], "static_kind" => method["kind"] || "method", @@ -317,7 +327,8 @@ def static_field_finding(field) { "kind" => "untyped_field", "level" => "warning", - "message" => "static field has no precise type for #{static_member_label(field)}", + "message" => "untyped field pressure: #{static_member_label(field)} has no precise static type; " \ + "add a declared field type or typed initializer so readers do not need guards", "path" => field["path"], "line" => field["line"], "static_kind" => field["kind"] || "field", @@ -351,6 +362,77 @@ def static_member_label(member) "#{owner}##{name}" end + def sarif_pressure_findings(evidence) + hidden_enum_pressure_findings(evidence) + + fallibility_pressure_findings(evidence) + + primitive_record_pressure_findings(evidence) + end + + def hidden_enum_pressure_findings(evidence) + 
Array(evidence.dig("facts", "hidden_enum_pressure")).map do |row| + values = Array(row["values"]).first(10).join(", ") + label = pressure_member_label(row) + { + "kind" => "hidden_enum", + "level" => row["confidence"].to_s == "high" ? "warning" : "note", + "message" => "hidden enum pressure: #{label} #{row["kind"]} `#{row["slot"]}` has values #{values}; " \ + "decision pressure #{row["decision_pressure"].to_i}, score #{row["score"].to_i}; " \ + "#{row["suggestion"]}", + "path" => row["path"], + "line" => row["line"], + "pressure" => row, + } + end + end + + def fallibility_pressure_findings(evidence) + fallibility_display_rows(Array(evidence.dig("facts", "fallibility_pressure"))).map do |row| + runtime = row["runtime"] || {} + raised = "#{runtime["raised_calls"].to_i}/#{runtime["calls"].to_i}" + classes = Array(runtime["raised_classes"]).first(4).join(", ") + class_text = classes.empty? ? "" : "; raised #{classes}" + { + "kind" => "fallibility", + "level" => row["handler_pressure"].to_i.positive? || runtime["raised_calls"].to_i.positive? ? "warning" : "note", + "message" => "fallibility pressure: #{row["label"]} score #{row["score"].to_i}; " \ + "direct sources #{Array(row["direct_sources"]).size}; runtime raises #{raised} " \ + "(#{runtime["raised_rate"].to_f}%#{class_text}); handlers #{row["handler_pressure"].to_i}; " \ + "unhandled callers #{Array(row["fallible_callers"]).size}", + "path" => row["path"], + "line" => row["line"], + "pressure" => row, + } + end + end + + def primitive_record_pressure_findings(evidence) + hash_record_struct_pressure(evidence).map do |row| + location = parse_location(Array(row["examples"]).first) + keys = Array(row["keys"]).first(10).join(", ") + { + "kind" => "primitive_record", + "level" => row["total_pressure"].to_i >= 3 ? 
"warning" : "note", + "message" => "primitive record pressure: #{row["label"]} behaves like an ad-hoc struct; " \ + "total pressure #{row["total_pressure"].to_i} " \ + "(return #{row["return_slots"].to_i}, param #{row["param_slots"].to_i}, " \ + "ivar #{row["ivar_slots"].to_i}, collection #{row["collection_slots"].to_i}); keys #{keys}", + "path" => location[:path], + "line" => location[:line], + "pressure" => row, + } + end + end + + def pressure_member_label(row) + owner = row["owner"].to_s + method = row["method"].to_s + return owner if method.empty? + return method if owner.empty? + + separator = row["method_kind"] == "class" ? "." : "#" + "#{owner}#{separator}#{method}" + end + def sarif_action_result(action, evidence) kind = action["kind"].to_s.empty? ? "action" : action["kind"].to_s Decomplex::Sarif.result( @@ -393,6 +475,20 @@ def sarif_static_result(finding) ) end + def sarif_pressure_result(finding) + kind = finding["kind"].to_s.empty? ? "pressure" : finding["kind"].to_s + Decomplex::Sarif.result( + rule_id: "nil-kill.pressure.#{Decomplex::Sarif.slug(kind)}", + level: finding["level"] || "note", + message: finding["message"] || kind, + path: finding["path"], + line: finding["line"], + properties: Decomplex::Sarif.json_safe_value(finding).merge( + "source_format" => "nil-kill.pressure" + ) + ) + end + def sarif_action_level(action) case action["confidence"].to_s when HIGH then "warning" diff --git a/gems/nil-kill/spec/nil_kill_spec.rb b/gems/nil-kill/spec/nil_kill_spec.rb index 8e101187f..ad5a852fc 100644 --- a/gems/nil-kill/spec/nil_kill_spec.rb +++ b/gems/nil-kill/spec/nil_kill_spec.rb @@ -286,12 +286,87 @@ def pick "nil-kill.static.nullable-signature", "nil-kill.static.untyped-field", ) + expect(results).to include(a_hash_including( + "ruleId" => "nil-kill.static.untyped-signature", + "message" => a_hash_including("text" => include("replace Any/T.untyped/unknown")), + )) + expect(results).to include(a_hash_including( + "ruleId" => 
"nil-kill.static.nullable-signature", + "message" => a_hash_including("text" => include("nilability pressure")), + )) expect(results).not_to include(a_hash_including( "ruleId" => "nil-kill.static.untyped-field", "message" => a_hash_including("text" => include("CurrentUnitSpan#id")), )) end + it "renders pressure facts as actionable SARIF findings" do + evidence = { + "facts" => { + "hidden_enum_pressure" => [{ + "path" => "src/workflow.rb", + "line" => 10, + "owner" => "Workflow", + "method" => "label", + "method_kind" => "instance", + "kind" => "param", + "slot" => "status", + "confidence" => "high", + "score" => 12, + "values" => %w[:active :pending], + "decision_pressure" => 2, + "runtime" => {"calls" => 5, "classes" => ["Symbol"]}, + "blockers" => [], + "suggestion" => "review for a named Status enum or literal-union contract", + "decisions" => [], + }], + "fallibility_pressure" => [{ + "label" => "Parser#parse", + "path" => "src/parser.rb", + "line" => 12, + "score" => 9, + "direct_sources" => [{"path" => "src/parser.rb", "line" => 15, "kind" => "raise", "code" => "raise ParserError"}], + "runtime" => {"calls" => 20, "ok_calls" => 18, "raised_calls" => 2, "raised_rate" => 10.0, "raised_classes" => ["ParserError"]}, + "fallible_callers" => ["Compiler#run"], + "handler_pressure" => 1, + "exclusive_handlers" => 1, + "shared_handlers" => 0, + "handlers" => [], + }], + "collection_index_lookups" => [{ + "path" => "src/options.rb", + "line" => 8, + "code" => "opts[:mode]", + "receiver" => "opts", + "receiver_type" => "Hash", + "index" => ":mode", + "lookup_type" => "T.untyped", + "status" => "untyped receiver", + }], + "param_origins" => [], + "return_origins" => [], + }, + "actions" => [], + "diagnostics" => [], + } + + sarif = JSON.parse(described_class.new(["--format=sarif"], evidence: evidence).to_sarif(evidence)) + results = sarif.fetch("runs").first.fetch("results") + + expect(results).to include(a_hash_including( + "ruleId" => "nil-kill.pressure.hidden-enum", 
+ "message" => a_hash_including("text" => include("hidden enum pressure: Workflow#label param `status`")), + )) + expect(results).to include(a_hash_including( + "ruleId" => "nil-kill.pressure.fallibility", + "message" => a_hash_including("text" => include("fallibility pressure: Parser#parse")), + )) + expect(results).to include(a_hash_including( + "ruleId" => "nil-kill.pressure.primitive-record", + "message" => a_hash_including("text" => include("primitive record pressure")), + )) + end + it "--hygiene emits only the slot summary and action counts, skipping heavy sections" do Dir.mktmpdir("nil-kill-hygiene-report", NilKill::ROOT) do |dir| report = described_class.new(["--hygiene"]) From f592ae3f12adc5315080452eca6575965ba33c05 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Tue, 16 Jun 2026 13:33:24 +0000 Subject: [PATCH 04/52] Add Tree-sitter extraction and systems hazards Co-authored-by: Codex --- gems/lineage/CONTRIBUTING.md | 8 +- gems/lineage/Cargo.lock | 103 ++- gems/lineage/Cargo.toml | 12 +- gems/lineage/README.md | 45 +- .../lineage/docs/agents/cross-lang-support.md | 1 + gems/lineage/src/extract.rs | 588 ++++++++++++++- gems/lineage/src/hazard.rs | 681 ++++++++++++++++++ gems/lineage/src/mutant.rs | 20 +- gems/slopcop/README.md | 19 +- gems/slopcop/exe/slopcop | 7 +- gems/slopcop/lib/slopcop/constraints.rb | 8 + .../lib/slopcop/constraints/c_provider.rb | 155 ++++ .../lib/slopcop/constraints/cpp_provider.rb | 153 ++++ .../slopcop/constraints/csharp_provider.rb | 132 ++++ .../slopcop/constraints/language_provider.rb | 125 ++++ .../lib/slopcop/constraints/rust_provider.rb | 176 +++++ .../test/constraints_systems_provider_test.rb | 140 ++++ 17 files changed, 2311 insertions(+), 62 deletions(-) create mode 100644 gems/slopcop/lib/slopcop/constraints/c_provider.rb create mode 100644 gems/slopcop/lib/slopcop/constraints/cpp_provider.rb create mode 100644 gems/slopcop/lib/slopcop/constraints/csharp_provider.rb create mode 100644 
gems/slopcop/lib/slopcop/constraints/language_provider.rb create mode 100644 gems/slopcop/lib/slopcop/constraints/rust_provider.rb create mode 100644 gems/slopcop/test/constraints_systems_provider_test.rb diff --git a/gems/lineage/CONTRIBUTING.md b/gems/lineage/CONTRIBUTING.md index 48f59e9fa..aabe430fa 100644 --- a/gems/lineage/CONTRIBUTING.md +++ b/gems/lineage/CONTRIBUTING.md @@ -76,9 +76,11 @@ provider adapters direct database authority. Logical-unit identity is the core contract. Changes to extraction should be conservative and tested against renames, moves, and refactors. -The current extractor is heuristic. Planned Tree-sitter-backed profiles -should replace extraction internals without changing the storage and -history contracts. +Supported production languages should use Tree-sitter-backed extraction, +not line regexes. Regex-style heuristics are acceptable only as a +bootstrap fallback for secondary languages that are explicitly marked +experimental. If a Tree-sitter-backed file cannot be parsed, prefer +returning no units over inventing low-confidence boundaries. 
## UI And LSP diff --git a/gems/lineage/Cargo.lock b/gems/lineage/Cargo.lock index 897e16a10..d8c790dfd 100644 --- a/gems/lineage/Cargo.lock +++ b/gems/lineage/Cargo.lock @@ -761,8 +761,16 @@ dependencies = [ "tower-http", "tower-lsp", "tree-sitter", + "tree-sitter-c", + "tree-sitter-c-sharp", + "tree-sitter-cpp", + "tree-sitter-go", + "tree-sitter-javascript", "tree-sitter-language", + "tree-sitter-python", + "tree-sitter-ruby", "tree-sitter-rust", + "tree-sitter-typescript", "tree-sitter-zig", ] @@ -1217,6 +1225,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "strsim" version = "0.10.0" @@ -1417,13 +1431,64 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.23.2" +version = "0.24.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0203df02a3b6dd63575cc1d6e609edc2181c9a11867a271b25cfd2abff3ec5ca" +checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" dependencies = [ "cc", "regex", "regex-syntax", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afd2b1bf1585dc2ef6d69e87d01db8adb059006649dd5f96f31aa789ee6e9c71" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c-sharp" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67f06accca7b45351758663b8215089e643d53bd9a660ce0349314263737fcb0" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" 
+dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-go" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b13d476345220dbe600147dd444165c5791bf85ef53e28acbedd46112ee18431" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf40bf599e0416c16c125c3cec10ee5ddc7d1bb8b0c60fa5c4de249ad34dc1b1" +dependencies = [ + "cc", "tree-sitter-language", ] @@ -1433,6 +1498,26 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c199356c799a8945965bb5f2c55b2ad9d9aa7c4b4f6e587fe9dea0bc715e5f9c" +[[package]] +name = "tree-sitter-python" +version = "0.23.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-ruby" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-rust" version = "0.23.0" @@ -1443,11 +1528,21 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-zig" -version = "1.0.2" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2994e37b8ef1f715b931a5ff084a1b1713b1bc56e7aaebd148cc3efe0bf29ad9" +checksum = "ab11fc124851b0db4dd5e55983bbd9631192e93238389dcd44521715e5d53e28" dependencies = [ 
"cc", "tree-sitter-language", diff --git a/gems/lineage/Cargo.toml b/gems/lineage/Cargo.toml index 7f69bc22c..3b0fda3dd 100644 --- a/gems/lineage/Cargo.toml +++ b/gems/lineage/Cargo.toml @@ -31,10 +31,18 @@ sha2 = "0.10" tokio = { version = "1", features = ["io-std", "macros", "net", "rt", "rt-multi-thread"] } tower-http = { version = "0.5", features = ["set-header", "trace"] } tower-lsp = "0.20" -tree-sitter = "=0.23.2" +tree-sitter = "=0.24.7" +tree-sitter-c = "=0.23.4" +tree-sitter-c-sharp = "=0.23.1" +tree-sitter-cpp = "=0.23.4" +tree-sitter-go = "=0.23.4" +tree-sitter-javascript = "=0.23.1" tree-sitter-language = "=0.1.3" +tree-sitter-python = "=0.23.6" +tree-sitter-ruby = "=0.23.1" tree-sitter-rust = "=0.23.0" -tree-sitter-zig = "=1.0.2" +tree-sitter-typescript = "=0.23.2" +tree-sitter-zig = "=1.1.2" [dev-dependencies] tempfile = "=3.10.1" diff --git a/gems/lineage/README.md b/gems/lineage/README.md index 8caa87f39..7e38e05ed 100644 --- a/gems/lineage/README.md +++ b/gems/lineage/README.md @@ -120,7 +120,7 @@ from the same source and commit. 
| Coverage | `ingest-coverage` | Codecov JSON, SimpleCov JSON, Cobertura XML, kcov Cobertura XML | | Test exposure | `ingest-test-exposure` | Lineage `test-exposure` JSON | | Mutation testing | `ingest-mutants` | Ruby `mutant-facts/v1` | -| Systems hazards | `ingest-hazards` | Zig hazard provider | +| Systems hazards | `ingest-hazards` | Zig, Go, Rust, C, C++, C# hazard providers | | Stack traces | `ingest` | Sentry-style event JSON | | Static analysis and risk findings | `ingest-sarif` | SARIF 2.1.0 files from Decomplex, SlopCop, Boobytrap, Nil-Kill, Espalier, and third-party tools | @@ -283,6 +283,11 @@ Recommended CLEAR lanes: - Zig kcov unit coverage: `--format cobertura --test-type unit` - Zig systems evidence: `--test-type loom`, `--test-type vopr`, or `--test-type tsan` +- Rust systems evidence: `--test-type loom` for concurrency/atomic + checks and `--test-type miri` for unsafe-code checks +- C/C++ systems evidence: `--test-type tsan`, `asan`, `lsan`, or + `ubsan` +- C# systems evidence: `--test-type concurrency` or `unsafe` ### Test Exposure @@ -359,8 +364,12 @@ cargo run --manifest-path gems/lineage/Cargo.toml -- ingest-hazards \ --commit "$(git rev-parse HEAD)" ``` -The current first-party provider scans Zig runtime/lib hazard sites used -by CLEAR's Loom and VOPR coverage work. +First-party providers currently support `zig`, `go`, `rust`, `c`, +`cpp`, and `csharp`. Zig scans the CLEAR runtime/lib Loom and VOPR +hazard sites. Rust scans Loom-relevant concurrency/atomic sites and +unsafe blocks/operations. C and C++ scan sanitizer-relevant concurrency, +raw-memory, lifetime, and UB hazards. C# scans concurrency and unsafe +native-memory hazards. ### Stack Traces @@ -381,19 +390,23 @@ file. ## Supported Languages Roadmap -Lineage currently uses a heuristic logical-unit extractor. Ruby and Zig -are the most exercised paths because CLEAR uses them for compiler and -runtime review. 
Other language extraction is experimental until the -planned Tree-sitter-backed profiles replace the bootstrap extractor. - -- [x] Ruby: used for CLEAR compiler review. -- [x] Zig: used for CLEAR runtime review and systems hazards. -- [ ] Python: experimentally supported. -- [ ] JavaScript: experimentally supported. -- [ ] Lua: experimentally supported. -- [ ] C: experimentally supported. -- [ ] Go: experimentally supported. -- [ ] Assembly: experimentally supported. +Lineage uses Tree-sitter-backed logical-unit extraction for the core +languages it aims to track as a ground-truth risk ledger. For those +languages, parse failures produce no units instead of falling back to +regex boundaries. Heuristic extraction remains only for secondary +experimental languages. + +- [x] Ruby: Tree-sitter-backed; used for CLEAR compiler review. +- [x] Zig: Tree-sitter-backed; used for CLEAR runtime review and + systems hazards. +- [x] Rust: Tree-sitter-backed. +- [x] Python: Tree-sitter-backed. +- [x] JavaScript / TypeScript: Tree-sitter-backed. +- [x] Go: Tree-sitter-backed, including concurrency hazards. +- [x] C / C++: Tree-sitter-backed, including sanitizer hazards. +- [x] C#: Tree-sitter-backed, including concurrency/unsafe hazards. +- [ ] Lua: experimental heuristic extraction. +- [ ] Assembly: experimental label extraction. ## Boundaries diff --git a/gems/lineage/docs/agents/cross-lang-support.md b/gems/lineage/docs/agents/cross-lang-support.md index e667afbc4..1e43ff3ac 100644 --- a/gems/lineage/docs/agents/cross-lang-support.md +++ b/gems/lineage/docs/agents/cross-lang-support.md @@ -65,6 +65,7 @@ Each repository received as much of this evidence as the current tools could pro - Added Go concurrency hazard detection through SlopCop/Lineage. - Fixed Lineage source extraction and coverage ingestion issues found during TypeScript/Go validation. - Fixed Nil-kill static-only normalization so non-Ruby languages do not accidentally depend on stale runtime traces. 
+- Replaced Lineage regex-first logical-unit extraction for Ruby, Python, JavaScript/TypeScript, Go, Rust, Zig, C/C++, and C# with Tree-sitter-backed extraction. The regex heuristic path is now only for secondary experimental languages. ## Environment Gaps diff --git a/gems/lineage/src/extract.rs b/gems/lineage/src/extract.rs index 0ee457604..353089eda 100644 --- a/gems/lineage/src/extract.rs +++ b/gems/lineage/src/extract.rs @@ -175,21 +175,12 @@ impl BoundaryExtractor for HeuristicExtractor { let ext = extension(&file.path).map(|value| normalize_extension(&value)); let lines: Vec<&str> = file.contents.lines().collect(); - let mut candidates = ext - .as_deref() - .and_then(|extension| tree_sitter_candidates(file, extension, &lines)); - - if candidates.as_ref().map(Vec::is_empty).unwrap_or(true) { - let mut detected = Vec::new(); - for (index, line) in lines.iter().enumerate() { - if let Some(candidate) = detect_candidate(line, (index + 1) as u32, ext.as_deref()) { - detected.push(candidate); - } + let candidates = match ext.as_deref() { + Some(extension) if TreeSitterAdapter::for_extension(extension).is_some() => { + tree_sitter_candidates(file, extension, &lines).unwrap_or_default() } - candidates = Some(detected); - } - - let candidates = candidates.unwrap_or_default(); + _ => heuristic_candidates(&lines, ext.as_deref()), + }; candidates .iter() .enumerate() @@ -249,16 +240,44 @@ fn detect_candidate(line: &str, line_number: u32, extension: Option<&str>) -> Op } } +fn heuristic_candidates(lines: &[&str], extension: Option<&str>) -> Vec { + let mut detected = Vec::new(); + for (index, line) in lines.iter().enumerate() { + if let Some(candidate) = detect_candidate(line, (index + 1) as u32, extension) { + detected.push(candidate); + } + } + detected +} + #[derive(Debug, Clone, Copy)] enum TreeSitterAdapter { + C, + Cpp, + CSharp, + Go, + JavaScript, + Python, + Ruby, Rust, + Tsx, + TypeScript, Zig, } impl TreeSitterAdapter { fn for_extension(extension: &str) -> 
Option { match extension { + "c" | "h" => Some(Self::C), + "cc" | "cpp" | "cxx" | "hh" | "hpp" | "hxx" => Some(Self::Cpp), + "cs" => Some(Self::CSharp), + "go" => Some(Self::Go), + "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript), + "py" | "pyi" => Some(Self::Python), + "rb" => Some(Self::Ruby), "rs" => Some(Self::Rust), + "tsx" => Some(Self::Tsx), + "ts" => Some(Self::TypeScript), "zig" => Some(Self::Zig), _ => None, } @@ -266,7 +285,16 @@ impl TreeSitterAdapter { fn language(self) -> Language { match self { + Self::C => tree_sitter_c::LANGUAGE.into(), + Self::Cpp => tree_sitter_cpp::LANGUAGE.into(), + Self::CSharp => tree_sitter_c_sharp::LANGUAGE.into(), + Self::Go => tree_sitter_go::LANGUAGE.into(), + Self::JavaScript => tree_sitter_javascript::LANGUAGE.into(), + Self::Python => tree_sitter_python::LANGUAGE.into(), + Self::Ruby => tree_sitter_ruby::LANGUAGE.into(), Self::Rust => tree_sitter_rust::LANGUAGE.into(), + Self::Tsx => tree_sitter_typescript::LANGUAGE_TSX.into(), + Self::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), Self::Zig => tree_sitter_zig::LANGUAGE.into(), } } @@ -278,7 +306,15 @@ impl TreeSitterAdapter { lines: &[&str], ) -> Option { match self { + Self::C => c_candidate_for_node(node, source, lines), + Self::Cpp => cpp_candidate_for_node(node, source, lines), + Self::CSharp => csharp_candidate_for_node(node, source, lines), + Self::Go => go_candidate_for_node(node, source, lines), + Self::JavaScript => javascript_candidate_for_node(node, source, lines), + Self::Python => python_candidate_for_node(node, source, lines), + Self::Ruby => ruby_candidate_for_node(node, source, lines), Self::Rust => rust_candidate_for_node(node, source, lines), + Self::Tsx | Self::TypeScript => typescript_candidate_for_node(node, source, lines), Self::Zig => zig_candidate_for_node(node, source, lines), } } @@ -291,14 +327,44 @@ fn tree_sitter_candidates( ) -> Option> { let adapter = TreeSitterAdapter::for_extension(extension)?; let mut parser 
= Parser::new(); - parser.set_language(&adapter.language()).ok()?; - let tree = parser.parse(&file.contents, None)?; + if let Err(error) = parser.set_language(&adapter.language()) { + if std::env::var("LINEAGE_DEBUG_EXTRACT").is_ok() { + eprintln!( + "tree-sitter language setup failed in {} ({extension}): {error:?}", + file.path + ); + } + return None; + } + let tree = match parser.parse(&file.contents, None) { + Some(tree) => tree, + None => { + if std::env::var("LINEAGE_DEBUG_EXTRACT").is_ok() { + eprintln!("tree-sitter produced no tree in {} ({extension})", file.path); + } + return None; + } + }; if tree.root_node().has_error() { + if std::env::var("LINEAGE_DEBUG_EXTRACT").is_ok() { + eprintln!( + "tree-sitter parse error in {} ({extension}): {}", + file.path, + tree.root_node().to_sexp() + ); + } return None; } let mut candidates = Vec::new(); collect_tree_sitter_candidates(tree.root_node(), adapter, &file.contents, lines, &mut candidates); + if candidates.is_empty() && std::env::var("LINEAGE_DEBUG_EXTRACT").is_ok() { + eprintln!( + "tree-sitter found no units in {} ({extension}): {}", + file.path, + tree.root_node().to_sexp() + ); + } Some(candidates) } @@ -320,6 +386,202 @@ fn collect_tree_sitter_candidates( } } +fn ruby_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "class" => tree_sitter_named_candidate(node, UnitKind::Class, source, lines), + "module" => tree_sitter_named_candidate(node, UnitKind::Module, source, lines), + "method" => { + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + qualified_name(node, name, source, &["class", "module", "method", "singleton_method"]), + UnitKind::Function, + source, + lines, + )) + } + "singleton_method" => { + let object = field_text(node, "object", source)?; + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + qualified_name( + node, + &format!("{}.{}", clean_owner_name(object), name), + 
source, + &["class", "module", "method", "singleton_method"], + ), + UnitKind::Function, + source, + lines, + )) + } + _ => None, + } +} + +fn python_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "class_definition" => tree_sitter_named_candidate(node, UnitKind::Class, source, lines), + "function_definition" => { + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + qualified_name(node, name, source, &["class_definition", "function_definition"]), + UnitKind::Function, + source, + lines, + )) + } + "type_alias_statement" => { + let name = field_text(node, "name", source) + .or_else(|| first_identifier_child(node, source))?; + Some(tree_sitter_candidate( + node, + name.to_string(), + UnitKind::Class, + source, + lines, + )) + } + _ => None, + } +} + +fn javascript_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "class_declaration" => tree_sitter_named_candidate(node, UnitKind::Class, source, lines), + "function_declaration" | "generator_function_declaration" => { + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + qualified_name(node, name, source, &["class_declaration", "function_declaration"]), + UnitKind::Function, + source, + lines, + )) + } + "method_definition" => javascript_method_candidate(node, source, lines), + "variable_declarator" => javascript_variable_callable_candidate(node, source, lines), + _ => None, + } +} + +fn typescript_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "abstract_class_declaration" | "class_declaration" | "enum_declaration" + | "interface_declaration" | "internal_module" | "type_alias_declaration" => { + tree_sitter_named_candidate(node, UnitKind::Class, source, lines) + } + "function_declaration" | "generator_function_declaration" => { + let name = field_text(node, "name", source)?; + 
Some(tree_sitter_candidate( + node, + qualified_name(node, name, source, &["class_declaration", "abstract_class_declaration", "function_declaration"]), + UnitKind::Function, + source, + lines, + )) + } + "method_definition" => javascript_method_candidate(node, source, lines), + "variable_declarator" => javascript_variable_callable_candidate(node, source, lines), + "public_field_definition" => javascript_field_callable_candidate(node, source, lines), + _ => None, + } +} + +fn go_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "function_declaration" => tree_sitter_named_candidate(node, UnitKind::Function, source, lines), + "method_declaration" => { + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + go_qualified_method_name(node, name, source), + UnitKind::Function, + source, + lines, + )) + } + "type_spec" | "type_alias" => { + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + name.to_string(), + UnitKind::Class, + source, + lines, + )) + } + _ => None, + } +} + +fn c_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "function_definition" => { + let name = c_like_function_name(node, source)?; + Some(tree_sitter_candidate(node, name, UnitKind::Function, source, lines)) + } + "struct_specifier" | "union_specifier" | "enum_specifier" => c_like_type_candidate(node, source, lines), + "type_definition" => c_like_typedef_candidate(node, source, lines), + _ => None, + } +} + +fn cpp_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "function_definition" => { + let name = c_like_function_name(node, source)?; + Some(tree_sitter_candidate( + node, + qualified_name(node, &name, source, &["class_specifier", "namespace_definition"]), + UnitKind::Function, + source, + lines, + )) + } + "class_specifier" | "struct_specifier" | "union_specifier" | 
"enum_specifier" => { + c_like_type_candidate(node, source, lines) + } + "namespace_definition" => tree_sitter_named_candidate(node, UnitKind::Module, source, lines), + "type_definition" => c_like_typedef_candidate(node, source, lines), + _ => None, + } +} + +fn csharp_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "class_declaration" | "struct_declaration" | "interface_declaration" | "enum_declaration" + | "record_declaration" => tree_sitter_named_candidate(node, UnitKind::Class, source, lines), + "namespace_declaration" => tree_sitter_named_candidate(node, UnitKind::Module, source, lines), + "method_declaration" | "constructor_declaration" => { + let name = field_text(node, "name", source) + .map(str::to_string) + .or_else(|| nearest_owner_name(node, source, &["class_declaration", "struct_declaration", "record_declaration"]))?; + Some(tree_sitter_candidate( + node, + qualified_name( + node, + &name, + source, + &[ + "class_declaration", + "struct_declaration", + "interface_declaration", + "record_declaration", + "namespace_declaration", + ], + ), + UnitKind::Function, + source, + lines, + )) + } + _ => None, + } +} + fn rust_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { let kind = node.kind(); match kind { @@ -374,6 +636,68 @@ fn tree_sitter_named_candidate( Some(tree_sitter_candidate(node, name.to_string(), kind, source, lines)) } +fn javascript_method_candidate(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + qualified_name( + node, + name, + source, + &["class_declaration", "abstract_class_declaration", "function_declaration"], + ), + UnitKind::Function, + source, + lines, + )) +} + +fn javascript_variable_callable_candidate( + node: Node<'_>, + source: &str, + lines: &[&str], +) -> Option { + let value = node.child_by_field_name("value")?; + if !matches!( + value.kind(), + 
"arrow_function" | "function" | "function_expression" | "generator_function" | "class" + ) { + return None; + } + let name = field_text(node, "name", source)?; + let kind = if value.kind() == "class" { + UnitKind::Class + } else { + UnitKind::Function + }; + Some(tree_sitter_candidate( + node, + qualified_name(node, name, source, &["class_declaration", "abstract_class_declaration"]), + kind, + source, + lines, + )) +} + +fn javascript_field_callable_candidate( + node: Node<'_>, + source: &str, + lines: &[&str], +) -> Option { + let value = node.child_by_field_name("value")?; + if !matches!(value.kind(), "arrow_function" | "function" | "function_expression") { + return None; + } + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + qualified_name(node, name, source, &["class_declaration", "abstract_class_declaration"]), + UnitKind::Function, + source, + lines, + )) +} + fn tree_sitter_candidate( node: Node<'_>, name: String, @@ -427,6 +751,155 @@ fn first_identifier_child<'a>(node: Node<'_>, source: &'a str) -> Option<&'a str None } +fn qualified_name(node: Node<'_>, base: &str, source: &str, owner_kinds: &[&str]) -> String { + let mut owners = Vec::new(); + let mut current = node; + while let Some(parent) = current.parent() { + if owner_kinds.contains(&parent.kind()) { + if let Some(owner) = owner_name(parent, source) { + owners.push(owner); + } + } + current = parent; + } + owners.reverse(); + owners.push(clean_owner_name(base)); + owners.join(".") +} + +fn nearest_owner_name(mut node: Node<'_>, source: &str, owner_kinds: &[&str]) -> Option { + while let Some(parent) = node.parent() { + if owner_kinds.contains(&parent.kind()) { + return owner_name(parent, source); + } + node = parent; + } + None +} + +fn owner_name(node: Node<'_>, source: &str) -> Option { + match node.kind() { + "class" | "module" | "class_definition" | "class_declaration" | "abstract_class_declaration" + | "interface_declaration" | "record_declaration" | 
"struct_declaration" | "enum_declaration" + | "namespace_definition" | "namespace_declaration" | "internal_module" => { + field_text(node, "name", source).map(clean_owner_name) + } + "function_definition" | "function_declaration" | "method" | "method_definition" + | "method_declaration" | "singleton_method" => { + field_text(node, "name", source).map(clean_owner_name) + } + "function_item" => field_text(node, "name", source).map(clean_owner_name), + "type_spec" | "type_alias" => field_text(node, "name", source).map(clean_owner_name), + "class_specifier" | "struct_specifier" | "union_specifier" | "enum_specifier" => { + c_like_type_name(node, source) + } + _ => None, + } +} + +fn go_qualified_method_name(node: Node<'_>, name: &str, source: &str) -> String { + let Some(receiver) = node.child_by_field_name("receiver") else { + return name.to_string(); + }; + let receiver_text = receiver.utf8_text(source.as_bytes()).unwrap_or_default(); + let receiver_type = receiver_text + .split_whitespace() + .last() + .unwrap_or(receiver_text) + .trim_matches(|ch: char| matches!(ch, '*' | '(' | ')' | '[' | ']')); + if receiver_type.is_empty() { + name.to_string() + } else { + format!("{receiver_type}.{name}") + } +} + +fn c_like_function_name(node: Node<'_>, source: &str) -> Option { + let declarator = node.child_by_field_name("declarator")?; + declarator_name(declarator, source).map(|name| clean_owner_name(&name)) +} + +fn c_like_type_candidate(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + let name = c_like_type_name(node, source)?; + Some(tree_sitter_candidate(node, name, UnitKind::Class, source, lines)) +} + +fn c_like_typedef_candidate(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + let name = node + .child_by_field_name("declarator") + .and_then(|declarator| declarator_name(declarator, source)) + .or_else(|| last_descendant_text(node, source, &["type_identifier", "identifier"]))?; + Some(tree_sitter_candidate( + node, + clean_owner_name(&name), + 
UnitKind::Class, + source, + lines, + )) +} + +fn c_like_type_name(node: Node<'_>, source: &str) -> Option { + field_text(node, "name", source) + .map(clean_owner_name) + .or_else(|| first_descendant_text(node, source, &["type_identifier", "identifier"]).map(|text| clean_owner_name(&text))) +} + +fn declarator_name(node: Node<'_>, source: &str) -> Option { + if let Some(name) = field_text(node, "name", source) { + return Some(name.to_string()); + } + if matches!( + node.kind(), + "identifier" | "field_identifier" | "type_identifier" | "qualified_identifier" | "scoped_identifier" + ) { + return node.utf8_text(source.as_bytes()).ok().map(str::to_string); + } + if let Some(child) = node.child_by_field_name("declarator") { + return declarator_name(child, source); + } + first_descendant_text( + node, + source, + &[ + "field_identifier", + "identifier", + "qualified_identifier", + "scoped_identifier", + "type_identifier", + ], + ) +} + +fn first_descendant_text(node: Node<'_>, source: &str, kinds: &[&str]) -> Option { + if kinds.contains(&node.kind()) { + return node.utf8_text(source.as_bytes()).ok().map(str::to_string); + } + for index in 0..node.named_child_count() { + if let Some(child) = node.named_child(index) { + if let Some(text) = first_descendant_text(child, source, kinds) { + return Some(text); + } + } + } + None +} + +fn last_descendant_text(node: Node<'_>, source: &str, kinds: &[&str]) -> Option { + let mut found = if kinds.contains(&node.kind()) { + node.utf8_text(source.as_bytes()).ok().map(str::to_string) + } else { + None + }; + for index in 0..node.named_child_count() { + if let Some(child) = node.named_child(index) { + if let Some(text) = last_descendant_text(child, source, kinds) { + found = Some(text); + } + } + } + found +} + fn rust_method_owner(node: Node<'_>, source: &str) -> Option { let impl_node = ancestor_kind(node, "impl_item")?; field_text(impl_node, "type", source).map(clean_owner_name) @@ -877,7 +1350,7 @@ mod tests { 
assert_eq!(units.len(), 2); assert_eq!(units[0].name, "Worker"); assert_eq!(units[0].kind, UnitKind::Class); - assert_eq!(units[1].name, "run"); + assert_eq!(units[1].name, "Worker.run"); assert_eq!(units[1].kind, UnitKind::Function); } @@ -893,12 +1366,12 @@ mod tests { }; let extractor = HeuristicExtractor::default(); - assert_eq!(extractor.extract_units(&go)[0].name, "Run"); + assert_eq!(extractor.extract_units(&go)[0].name, "Worker.Run"); assert_eq!(extractor.extract_units(&zig)[0].name, "run"); } #[test] - fn extracts_typescript_symbols_with_heuristics() { + fn extracts_typescript_symbols_with_tree_sitter() { let file = BlobFile { path: "packages/zod/src/demo.ts".into(), contents: r#" @@ -1038,8 +1511,81 @@ pub fn StringMap(comptime Value: type) type { let units = HeuristicExtractor::default().extract_units(&file); let names: Vec<_> = units.iter().map(|unit| unit.name.as_str()).collect(); - assert!(names.contains(&"self.build!")); - assert!(names.contains(&"value=")); + assert!(names.contains(&"Worker.self.build!")); + assert!(names.contains(&"Worker.value=")); + } + + #[test] + fn tree_sitter_extraction_handles_nested_and_multiline_boundaries() { + let python = BlobFile { + path: "src/service.py".into(), + contents: r#" +class Worker: + def run( + self, + value: int, + ) -> int: + def normalize(next_value: int) -> int: + return next_value + 1 + return normalize(value) +"# + .into(), + }; + let typescript = BlobFile { + path: "src/service.ts".into(), + contents: r#" +export class Worker { + async run( + value: number, + ): Promise { + return value + 1; + } +} +"# + .into(), + }; + + let extractor = HeuristicExtractor::default(); + let python_names: Vec<_> = extractor + .extract_units(&python) + .into_iter() + .map(|unit| (unit.name, unit.start_line, unit.end_line)) + .collect(); + assert!(python_names.contains(&("Worker".to_string(), 2, 9))); + assert!(python_names.contains(&("Worker.run".to_string(), 3, 9))); + 
assert!(python_names.contains(&("Worker.run.normalize".to_string(), 7, 8))); + + let typescript_names: Vec<_> = extractor + .extract_units(&typescript) + .into_iter() + .map(|unit| (unit.name, unit.start_line, unit.end_line)) + .collect(); + assert!(typescript_names.contains(&("Worker".to_string(), 2, 8))); + assert!(typescript_names.contains(&("Worker.run".to_string(), 3, 7))); + } + + #[test] + fn tree_sitter_extraction_ignores_strings_comments_and_parse_errors() { + let ruby = BlobFile { + path: "src/demo.rb".into(), + contents: "class Real\n TEXT = \"def fake\\nend\"\n # def also_fake\n def run\n end\nend\n".into(), + }; + let invalid_go = BlobFile { + path: "broken.go".into(), + contents: "func RegexWouldHaveMatched() {\n".into(), + }; + + let extractor = HeuristicExtractor::default(); + let ruby_names: Vec<_> = extractor + .extract_units(&ruby) + .into_iter() + .map(|unit| unit.name) + .collect(); + assert!(ruby_names.contains(&"Real".to_string())); + assert!(ruby_names.contains(&"Real.run".to_string())); + assert!(!ruby_names.contains(&"fake".to_string())); + assert!(!ruby_names.contains(&"also_fake".to_string())); + assert!(extractor.extract_units(&invalid_go).is_empty()); } #[test] diff --git a/gems/lineage/src/hazard.rs b/gems/lineage/src/hazard.rs index 90bc4ea9b..cf6423bbd 100644 --- a/gems/lineage/src/hazard.rs +++ b/gems/lineage/src/hazard.rs @@ -33,6 +33,10 @@ pub fn ingest_hazards( match provider { "zig" => ingest_zig_hazards(storage, repo.as_ref(), commit, timestamp), "go" => ingest_go_hazards(storage, repo.as_ref(), commit, timestamp), + "rust" => ingest_rust_hazards(storage, repo.as_ref(), commit, timestamp), + "c" => ingest_c_hazards(storage, repo.as_ref(), commit, timestamp), + "cpp" => ingest_cpp_hazards(storage, repo.as_ref(), commit, timestamp), + "csharp" => ingest_csharp_hazards(storage, repo.as_ref(), commit, timestamp), other => anyhow::bail!("unsupported hazard provider {other:?}"), } } @@ -55,6 +59,50 @@ fn ingest_go_hazards( 
ingest_language_hazards(storage, repo, commit, timestamp, "go", go_source_files, scan_go_sites) } +fn ingest_rust_hazards( + storage: &Storage, + repo: &Path, + commit: &str, + timestamp: Option, +) -> Result { + ingest_language_hazards(storage, repo, commit, timestamp, "rust", rust_source_files, scan_rust_sites) +} + +fn ingest_c_hazards( + storage: &Storage, + repo: &Path, + commit: &str, + timestamp: Option, +) -> Result { + ingest_language_hazards(storage, repo, commit, timestamp, "c", c_source_files, scan_c_sites) +} + +fn ingest_cpp_hazards( + storage: &Storage, + repo: &Path, + commit: &str, + timestamp: Option, +) -> Result { + ingest_language_hazards(storage, repo, commit, timestamp, "cpp", cpp_source_files, scan_cpp_sites) +} + +fn ingest_csharp_hazards( + storage: &Storage, + repo: &Path, + commit: &str, + timestamp: Option, +) -> Result { + ingest_language_hazards( + storage, + repo, + commit, + timestamp, + "csharp", + csharp_source_files, + scan_csharp_sites, + ) +} + fn ingest_language_hazards( storage: &Storage, repo: &Path, @@ -141,6 +189,55 @@ fn go_source_files(repo: &Path) -> Result> { Ok(files) } +fn rust_source_files(repo: &Path) -> Result> { + collect_language_files(repo, rust_source_path) +} + +fn c_source_files(repo: &Path) -> Result> { + collect_language_files(repo, c_source_path) +} + +fn cpp_source_files(repo: &Path) -> Result> { + collect_language_files(repo, cpp_source_path) +} + +fn csharp_source_files(repo: &Path) -> Result> { + collect_language_files(repo, csharp_source_path) +} + +fn collect_language_files(repo: &Path, source_path: fn(&str) -> bool) -> Result> { + let mut files = Vec::new(); + collect_matching_files(repo, Path::new(""), &mut files, source_path)?; + files.sort(); + files.dedup(); + Ok(files) +} + +fn collect_matching_files( + repo: &Path, + rel_dir: &Path, + out: &mut Vec, + source_path: fn(&str) -> bool, +) -> Result<()> { + let abs = repo.join(rel_dir); + if !abs.is_dir() { + return Ok(()); + } + for entry in 
fs::read_dir(&abs)? { + let entry = entry?; + let path = entry.path(); + let rel = rel_path(repo, &path)?; + if path.is_dir() { + if !excluded_common_dir(&rel) { + collect_matching_files(repo, Path::new(&rel), out, source_path)?; + } + } else if source_path(&rel) { + out.push(rel); + } + } + Ok(()) +} + fn collect_go_files(repo: &Path, rel_dir: &Path, out: &mut Vec) -> Result<()> { let abs = repo.join(rel_dir); if !abs.is_dir() { @@ -185,6 +282,30 @@ fn excluded_go_dir(path: &str) -> bool { || name.starts_with('.') } +fn excluded_common_dir(path: &str) -> bool { + let name = path.rsplit('/').next().unwrap_or(path); + matches!( + name, + ".git" + | "vendor" + | "third_party" + | "node_modules" + | "tmp" + | "dist" + | "build" + | "target" + | "bin" + | "obj" + | "packages" + | "cmake-build-debug" + | "cmake-build-release" + | "tests" + | "test" + | "benches" + | "examples" + ) || name.starts_with('.') +} + fn excluded_go_file(path: &str) -> bool { let Some(name) = path.rsplit('/').next() else { return true; @@ -192,6 +313,24 @@ fn excluded_go_file(path: &str) -> bool { name.ends_with("_test.go") } +fn rust_source_path(path: &str) -> bool { + path.ends_with(".rs") +} + +fn c_source_path(path: &str) -> bool { + path.ends_with(".c") || path.ends_with(".h") +} + +fn cpp_source_path(path: &str) -> bool { + [".cc", ".cpp", ".cxx", ".hh", ".hpp", ".hxx"] + .iter() + .any(|suffix| path.ends_with(suffix)) +} + +fn csharp_source_path(path: &str) -> bool { + path.ends_with(".cs") +} + fn excluded_zig_file(path: &str) -> bool { let Some(name) = path.rsplit('/').next() else { return true; @@ -292,6 +431,123 @@ fn scan_go_sites(path: &str, contents: &str) -> Vec { sites } +fn scan_rust_sites(path: &str, contents: &str) -> Vec { + let mut sites = Vec::new(); + let mut in_block_comment = false; + let mut unsafe_depth = 0_i32; + for (index, line) in contents.lines().enumerate() { + let line_no = (index + 1) as u32; + let code = strip_quoted_literals(&strip_go_comment(line, &mut 
in_block_comment)); + if code.trim().is_empty() { + continue; + } + if is_rust_atomic_site(&code) { + sites.push(site(path, line_no, line, "rust_loom_atomic", "loom")); + } + if is_rust_concurrency_site(&code) { + sites.push(site(path, line_no, line, "rust_loom_concurrency", "loom")); + } + if code.contains("unsafe fn ") || code.contains("unsafe fn(") { + sites.push(site(path, line_no, line, "rust_unsafe_fn", "miri")); + } + if code.contains("unsafe impl ") { + sites.push(site(path, line_no, line, "rust_unsafe_impl", "miri")); + } + let starts_unsafe = code.contains("unsafe {"); + if starts_unsafe { + sites.push(site(path, line_no, line, "rust_unsafe_block", "miri")); + } + if (unsafe_depth > 0 || starts_unsafe) && is_rust_unsafe_operation(&code) { + sites.push(site(path, line_no, line, "rust_unsafe_operation", "miri")); + } + unsafe_depth = update_unsafe_depth(&code, unsafe_depth); + } + sites +} + +fn scan_c_sites(path: &str, contents: &str) -> Vec { + let mut sites = Vec::new(); + let mut in_block_comment = false; + for (index, line) in contents.lines().enumerate() { + let line_no = (index + 1) as u32; + let code = strip_quoted_literals(&strip_go_comment(line, &mut in_block_comment)); + if code.trim().is_empty() { + continue; + } + if is_c_tsan_site(&code) { + sites.push(site(path, line_no, line, "c_tsan_concurrency", "tsan")); + } + if is_c_asan_api_site(&code) { + sites.push(site(path, line_no, line, "c_asan_raw_memory_api", "asan")); + } + if is_c_pointer_hazard(&code) { + sites.push(site(path, line_no, line, "c_asan_pointer", "asan")); + } + if is_c_lsan_site(&code) { + sites.push(site(path, line_no, line, "c_lsan_lifetime", "lsan")); + } + if is_arithmetic_ub_site(&code) { + sites.push(site(path, line_no, line, "c_ubsan_arithmetic", "ubsan")); + } + if is_c_cast_ub_site(&code) { + sites.push(site(path, line_no, line, "c_ubsan_cast", "ubsan")); + } + } + sites +} + +fn scan_cpp_sites(path: &str, contents: &str) -> Vec { + let mut sites = Vec::new(); + let 
mut in_block_comment = false; + for (index, line) in contents.lines().enumerate() { + let line_no = (index + 1) as u32; + let code = strip_quoted_literals(&strip_go_comment(line, &mut in_block_comment)); + if code.trim().is_empty() { + continue; + } + if is_cpp_tsan_site(&code) { + sites.push(site(path, line_no, line, "cpp_tsan_concurrency", "tsan")); + } + if is_cpp_asan_api_site(&code) { + sites.push(site(path, line_no, line, "cpp_asan_raw_memory_api", "asan")); + } + if is_cpp_pointer_or_cast_hazard(&code) { + sites.push(site(path, line_no, line, "cpp_asan_pointer_or_cast", "asan")); + } + if is_cpp_lsan_site(&code) { + sites.push(site(path, line_no, line, "cpp_lsan_lifetime", "lsan")); + } + if is_arithmetic_ub_site(&code) { + sites.push(site(path, line_no, line, "cpp_ubsan_arithmetic", "ubsan")); + } + if contains_any(&code, &["reinterpret_cast<", "const_cast<", "static_cast<"]) { + sites.push(site(path, line_no, line, "cpp_ubsan_cast", "ubsan")); + } + } + sites +} + +fn scan_csharp_sites(path: &str, contents: &str) -> Vec { + let mut sites = Vec::new(); + let mut in_block_comment = false; + let mut unsafe_depth = 0_i32; + for (index, line) in contents.lines().enumerate() { + let line_no = (index + 1) as u32; + let code = strip_quoted_literals(&strip_go_comment(line, &mut in_block_comment)); + if code.trim().is_empty() { + continue; + } + if is_csharp_concurrency_site(&code) { + sites.push(site(path, line_no, line, "csharp_concurrency", "concurrency")); + } + if is_csharp_unsafe_site(&code, unsafe_depth) { + sites.push(site(path, line_no, line, "csharp_unsafe_memory", "unsafe")); + } + unsafe_depth = update_csharp_unsafe_depth(&code, unsafe_depth); + } + sites +} + fn site( path: &str, line: u32, @@ -344,6 +600,341 @@ fn is_go_channel_site(code: &str) -> bool { || code.contains("<-") } +fn contains_any(code: &str, needles: &[&str]) -> bool { + needles.iter().any(|needle| code.contains(needle)) +} + +fn is_rust_atomic_site(code: &str) -> bool { + contains_any( 
+ code, + &[ + "std::sync::atomic", + "core::sync::atomic", + "Ordering::", + ".load(", + ".store(", + ".swap(", + ".compare_exchange(", + ".compare_exchange_weak(", + ".fetch_add(", + ".fetch_sub(", + ".fetch_or(", + ".fetch_and(", + ".fetch_xor(", + ".fetch_update(", + "fence(", + "AtomicBool", + "AtomicI", + "AtomicU", + "AtomicPtr", + ], + ) +} + +fn is_rust_concurrency_site(code: &str) -> bool { + contains_any( + code, + &[ + "thread::spawn", + "std::thread::spawn", + "std::sync::Mutex", + "std::sync::RwLock", + "std::sync::Condvar", + "std::sync::Arc", + "Arc<", + "Mutex<", + "RwLock<", + "Condvar", + "mpsc::", + "crossbeam::channel", + ".lock(", + ".try_lock(", + ], + ) +} + +fn is_rust_unsafe_operation(code: &str) -> bool { + contains_any( + code, + &[ + "std::ptr::", + "core::ptr::", + "ptr::read", + "ptr::write", + "ptr::copy", + "copy_nonoverlapping", + "from_raw", + "into_raw", + "get_unchecked", + "get_unchecked_mut", + "unwrap_unchecked", + "transmute", + "assume_init", + "MaybeUninit", + "addr_of!", + "asm!", + ".add(", + ".offset(", + ".read(", + ".write(", + ".copy_to(", + ".copy_from(", + ], + ) || pointer_deref_site(code) +} + +fn update_unsafe_depth(code: &str, unsafe_depth: i32) -> i32 { + let relevant = if unsafe_depth > 0 { + code + } else if let Some(index) = code.find("unsafe {") { + &code[index..] 
+ } else { + "" + }; + if relevant.is_empty() { + return unsafe_depth; + } + (unsafe_depth + brace_delta(relevant)).max(0) +} + +fn is_c_tsan_site(code: &str) -> bool { + contains_any( + code, + &[ + "_Atomic", + "atomic_", + "__atomic_", + "__sync_", + "pthread_create", + "pthread_mutex_", + "pthread_rwlock_", + "pthread_cond_", + "pthread_spin_", + "pthread_barrier_", + "mtx_", + "cnd_", + "thrd_create", + ], + ) +} + +fn is_c_asan_api_site(code: &str) -> bool { + contains_any( + code, + &[ + "memcpy(", + "memmove(", + "memset(", + "strcpy(", + "strncpy(", + "strcat(", + "strncat(", + "sprintf(", + "snprintf(", + "vsprintf(", + "vsnprintf(", + "gets(", + "scanf(", + "sscanf(", + "fscanf(", + "alloca(", + ], + ) +} + +fn is_c_lsan_site(code: &str) -> bool { + contains_any( + code, + &[ + "malloc(", + "calloc(", + "realloc(", + "aligned_alloc(", + "posix_memalign(", + "strdup(", + "strndup(", + "free(", + ], + ) +} + +fn is_c_pointer_hazard(code: &str) -> bool { + code.contains("->") || pointer_deref_site(code) +} + +fn is_c_cast_ub_site(code: &str) -> bool { + contains_any( + code, + &[ + "(intptr_t)", + "(uintptr_t)", + "(size_t)", + "(ssize_t)", + "(int)", + "(long)", + "(short)", + "(char)", + "(void *)", + "(char *)", + "(int *)", + "(long *)", + ], + ) +} + +fn is_cpp_tsan_site(code: &str) -> bool { + contains_any( + code, + &[ + "std::thread", + "std::jthread", + "std::async", + "std::atomic", + "std::mutex", + "std::shared_mutex", + "std::recursive_mutex", + "std::condition_variable", + "std::lock_guard", + "std::unique_lock", + "std::scoped_lock", + "std::call_once", + ".lock(", + ".try_lock(", + ".unlock(", + ], + ) +} + +fn is_cpp_asan_api_site(code: &str) -> bool { + contains_any( + code, + &[ + "std::memcpy(", + "std::memmove(", + "std::memset(", + "memcpy(", + "memmove(", + "memset(", + "strcpy(", + "strncpy(", + "strcat(", + "strncat(", + "sprintf(", + "snprintf(", + "std::span<", + "std::string_view", + ], + ) +} + +fn is_cpp_lsan_site(code: &str) 
-> bool { + contains_any( + code, + &[ + "malloc(", + "calloc(", + "realloc(", + "free(", + "std::malloc(", + "std::calloc(", + "std::realloc(", + "std::free(", + "new ", + "new[]", + "delete ", + "delete[]", + ], + ) +} + +fn is_cpp_pointer_or_cast_hazard(code: &str) -> bool { + code.contains("->") + || pointer_deref_site(code) + || contains_any(code, &["reinterpret_cast<", "const_cast<"]) +} + +fn is_arithmetic_ub_site(code: &str) -> bool { + contains_any(code, &[" / ", " % ", "<<", ">>"]) +} + +fn is_csharp_concurrency_site(code: &str) -> bool { + contains_any( + code, + &[ + "Task.Run", + "Task.Factory.StartNew", + "new Thread", + "ThreadPool.", + "Parallel.", + "lock (", + "lock(", + "Monitor.", + "Interlocked.", + "Volatile.", + "ConcurrentDictionary", + "ConcurrentQueue", + "ConcurrentBag", + "BlockingCollection", + "SemaphoreSlim", + "Mutex", + "ReaderWriterLockSlim", + "SpinLock", + ], + ) +} + +fn is_csharp_unsafe_site(code: &str, unsafe_depth: i32) -> bool { + (unsafe_depth > 0 && (code.contains("->") || pointer_deref_site(code))) + || contains_any( + code, + &[ + "unsafe", + "fixed (", + "fixed(", + "stackalloc", + "Marshal.", + "IntPtr", + "UIntPtr", + "GCHandle", + "Unsafe.", + "MemoryMarshal.", + "byte*", + "char*", + "int*", + "long*", + "void*", + ], + ) +} + +fn update_csharp_unsafe_depth(code: &str, unsafe_depth: i32) -> i32 { + let relevant = if unsafe_depth > 0 { + code + } else if let Some(index) = code.find("unsafe {") { + &code[index..] 
+ } else { + "" + }; + if relevant.is_empty() { + return unsafe_depth; + } + (unsafe_depth + brace_delta(relevant)).max(0) +} + +fn pointer_deref_site(code: &str) -> bool { + let trimmed = code.trim_start(); + trimmed.starts_with('*') + || contains_any(code, &["= *", "=*", "return *", "(*", ", *", "[*"]) +} + +fn brace_delta(code: &str) -> i32 { + code.chars().fold(0_i32, |total, ch| match ch { + '{' => total + 1, + '}' => total - 1, + _ => total, + }) +} + fn is_atomic_site(code: &str) -> bool { code.contains("@atomic") || code.contains("@cmpxchg") @@ -505,6 +1096,34 @@ fn strip_go_comment(line: &str, in_block_comment: &mut bool) -> String { } } +fn strip_quoted_literals(line: &str) -> String { + let mut out = String::with_capacity(line.len()); + let mut chars = line.chars().peekable(); + while let Some(ch) = chars.next() { + if ch == '"' || ch == '\'' { + let quote = ch; + out.push_str("\"\""); + let mut escaped = false; + for inner in chars.by_ref() { + if escaped { + escaped = false; + continue; + } + if inner == '\\' { + escaped = true; + continue; + } + if inner == quote { + break; + } + } + } else { + out.push(ch); + } + } + out +} + fn unit_for_site(blob: &BlobFile, units: &[LogicalUnit], line: u32) -> LogicalUnit { units .iter() @@ -584,6 +1203,54 @@ mod tests { assert_eq!(storage.count_rows("unit_hazards").unwrap(), 3); } + #[test] + fn ingests_rust_loom_and_unsafe_hazards_for_current_snapshot() { + let dir = tempdir().unwrap(); + fs::create_dir_all(dir.path().join("src")).unwrap(); + fs::write( + dir.path().join("src/lib.rs"), + "use std::sync::atomic::{AtomicUsize, Ordering};\n\npub fn run(ptr: *const u8) -> usize {\n let value = AtomicUsize::new(0);\n value.fetch_add(1, Ordering::SeqCst);\n unsafe {\n ptr.add(1).read()\n }\n}\n", + ) + .unwrap(); + let storage = Storage::open_memory().unwrap(); + + let stats = ingest_hazards(&storage, dir.path(), "rust", "abc", Some(10)).unwrap(); + + assert_eq!(stats.scanned_files, 1); + assert_eq!(stats.hazards, 5); 
+ assert_eq!(storage.count_rows("unit_hazards").unwrap(), 5); + } + + #[test] + fn system_hazard_scans_cover_c_cpp_and_csharp_categories() { + let c_types = hazard_types(scan_c_sites( + "runtime.c", + "void run(char *dst, char *src, int n) {\n pthread_mutex_lock(&lock);\n char *buf = malloc(32);\n memcpy(dst, src, n);\n int shifted = n << src[0];\n free(buf);\n}\n", + )); + assert!(c_types.contains(&"c_tsan_concurrency".to_string())); + assert!(c_types.contains(&"c_asan_raw_memory_api".to_string())); + assert!(c_types.contains(&"c_lsan_lifetime".to_string())); + assert!(c_types.contains(&"c_ubsan_arithmetic".to_string())); + + let cpp_types = hazard_types(scan_cpp_sites( + "runtime.cpp", + "void run(char *dst, char *src, int n) {\n std::atomic ready;\n auto *buf = new char[32];\n std::memcpy(dst, src, n);\n auto raw = reinterpret_cast(dst);\n auto shifted = n << raw[0];\n delete[] buf;\n}\n", + )); + assert!(cpp_types.contains(&"cpp_tsan_concurrency".to_string())); + assert!(cpp_types.contains(&"cpp_asan_raw_memory_api".to_string())); + assert!(cpp_types.contains(&"cpp_asan_pointer_or_cast".to_string())); + assert!(cpp_types.contains(&"cpp_lsan_lifetime".to_string())); + assert!(cpp_types.contains(&"cpp_ubsan_cast".to_string())); + assert!(cpp_types.contains(&"cpp_ubsan_arithmetic".to_string())); + + let csharp_types = hazard_types(scan_csharp_sites( + "Worker.cs", + "public unsafe class Worker {\n public void Run(byte* ptr) {\n Task.Run(() => {});\n fixed (byte* p = buffer) {\n *p = 1;\n }\n }\n}\n", + )); + assert!(csharp_types.contains(&"csharp_concurrency".to_string())); + assert!(csharp_types.contains(&"csharp_unsafe_memory".to_string())); + } + #[test] fn go_hazard_scan_ignores_comments() { let sites = scan_go_sites( @@ -594,4 +1261,18 @@ mod tests { assert_eq!(sites.len(), 1); assert_eq!(sites[0].hazard_type, "go_concurrency_channel"); } + + #[test] + fn systems_hazard_scans_ignore_comments_and_strings() { + let sites = scan_c_sites( + "runtime.c", + "void 
run(void) {\n // pthread_mutex_lock(&lock);\n const char *s = \"memcpy(dst, src, n)\";\n}\n", + ); + + assert!(sites.is_empty()); + } + + fn hazard_types(sites: Vec) -> Vec { + sites.into_iter().map(|site| site.hazard_type).collect() + } } diff --git a/gems/lineage/src/mutant.rs b/gems/lineage/src/mutant.rs index 56daba1d0..a04342f3e 100644 --- a/gems/lineage/src/mutant.rs +++ b/gems/lineage/src/mutant.rs @@ -248,7 +248,7 @@ fn matching_units<'a>(units: &'a [LogicalUnit], fact: &MutantFact) -> Vec<&'a Lo let aliases = method_aliases(&fact.method); units .iter() - .filter(|unit| aliases.iter().any(|alias| alias == &unit.name)) + .filter(|unit| unit_matches_aliases(unit, &aliases)) .collect() } @@ -281,7 +281,7 @@ fn fallback_matching_unit_entries( let aliases = method_aliases(&fact.method); units .iter() - .filter(|unit| aliases.iter().any(|alias| alias == &unit.name)) + .filter(|unit| unit_matches_aliases(unit, &aliases)) .collect::>() }; for unit in path_matches { @@ -316,7 +316,7 @@ fn fallback_owner_mentioned_function_entries( continue; } for unit in units { - if unit.kind.as_str() != "function" || !aliases.iter().any(|alias| alias == &unit.name) { + if unit.kind.as_str() != "function" || !unit_matches_aliases(unit, &aliases) { continue; } if !owner_needles.iter().any(|needle| { @@ -346,8 +346,7 @@ fn fallback_unique_source_function_entry( continue; } for unit in units { - if unit.kind.as_str() == "function" && aliases.iter().any(|alias| alias == &unit.name) - { + if unit.kind.as_str() == "function" && unit_matches_aliases(unit, &aliases) { candidates.push(UnitMatch { path: path.clone(), unit: unit.clone(), @@ -412,6 +411,13 @@ fn owner_text_needles(owner: &str) -> Vec { needles } +fn unit_matches_aliases(unit: &LogicalUnit, aliases: &[String]) -> bool { + aliases.iter().any(|alias| { + unit.name == *alias + || (!alias.contains('.') && !alias.contains('#') && unit.name.ends_with(&format!(".{alias}"))) + }) +} + fn method_aliases(method: &str) -> Vec { let raw = 
method.trim().trim_end_matches('*'); let mut aliases = vec![raw.to_string()]; @@ -706,7 +712,7 @@ mod tests { assert_eq!(stats.facts, 1); assert_eq!(stats.units, 1); assert_eq!(stats.quality_events, 1); - assert_eq!(stats.exposure_events, 4); + assert_eq!(stats.exposure_events, 3); let killed: i64 = storage .connection() .query_row( @@ -715,7 +721,7 @@ mod tests { |row| row.get(0), ) .unwrap(); - assert_eq!(killed, 4); + assert_eq!(killed, 3); } #[test] diff --git a/gems/slopcop/README.md b/gems/slopcop/README.md index 663799e77..622d5a554 100644 --- a/gems/slopcop/README.md +++ b/gems/slopcop/README.md @@ -130,8 +130,9 @@ This is the format Lineage uses for gutter and source overlays. ## Concurrency Hazard Detection / Constraint Reports -`constraints` checks changed files against named coverage constraints, -currently used by CLEAR for Loom and VOPR hazard coverage: +`constraints` checks changed files against named coverage constraints. +It currently supports first-party hazard providers for Zig, Go, Rust, +C, C++, and C#: ```bash bundle exec gems/slopcop/exe/slopcop constraints \ @@ -139,11 +140,14 @@ bundle exec gems/slopcop/exe/slopcop constraints \ --base=origin/master \ --coverage=loom:zig/zig-out/coverage-loom/merged/kcov-merged/cobertura.xml \ --coverage=vopr:zig/zig-out/coverage-vopr/merged/kcov-merged/cobertura.xml \ + --language=zig \ --markdown=/tmp/slopcop-constraints.md \ --json=/tmp/slopcop-constraints.json \ --sarif=/tmp/slopcop-constraints.sarif ``` +Common evidence tags are `loom`, `vopr`, `race`, `concurrency`, +`tsan`, `asan`, `lsan`, `ubsan`, `miri`, and `unsafe`. Findings are advisory unless `--strict` is supplied. ## CI Integration @@ -187,16 +191,19 @@ SlopCop relies on [Boobytrap](../boobytrap/README.md) for branch-arm normalization and [Decomplex](../decomplex/README.md) language lexicons for classifying type/null guards and diagnostic paths. Ruby support has been battle tested to develop the CLEAR compiler. 
Zig -support is currently being used for CLEAR runtime hazard coverage. Other -languages are currently experimental. +support is currently being used for CLEAR runtime hazard coverage. Go, +Rust, C, C++, and C# hazard providers are experimental. - [x] Ruby: fully supported. - [ ] Python: experimentally supported. - [ ] JavaScript: experimentally supported. - [ ] TypeScript: experimentally supported. -- [ ] Go: experimentally supported. -- [ ] Rust: experimentally supported. - [ ] Zig: experimentally supported. +- [ ] Go: experimentally supported, including concurrency hazards. +- [ ] Rust: experimentally supported, including Loom and unsafe hazards. +- [ ] C: experimentally supported, including sanitizer hazards. +- [ ] C++: experimentally supported, including sanitizer hazards. +- [ ] C#: experimentally supported, including concurrency/unsafe hazards. ## Boundaries diff --git a/gems/slopcop/exe/slopcop b/gems/slopcop/exe/slopcop index 64bc15b8a..73f115f98 100755 --- a/gems/slopcop/exe/slopcop +++ b/gems/slopcop/exe/slopcop @@ -21,7 +21,7 @@ def usage slopcop constraints [--repo=.] --base=origin/master [--head=HEAD] \\ [--coverage=loom:zig/zig-out/coverage-loom/merged/kcov-merged/cobertura.xml] \\ [--coverage=vopr:zig/zig-out/coverage-vopr/merged/kcov-merged/cobertura.xml] \\ - [--language=zig|go] \\ + [--language=zig|go|rust|c|cpp|csharp] \\ [--sarif=slopcop-constraints.sarif] [--json=constraints.sarif] \\ [--markdown=constraints.md] [--strict] @@ -30,7 +30,8 @@ def usage kcov Cobertura XML, kcov codecov JSON, coverage.py JSON, or Nil-Kill branch coverage JSON constraints --coverage - typed coverage input. Common types: loom:PATH, vopr:PATH, race:PATH, concurrency:PATH. + typed coverage input. Common types: loom:PATH, vopr:PATH, race:PATH, concurrency:PATH, + tsan:PATH, asan:PATH, lsan:PATH, ubsan:PATH, miri:PATH, unsafe:PATH. Findings are advisory warnings unless --strict is supplied. dark-arms Lineage-ready SARIF JSON overlay containing all classified dark arms. 
@@ -38,7 +39,7 @@ def usage exit 1 end -TREE_SITTER_EXTS = %w[rb py pyi js jsx mjs cjs ts tsx go rs zig].freeze +TREE_SITTER_EXTS = %w[rb py pyi js jsx mjs cjs ts tsx go rs zig c h cc cpp cxx hh hpp hxx cs].freeze def default_file_globs roots = %w[app lib src] diff --git a/gems/slopcop/lib/slopcop/constraints.rb b/gems/slopcop/lib/slopcop/constraints.rb index 0ba0fdf12..e7c89d148 100644 --- a/gems/slopcop/lib/slopcop/constraints.rb +++ b/gems/slopcop/lib/slopcop/constraints.rb @@ -1,10 +1,14 @@ # frozen_string_literal: true require_relative "constraints/audit" +require_relative "constraints/c_provider" +require_relative "constraints/cpp_provider" +require_relative "constraints/csharp_provider" require_relative "constraints/diff" require_relative "constraints/evidence" require_relative "constraints/finding" require_relative "constraints/go_provider" +require_relative "constraints/rust_provider" require_relative "constraints/sarif" require_relative "constraints/zig_provider" @@ -14,7 +18,11 @@ module Constraints def providers { + "c" => CProvider, + "cpp" => CppProvider, + "csharp" => CsharpProvider, "go" => GoProvider, + "rust" => RustProvider, "zig" => ZigProvider } end diff --git a/gems/slopcop/lib/slopcop/constraints/c_provider.rb b/gems/slopcop/lib/slopcop/constraints/c_provider.rb new file mode 100644 index 000000000..eb632e907 --- /dev/null +++ b/gems/slopcop/lib/slopcop/constraints/c_provider.rb @@ -0,0 +1,155 @@ +# frozen_string_literal: true + +require_relative "language_provider" + +module SlopCop + module Constraints + module CProvider + module_function + + EXCLUDED_DIRS = %w[.git vendor third_party node_modules build cmake-build-debug cmake-build-release tmp dist tests test].freeze + TSAN_NEEDLES = [ + "_Atomic", + "atomic_", + "__atomic_", + "__sync_", + "pthread_create", + "pthread_mutex_", + "pthread_rwlock_", + "pthread_cond_", + "pthread_spin_", + "pthread_barrier_", + "mtx_", + "cnd_", + "thrd_create" + ].freeze + ASAN_NEEDLES = [ + "memcpy(", + 
"memmove(", + "memset(", + "strcpy(", + "strncpy(", + "strcat(", + "strncat(", + "sprintf(", + "snprintf(", + "vsprintf(", + "vsnprintf(", + "gets(", + "scanf(", + "sscanf(", + "fscanf(", + "alloca(" + ].freeze + LSAN_NEEDLES = [ + "malloc(", + "calloc(", + "realloc(", + "aligned_alloc(", + "posix_memalign(", + "strdup(", + "strndup(", + "free(" + ].freeze + + def rules + evidence_rule("tsan", "C TSan coverage missing", "C shared-concurrency site lacks TSan coverage evidence") + + evidence_rule("asan", "C ASan coverage missing", "C raw-memory site lacks ASan coverage evidence") + + evidence_rule("lsan", "C LSan coverage missing", "C allocation/lifetime site lacks LSan coverage evidence") + + evidence_rule("ubsan", "C UBSan coverage missing", "C undefined-behavior site lacks UBSan coverage evidence") + end + + def evidence_rule(evidence, name, short) + [ + { + "id" => "slopcop-c-#{evidence}-uncovered", + "name" => name, + "shortDescription" => { "text" => short }, + "fullDescription" => { + "text" => "A changed C #{evidence.upcase} hazard was not reached by #{evidence.upcase} coverage evidence." + }, + "defaultConfiguration" => { "level" => "warning" } + } + ] + end + + def findings(repo:, additions:, evidence:) + LanguageProvider.findings(self, repo: repo, additions: additions, evidence: evidence) + end + + def scan_hazards(repo:, paths: nil) + LanguageProvider.scan_hazards(self, repo: repo, paths: paths) + end + + def source_path?(path) + (path.end_with?(".c") || path.end_with?(".h")) && + !LanguageProvider.excluded_path?(path, dirs: EXCLUDED_DIRS) + end + + def rule_id_for(required_evidence) + "slopcop-c-#{required_evidence}-uncovered" + end + + def scan_file(path, contents) + sites = [] + comment = { active: false } + contents.lines.each_with_index do |source, index| + line = index + 1 + code = LanguageProvider.c_style_code(source, comment) + next if code.strip.empty? 
+ + add_tsan_site(sites, path, line, source, code) + add_asan_site(sites, path, line, source, code) + add_lsan_site(sites, path, line, source, code) + add_ubsan_site(sites, path, line, source, code) + end + sites + end + + def add_tsan_site(sites, path, line, source, code) + return unless LanguageProvider.any_include?(code, TSAN_NEEDLES) + + sites << LanguageProvider.hazard(path, line, source, "c_tsan_concurrency", "tsan", "C atomic/thread/lock site") + end + + def add_asan_site(sites, path, line, source, code) + if LanguageProvider.any_include?(code, ASAN_NEEDLES) + sites << LanguageProvider.hazard(path, line, source, "c_asan_raw_memory_api", "asan", "C raw-memory or unchecked buffer API") + end + if pointer_hazard?(code) + sites << LanguageProvider.hazard(path, line, source, "c_asan_pointer", "asan", "C pointer dereference/arithmetic site") + end + end + + def add_lsan_site(sites, path, line, source, code) + return unless LanguageProvider.any_include?(code, LSAN_NEEDLES) + + sites << LanguageProvider.hazard(path, line, source, "c_lsan_lifetime", "lsan", "C allocation/free lifetime site") + end + + def add_ubsan_site(sites, path, line, source, code) + if arithmetic_ub_site?(code) + sites << LanguageProvider.hazard(path, line, source, "c_ubsan_arithmetic", "ubsan", "C divide/modulo/shift arithmetic site") + end + if cast_ub_site?(code) + sites << LanguageProvider.hazard(path, line, source, "c_ubsan_cast", "ubsan", "C pointer/integer cast site") + end + end + + def pointer_hazard?(code) + code.include?("->") || + code.match?(/\A\s*\*\s*[A-Za-z_][A-Za-z0-9_]*/) || + code.match?(/(?:=\s*|return\s+|\(|,|\[)\*\s*[A-Za-z_][A-Za-z0-9_]*/) + end + + def arithmetic_ub_site?(code) + code.match?(%r{[A-Za-z0-9_\])]\s*(?:/|%)\s*[A-Za-z_(]}) || + code.match?(/[A-Za-z0-9_\])]\s*(?:<<|>>)\s*[A-Za-z_(]/) + end + + def cast_ub_site?(code) + code.match?(/\([A-Za-z_][A-Za-z0-9_\s]*(?:\*|intptr_t|uintptr_t|size_t|ssize_t|int|long|short|char)[A-Za-z0-9_\s\*]*\)\s*[A-Za-z_(&*]/) + end + 
end + end +end diff --git a/gems/slopcop/lib/slopcop/constraints/cpp_provider.rb b/gems/slopcop/lib/slopcop/constraints/cpp_provider.rb new file mode 100644 index 000000000..fc150edce --- /dev/null +++ b/gems/slopcop/lib/slopcop/constraints/cpp_provider.rb @@ -0,0 +1,153 @@ +# frozen_string_literal: true + +require_relative "language_provider" + +module SlopCop + module Constraints + module CppProvider + module_function + + EXCLUDED_DIRS = %w[.git vendor third_party node_modules build cmake-build-debug cmake-build-release tmp dist tests test].freeze + EXTENSIONS = %w[.cc .cpp .cxx .hh .hpp .hxx].freeze + TSAN_NEEDLES = [ + "std::thread", + "std::jthread", + "std::async", + "std::atomic", + "std::mutex", + "std::shared_mutex", + "std::recursive_mutex", + "std::condition_variable", + "std::lock_guard", + "std::unique_lock", + "std::scoped_lock", + "std::call_once", + ".lock(", + ".try_lock(", + ".unlock(" + ].freeze + ASAN_NEEDLES = [ + "std::memcpy(", + "std::memmove(", + "std::memset(", + "memcpy(", + "memmove(", + "memset(", + "strcpy(", + "strncpy(", + "strcat(", + "strncat(", + "sprintf(", + "snprintf(", + "std::span<", + "std::string_view" + ].freeze + LSAN_NEEDLES = [ + "malloc(", + "calloc(", + "realloc(", + "free(", + "std::malloc(", + "std::calloc(", + "std::realloc(", + "std::free(" + ].freeze + + def rules + evidence_rule("tsan", "C++ TSan coverage missing", "C++ shared-concurrency site lacks TSan coverage evidence") + + evidence_rule("asan", "C++ ASan coverage missing", "C++ raw-memory site lacks ASan coverage evidence") + + evidence_rule("lsan", "C++ LSan coverage missing", "C++ allocation/lifetime site lacks LSan coverage evidence") + + evidence_rule("ubsan", "C++ UBSan coverage missing", "C++ undefined-behavior site lacks UBSan coverage evidence") + end + + def evidence_rule(evidence, name, short) + [ + { + "id" => "slopcop-cpp-#{evidence}-uncovered", + "name" => name, + "shortDescription" => { "text" => short }, + "fullDescription" => { + "text" => 
"A changed C++ #{evidence.upcase} hazard was not reached by #{evidence.upcase} coverage evidence." + }, + "defaultConfiguration" => { "level" => "warning" } + } + ] + end + + def findings(repo:, additions:, evidence:) + LanguageProvider.findings(self, repo: repo, additions: additions, evidence: evidence) + end + + def scan_hazards(repo:, paths: nil) + LanguageProvider.scan_hazards(self, repo: repo, paths: paths) + end + + def source_path?(path) + EXTENSIONS.any? { |extension| path.end_with?(extension) } && + !LanguageProvider.excluded_path?(path, dirs: EXCLUDED_DIRS) + end + + def rule_id_for(required_evidence) + "slopcop-cpp-#{required_evidence}-uncovered" + end + + def scan_file(path, contents) + sites = [] + comment = { active: false } + contents.lines.each_with_index do |source, index| + line = index + 1 + code = LanguageProvider.c_style_code(source, comment) + next if code.strip.empty? + + add_tsan_site(sites, path, line, source, code) + add_asan_site(sites, path, line, source, code) + add_lsan_site(sites, path, line, source, code) + add_ubsan_site(sites, path, line, source, code) + end + sites + end + + def add_tsan_site(sites, path, line, source, code) + return unless LanguageProvider.any_include?(code, TSAN_NEEDLES) + + sites << LanguageProvider.hazard(path, line, source, "cpp_tsan_concurrency", "tsan", "C++ atomic/thread/lock site") + end + + def add_asan_site(sites, path, line, source, code) + if LanguageProvider.any_include?(code, ASAN_NEEDLES) + sites << LanguageProvider.hazard(path, line, source, "cpp_asan_raw_memory_api", "asan", "C++ raw-memory or unchecked buffer API") + end + if pointer_or_cast_hazard?(code) + sites << LanguageProvider.hazard(path, line, source, "cpp_asan_pointer_or_cast", "asan", "C++ pointer/cast hazard") + end + end + + def add_lsan_site(sites, path, line, source, code) + if LanguageProvider.any_include?(code, LSAN_NEEDLES) || code.match?(/\b(?:new|delete)(?:\[\])?\b/) + sites << LanguageProvider.hazard(path, line, source, 
"cpp_lsan_lifetime", "lsan", "C++ allocation/free lifetime site") + end + end + + def add_ubsan_site(sites, path, line, source, code) + if arithmetic_ub_site?(code) + sites << LanguageProvider.hazard(path, line, source, "cpp_ubsan_arithmetic", "ubsan", "C++ divide/modulo/shift arithmetic site") + end + if code.match?(/\b(?:reinterpret_cast|const_cast|static_cast)\s*") || + code.match?(/\b(?:reinterpret_cast|const_cast)\s*>)\s*[A-Za-z_(]/) + end + end + end +end diff --git a/gems/slopcop/lib/slopcop/constraints/csharp_provider.rb b/gems/slopcop/lib/slopcop/constraints/csharp_provider.rb new file mode 100644 index 000000000..4c3380c24 --- /dev/null +++ b/gems/slopcop/lib/slopcop/constraints/csharp_provider.rb @@ -0,0 +1,132 @@ +# frozen_string_literal: true + +require_relative "language_provider" + +module SlopCop + module Constraints + module CsharpProvider + module_function + + EXCLUDED_DIRS = %w[.git bin obj packages node_modules tmp dist tests test].freeze + CONCURRENCY_NEEDLES = [ + "Task.Run", + "Task.Factory.StartNew", + "new Thread", + "ThreadPool.", + "Parallel.", + "lock (", + "lock(", + "Monitor.", + "Interlocked.", + "Volatile.", + "ConcurrentDictionary", + "ConcurrentQueue", + "ConcurrentBag", + "BlockingCollection", + "SemaphoreSlim", + "Mutex", + "ReaderWriterLockSlim", + "SpinLock" + ].freeze + UNSAFE_NEEDLES = [ + "unsafe", + "fixed (", + "fixed(", + "stackalloc", + "Marshal.", + "IntPtr", + "UIntPtr", + "GCHandle", + "Unsafe.", + "MemoryMarshal." + ].freeze + + def rules + [ + { + "id" => "slopcop-csharp-concurrency-uncovered", + "name" => "C# concurrency coverage missing", + "shortDescription" => { "text" => "C# concurrency site lacks concurrency coverage evidence" }, + "fullDescription" => { + "text" => "A changed C# task, thread, lock, or concurrent collection site was not reached by concurrency coverage evidence." 
+ }, + "defaultConfiguration" => { "level" => "warning" } + }, + { + "id" => "slopcop-csharp-unsafe-uncovered", + "name" => "C# unsafe coverage missing", + "shortDescription" => { "text" => "C# unsafe/native-memory site lacks unsafe coverage evidence" }, + "fullDescription" => { + "text" => "A changed C# unsafe, native-memory, pointer, or Marshal site was not reached by unsafe coverage evidence." + }, + "defaultConfiguration" => { "level" => "warning" } + } + ] + end + + def findings(repo:, additions:, evidence:) + LanguageProvider.findings(self, repo: repo, additions: additions, evidence: evidence) + end + + def scan_hazards(repo:, paths: nil) + LanguageProvider.scan_hazards(self, repo: repo, paths: paths) + end + + def source_path?(path) + path.end_with?(".cs") && !LanguageProvider.excluded_path?(path, dirs: EXCLUDED_DIRS) + end + + def rule_id_for(required_evidence) + required_evidence == "concurrency" ? "slopcop-csharp-concurrency-uncovered" : "slopcop-csharp-unsafe-uncovered" + end + + def scan_file(path, contents) + sites = [] + comment = { active: false } + unsafe_depth = 0 + contents.lines.each_with_index do |source, index| + line = index + 1 + code = LanguageProvider.c_style_code(source, comment) + next if code.strip.empty? + + if concurrency_site?(code) + sites << LanguageProvider.hazard(path, line, source, "csharp_concurrency", "concurrency", "C# task/thread/lock site") + end + if unsafe_site?(code, unsafe_depth) + sites << LanguageProvider.hazard(path, line, source, "csharp_unsafe_memory", "unsafe", "C# unsafe/native-memory site") + end + unsafe_depth = update_unsafe_depth(code, unsafe_depth) + end + sites + end + + def concurrency_site?(code) + LanguageProvider.any_include?(code, CONCURRENCY_NEEDLES) + end + + def unsafe_site?(code, unsafe_depth) + unsafe_depth.positive? 
&& pointer_operation?(code) || + LanguageProvider.any_include?(code, UNSAFE_NEEDLES) || + code.match?(/\b(?:byte|char|int|long|void)\s*\*/) + end + + def pointer_operation?(code) + code.include?("->") || code.match?(/\*\s*[A-Za-z_][A-Za-z0-9_]*/) + end + + def update_unsafe_depth(code, unsafe_depth) + code = code.chomp + relevant = if unsafe_depth.positive? + code + elsif (match = code.match(/\bunsafe\s*\{.*\z/)) + match[0] + else + "" + end + return unsafe_depth if relevant.empty? + + [unsafe_depth + relevant.count("{") - relevant.count("}"), 0].max + end + end + end +end diff --git a/gems/slopcop/lib/slopcop/constraints/language_provider.rb b/gems/slopcop/lib/slopcop/constraints/language_provider.rb new file mode 100644 index 000000000..970de415d --- /dev/null +++ b/gems/slopcop/lib/slopcop/constraints/language_provider.rb @@ -0,0 +1,125 @@ +# frozen_string_literal: true + +require "set" + +require_relative "finding" + +module SlopCop + module Constraints + module LanguageProvider + module_function + + def findings(provider, repo:, additions:, evidence:) + repo = File.expand_path(repo) + additions.each_with_object([]) do |(path, lines), out| + next unless provider.source_path?(path) + + hazards = provider.scan_file(path, source_contents(repo, path)) + changed = lines.to_set + hazards.each do |hazard| + next unless changed.include?(hazard[:line]) + next if covered?(evidence, hazard) + + out << Finding.new( + path: path, + line: hazard[:line], + rule_id: provider.rule_id_for(hazard[:required_evidence]), + message: "changed #{hazard[:label]} has no #{hazard[:required_evidence]} coverage evidence", + source: hazard[:source], + hazard_type: hazard[:hazard_type], + required_evidence: hazard[:required_evidence], + severity: "warning" + ) + end + end + end + + def scan_hazards(provider, repo:, paths: nil) + repo = File.expand_path(repo) + files = if paths && !Array(paths).empty? 
+ Array(paths).select { |path| provider.source_path?(path) } + else + Dir.chdir(repo) { Dir["**/*"] }.select { |path| File.file?(File.join(repo, path)) && provider.source_path?(path) } + end + files.flat_map do |path| + provider.scan_file(path, source_contents(repo, path)) + end.sort_by { |site| [site[:path], site[:line], site[:hazard_type]] } + end + + def covered?(evidence, hazard) + evidence_type = hazard[:required_evidence] + return false unless evidence.known_type?(evidence_type) + + evidence.line_covered?(evidence_type, hazard[:path], hazard[:line]) + end + + def source_contents(repo, path) + file = File.join(repo, path) + File.file?(file) ? File.read(file) : "" + end + + def hazard(path, line, source, hazard_type, required_evidence, label) + { + path: path, + line: line, + source: source.strip, + hazard_type: hazard_type, + required_evidence: required_evidence, + label: label + } + end + + def c_style_code(line, in_block_comment) + out = +"" + rest = line.to_s + loop do + if in_block_comment[:active] + after = rest.split("*/", 2)[1] + return strip_strings(out) unless after + + in_block_comment[:active] = false + rest = after + next + end + + block = rest.index("/*") + comment = rest.index("//") + case + when block && comment && comment < block + out << rest[0...comment] + return strip_strings(out) + when block + out << rest[0...block] + rest = rest[(block + 2)..].to_s + in_block_comment[:active] = true + when comment + out << rest[0...comment] + return strip_strings(out) + else + out << rest + return strip_strings(out) + end + end + end + + def strip_strings(code) + code.to_s.gsub(/"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'/, '""') + end + + def excluded_path?(path, dirs:, file_suffixes: []) + parts = path.split("/") + return true if parts.any? { |part| dirs.include?(part) || part.start_with?(".") } + + file_suffixes.any? { |suffix| path.end_with?(suffix) } + end + + def token?(code, token) + code.match?(/(? 
"slopcop-rust-loom-uncovered", + "name" => "Rust Loom coverage missing", + "shortDescription" => { "text" => "Rust concurrency site lacks Loom coverage evidence" }, + "fullDescription" => { + "text" => "A changed Rust atomic, lock, thread, or shared-concurrency site was not reached by Loom coverage evidence." + }, + "defaultConfiguration" => { "level" => "warning" } + }, + { + "id" => "slopcop-rust-miri-uncovered", + "name" => "Rust unsafe coverage missing", + "shortDescription" => { "text" => "Rust unsafe site lacks Miri/unsafe coverage evidence" }, + "fullDescription" => { + "text" => "A changed Rust unsafe block, unsafe declaration, or unsafe operation was not reached by Miri-style evidence." + }, + "defaultConfiguration" => { "level" => "warning" } + } + ] + end + + def findings(repo:, additions:, evidence:) + LanguageProvider.findings(self, repo: repo, additions: additions, evidence: evidence) + end + + def scan_hazards(repo:, paths: nil) + LanguageProvider.scan_hazards(self, repo: repo, paths: paths) + end + + def source_path?(path) + path.end_with?(".rs") && !LanguageProvider.excluded_path?(path, dirs: EXCLUDED_DIRS) + end + + def rule_id_for(required_evidence) + required_evidence == "loom" ? "slopcop-rust-loom-uncovered" : "slopcop-rust-miri-uncovered" + end + + def scan_file(path, contents) + sites = [] + comment = { active: false } + unsafe_depth = 0 + contents.lines.each_with_index do |source, index| + line = index + 1 + code = LanguageProvider.c_style_code(source, comment) + next if code.strip.empty? 
+ + add_loom_sites(sites, path, line, source, code) + add_unsafe_sites(sites, path, line, source, code, unsafe_depth) + unsafe_depth = update_unsafe_depth(code, unsafe_depth) + end + sites + end + + def add_loom_sites(sites, path, line, source, code) + if atomic_site?(code) + sites << LanguageProvider.hazard(path, line, source, "rust_loom_atomic", "loom", "atomic or memory-ordering site") + end + if concurrency_site?(code) + sites << LanguageProvider.hazard(path, line, source, "rust_loom_concurrency", "loom", "thread/lock/shared-concurrency site") + end + end + + def add_unsafe_sites(sites, path, line, source, code, unsafe_depth) + if code.match?(/\bunsafe\s+fn\b/) + sites << LanguageProvider.hazard(path, line, source, "rust_unsafe_fn", "miri", "unsafe function") + end + if code.match?(/\bunsafe\s+impl\b/) + sites << LanguageProvider.hazard(path, line, source, "rust_unsafe_impl", "miri", "unsafe impl") + end + if unsafe_block_start?(code) + sites << LanguageProvider.hazard(path, line, source, "rust_unsafe_block", "miri", "unsafe block") + end + if unsafe_operation?(code) && (unsafe_depth.positive? || unsafe_block_start?(code)) + sites << LanguageProvider.hazard(path, line, source, "rust_unsafe_operation", "miri", "unsafe operation inside unsafe context") + end + end + + def atomic_site?(code) + code.match?(/\bAtomic(?:Bool|I(?:8|16|32|64|size)|U(?:8|16|32|64|size)|Ptr)\b/) || + LanguageProvider.any_include?(code, ATOMIC_NEEDLES) + end + + def concurrency_site?(code) + LanguageProvider.any_include?(code, CONCURRENCY_NEEDLES) + end + + def unsafe_block_start?(code) + code.match?(/\bunsafe\s*\{/) + end + + def unsafe_operation?(code) + LanguageProvider.any_include?(code, UNSAFE_API_NEEDLES) || + code.match?(/(?:\w|\))\s*\.\s*(?:add|offset|read|write|copy_to|copy_from)\s*\(/) || + code.match?(/\*\s*[A-Za-z_][A-Za-z0-9_]*/) + end + + def update_unsafe_depth(code, unsafe_depth) + code = code.chomp + relevant = if unsafe_depth.positive? 
+ code + elsif (match = code.match(/\bunsafe\s*\{.*\z/)) + match[0] + else + "" + end + return unsafe_depth if relevant.empty? + + [unsafe_depth + relevant.count("{") - relevant.count("}"), 0].max + end + end + end +end diff --git a/gems/slopcop/test/constraints_systems_provider_test.rb b/gems/slopcop/test/constraints_systems_provider_test.rb new file mode 100644 index 000000000..b5e792ac7 --- /dev/null +++ b/gems/slopcop/test/constraints_systems_provider_test.rb @@ -0,0 +1,140 @@ +# frozen_string_literal: true + +require "json" +require "fileutils" +require "minitest/autorun" +require "tmpdir" + +require_relative "../lib/slopcop" + +class ConstraintsSystemsProviderTest < Minitest::Test + def test_new_systems_providers_are_registered + assert_same SlopCop::Constraints::RustProvider, SlopCop::Constraints.providers.fetch("rust") + assert_same SlopCop::Constraints::CProvider, SlopCop::Constraints.providers.fetch("c") + assert_same SlopCop::Constraints::CppProvider, SlopCop::Constraints.providers.fetch("cpp") + assert_same SlopCop::Constraints::CsharpProvider, SlopCop::Constraints.providers.fetch("csharp") + end + + def test_rust_provider_finds_loom_and_unsafe_hazards + with_file("src/lib.rs", <<~RS) do |dir, path| + use std::sync::atomic::{AtomicUsize, Ordering}; + + pub fn run(ptr: *const u8) -> usize { + let value = AtomicUsize::new(0); + value.fetch_add(1, Ordering::SeqCst); + unsafe { + ptr.add(1).read() + } + } + RS + hazards = SlopCop::Constraints::RustProvider.scan_hazards(repo: dir, paths: [path]) + types = hazards.map { |hazard| hazard[:hazard_type] } + + assert_includes types, "rust_loom_atomic" + assert_includes types, "rust_unsafe_block" + assert_includes types, "rust_unsafe_operation" + end + end + + def test_rust_provider_suppresses_matching_loom_coverage + with_file("src/lib.rs", "pub fn run(v: &AtomicUsize) { v.fetch_add(1, Ordering::SeqCst); }\n") do |dir, path| + evidence = SlopCop::Constraints::Evidence.from_specs(["loom:#{coverage_json(dir, path, 1 
=> 1)}"], repo: dir) + findings = SlopCop::Constraints::RustProvider.findings(repo: dir, additions: { path => [1] }, evidence: evidence) + + assert_empty findings + end + end + + def test_c_provider_finds_sanitizer_hazard_families + with_file("src/runtime.c", <<~C) do |dir, path| + #include + void run(char *dst, char *src, int n) { + pthread_mutex_lock(&lock); + char *buf = malloc(32); + memcpy(dst, src, n); + int shifted = n << src[0]; + free(buf); + } + C + hazards = SlopCop::Constraints::CProvider.scan_hazards(repo: dir, paths: [path]) + types = hazards.map { |hazard| hazard[:hazard_type] } + + assert_includes types, "c_tsan_concurrency" + assert_includes types, "c_asan_raw_memory_api" + assert_includes types, "c_lsan_lifetime" + assert_includes types, "c_ubsan_arithmetic" + end + end + + def test_cpp_provider_finds_sanitizer_hazard_families + with_file("src/runtime.cpp", <<~CPP) do |_dir, path| + #include + void run(char *dst, char *src, int n) { + std::atomic ready; + auto *buf = new char[32]; + std::memcpy(dst, src, n); + auto raw = reinterpret_cast(dst); + auto shifted = n << raw[0]; + delete[] buf; + } + CPP + hazards = SlopCop::Constraints::CppProvider.scan_file(path, File.read(File.join(_dir, path))) + types = hazards.map { |hazard| hazard[:hazard_type] } + + assert_includes types, "cpp_tsan_concurrency" + assert_includes types, "cpp_asan_raw_memory_api" + assert_includes types, "cpp_asan_pointer_or_cast" + assert_includes types, "cpp_lsan_lifetime" + assert_includes types, "cpp_ubsan_cast" + assert_includes types, "cpp_ubsan_arithmetic" + end + end + + def test_csharp_provider_finds_concurrency_and_unsafe_hazards + with_file("src/Worker.cs", <<~CS) do |dir, path| + using System.Threading.Tasks; + public unsafe class Worker { + public void Run(byte* ptr) { + Task.Run(() => {}); + fixed (byte* p = buffer) { + *p = 1; + } + } + } + CS + hazards = SlopCop::Constraints::CsharpProvider.scan_hazards(repo: dir, paths: [path]) + types = hazards.map { |hazard| 
hazard[:hazard_type] } + + assert_includes types, "csharp_concurrency" + assert_includes types, "csharp_unsafe_memory" + end + end + + def test_comment_and_string_hazards_are_ignored + with_file("src/runtime.c", <<~C) do |dir, path| + void run(void) { + // pthread_mutex_lock(&lock); + const char *s = "memcpy(dst, src, n)"; + } + C + assert_empty SlopCop::Constraints::CProvider.scan_hazards(repo: dir, paths: [path]) + end + end + + private + + def with_file(path, contents) + Dir.mktmpdir do |dir| + abs = File.join(dir, path) + FileUtils.mkdir_p(File.dirname(abs)) + File.write(abs, contents) + yield dir, path + end + end + + def coverage_json(dir, path, hits) + coverage = File.join(dir, "coverage.json") + File.write(coverage, JSON.dump(coverage: { path => hits.transform_keys(&:to_s) })) + coverage + end +end From f4ef9db39bda65fdc13251aacfad91bc816bc0ef Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Tue, 16 Jun 2026 15:48:27 +0000 Subject: [PATCH 05/52] Move Lineage UI shell into templates --- gems/lineage/src/lsp.rs | 2 + gems/lineage/src/storage.rs | 3 +- gems/lineage/src/ui.rs | 1479 +++++++++++------ gems/lineage/ui/assets/app.css | 145 +- gems/lineage/ui/templates/app.html | 8 + gems/lineage/ui/templates/branch_context.html | 31 + gems/lineage/ui/templates/coverage_table.html | 23 + gems/lineage/ui/templates/dashboard.html | 14 + .../ui/templates/dashboard_disclosure.html | 9 + .../ui/templates/dashboard_hazard_files.html | 15 + .../ui/templates/dashboard_ratio_bar.html | 11 + .../ui/templates/dashboard_sidebar.html | 16 + gems/lineage/ui/templates/hotspot_list.html | 17 + gems/lineage/ui/templates/layers_menu.html | 31 + gems/lineage/ui/templates/source_sidebar.html | 13 + .../ui/templates/source_unavailable.html | 9 + gems/lineage/ui/templates/source_view.html | 29 + gems/lineage/ui/templates/warning_banner.html | 12 + 18 files changed, 1317 insertions(+), 550 deletions(-) create mode 100644 gems/lineage/ui/templates/app.html create mode 100644 
gems/lineage/ui/templates/branch_context.html create mode 100644 gems/lineage/ui/templates/coverage_table.html create mode 100644 gems/lineage/ui/templates/dashboard.html create mode 100644 gems/lineage/ui/templates/dashboard_disclosure.html create mode 100644 gems/lineage/ui/templates/dashboard_hazard_files.html create mode 100644 gems/lineage/ui/templates/dashboard_ratio_bar.html create mode 100644 gems/lineage/ui/templates/dashboard_sidebar.html create mode 100644 gems/lineage/ui/templates/hotspot_list.html create mode 100644 gems/lineage/ui/templates/layers_menu.html create mode 100644 gems/lineage/ui/templates/source_sidebar.html create mode 100644 gems/lineage/ui/templates/source_unavailable.html create mode 100644 gems/lineage/ui/templates/source_view.html create mode 100644 gems/lineage/ui/templates/warning_banner.html diff --git a/gems/lineage/src/lsp.rs b/gems/lineage/src/lsp.rs index cf7a904f6..4dc7e3f12 100644 --- a/gems/lineage/src/lsp.rs +++ b/gems/lineage/src/lsp.rs @@ -555,6 +555,8 @@ mod tests { distinct_tests: 2, mutant_verified_tests: 1, mutant_killed_tests: 1, + stochastic_mutant_verified_tests: 1, + invariant_mutant_verified_tests: 0, line_hits: Some(4), line_coverage: None, mutant_coverage: None, diff --git a/gems/lineage/src/storage.rs b/gems/lineage/src/storage.rs index 725f7e6c4..6c6b9a169 100644 --- a/gems/lineage/src/storage.rs +++ b/gems/lineage/src/storage.rs @@ -1433,6 +1433,7 @@ impl Storage { line_exposure AS ( SELECT e.path, e.line, + l.hits, COUNT(DISTINCT CASE WHEN e.is_verified = 1 THEN e.test_type END) AS verified_test_types, MAX(CASE WHEN e.is_verified = 1 AND e.is_mutation_verified = 1 THEN 1 ELSE 0 END) AS mutant_verified, MAX(CASE WHEN e.is_verified = 1 AND e.is_mutation_killed = 1 THEN 1 ELSE 0 END) AS mutant_killed, @@ -1475,7 +1476,7 @@ impl Storage { SUM(stochastic_mutant_killed) AS stochastic_mutant_killed_covered_lines, SUM(invariant_mutant_verified) AS invariant_mutant_verified_covered_lines, 
SUM(invariant_mutant_killed) AS invariant_mutant_killed_covered_lines, - SUM(CASE WHEN verified_test_types >= 2 THEN 1 ELSE 0 END) AS multi_type_covered_lines + SUM(CASE WHEN verified_test_types >= 2 OR hits > 1 THEN 1 ELSE 0 END) AS multi_type_covered_lines FROM line_exposure GROUP BY path ), diff --git a/gems/lineage/src/ui.rs b/gems/lineage/src/ui.rs index 22b131858..9c4f58162 100644 --- a/gems/lineage/src/ui.rs +++ b/gems/lineage/src/ui.rs @@ -95,9 +95,35 @@ struct UiCoverageContext { covered_lines: i64, partial_lines: i64, missed_lines: i64, + multi_type_lines: i64, + mutant_backed_lines: i64, + stochastic_mutant_backed_lines: i64, + invariant_mutant_backed_lines: i64, coverage_percent: f64, } +#[derive(Debug, Clone, Copy, PartialEq)] +struct LineQualityBar { + tracked_lines: i64, + covered_lines: i64, + partial_lines: i64, + multi_type_lines: i64, + mutant_backed_lines: i64, + coverage_percent: f64, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +struct LineQualitySegments { + multi: f64, + covered: f64, + partial: f64, + missed: f64, + mutant_multi: f64, + mutant_covered: f64, + mutant_partial: f64, + mutant_gap: f64, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum CoverageSort { Path, @@ -319,6 +345,8 @@ pub struct UiLineAnnotation { pub distinct_tests: i64, pub mutant_verified_tests: i64, pub mutant_killed_tests: i64, + pub stochastic_mutant_verified_tests: i64, + pub invariant_mutant_verified_tests: i64, pub line_hits: Option, pub line_coverage: Option, pub mutant_coverage: Option, @@ -467,6 +495,168 @@ struct IndexPageTemplate<'a> { body: &'a str, } +#[derive(Template)] +#[template(path = "app.html")] +struct AppTemplate<'a> { + source_sidebar: bool, + sidebar: &'a str, + main: &'a str, +} + +#[derive(Template)] +#[template(path = "dashboard_sidebar.html")] +struct DashboardSidebarTemplate<'a> { + summary: &'a str, + nav: &'a str, + current_directory: &'a str, + show_directory_input: bool, + filter: &'a str, + search_options: &'a str, + files: &'a 
str, +} + +#[derive(Template)] +#[template(path = "source_sidebar.html")] +struct SourceSidebarTemplate<'a> { + path: &'a str, + nav: &'a str, + outline: &'a str, + show_empty_outline: bool, +} + +#[derive(Template)] +#[template(path = "source_unavailable.html")] +struct SourceUnavailableTemplate<'a> { + error: &'a str, +} + +#[derive(Template)] +#[template(path = "dashboard.html")] +struct DashboardTemplate<'a> { + branch_context: &'a str, + warnings: &'a str, + active_hazards: &'a str, + highest_hazard_files: &'a str, + highest_risk_units: &'a str, + highest_architecture_risks: &'a str, + code_tree_heading: &'a str, + code_tree: &'a str, +} + +#[derive(Template)] +#[template(path = "dashboard_disclosure.html")] +struct DashboardDisclosureTemplate<'a> { + title: &'a str, + open: bool, + body: &'a str, +} + +#[derive(Template)] +#[template(path = "dashboard_ratio_bar.html")] +struct DashboardRatioBarTemplate<'a> { + label: &'a str, + detail: &'a str, + bar: &'a str, + total: i64, + total_label: &'a str, + covered: i64, + covered_label: &'a str, +} + +#[derive(Template)] +#[template(path = "dashboard_hazard_files.html")] +struct DashboardHazardFilesTemplate<'a> { + files: &'a [DashboardHazardFileItem], +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct DashboardHazardFileItem { + href: String, + path: String, + detail: String, + hazards: i64, +} + +#[derive(Template)] +#[template(path = "hotspot_list.html")] +struct HotspotListTemplate<'a> { + wrapper_class: &'a str, + empty_message: &'a str, + items: &'a [HotspotItem], +} + +#[derive(Debug, Clone, PartialEq)] +struct HotspotItem { + href: String, + kind: String, + name: String, + path: String, + detail: String, + score: String, +} + +#[derive(Template)] +#[template(path = "coverage_table.html")] +struct CoverageTableTemplate<'a> { + name_header: &'a str, + total_header: &'a str, + covered_header: &'a str, + partial_header: &'a str, + missed_header: &'a str, + percent_header: &'a str, + rows: &'a str, + empty: 
bool, + subtotal: &'a str, +} + +#[derive(Template)] +#[template(path = "branch_context.html")] +struct BranchContextTemplate<'a> { + branch: &'a str, + commit: &'a str, + coverage_percent: &'a str, + covered_lines: i64, + tracked_lines: i64, + partial_lines: i64, + missed_lines: i64, + mutant_backed_lines: i64, + stochastic_mutant_backed_lines: i64, + invariant_mutant_backed_lines: i64, + line_quality_bar: &'a str, + breadcrumbs: &'a str, +} + +#[derive(Template)] +#[template(path = "source_view.html")] +struct SourceViewTemplate<'a> { + path: &'a str, + summary: &'a str, + layers_menu: &'a str, + branch_context: &'a str, + warnings: &'a str, + code_lines: &'a str, + history: &'a str, +} + +#[derive(Template)] +#[template(path = "layers_menu.html")] +struct LayersMenuTemplate; + +#[derive(Template)] +#[template(path = "warning_banner.html")] +struct WarningBannerTemplate<'a> { + warnings: &'a [WarningBannerItem], +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct WarningBannerItem { + input_id: String, + key: String, + level: String, + label: String, + detail: String, +} + #[derive(Clone)] struct UiServerState { db: Arc, @@ -502,6 +692,8 @@ struct AnnotationBuilder { distinct_tests: i64, mutant_verified_tests: i64, mutant_killed_tests: i64, + stochastic_mutant_verified_tests: i64, + invariant_mutant_verified_tests: i64, line_hits: Option, line_coverage: Option, mutant_coverage: Option, @@ -1788,6 +1980,7 @@ fn dashboard_line_counts( ) SELECT path, line, + latest_lines.hits, COUNT(DISTINCT CASE WHEN is_verified = 1 THEN test_type END) AS verified_test_types, MAX(CASE WHEN is_verified = 1 AND is_mutation_verified = 1 THEN 1 ELSE 0 END) AS mutant_verified, MAX(CASE WHEN is_verified = 1 AND is_mutation_killed = 1 THEN 1 ELSE 0 END) AS mutant_killed, @@ -1832,12 +2025,14 @@ fn dashboard_line_counts( row.get::<_, i64>(6)?, row.get::<_, i64>(7)?, row.get::<_, i64>(8)?, + row.get::<_, i64>(9)?, )) })?; for row in rows { let ( path, _line, + hits, verified_test_types, 
has_mutant_verified, has_mutant_killed, @@ -1870,7 +2065,7 @@ fn dashboard_line_counts( if has_invariant_mutant_killed > 0 { counts.invariant_mutant_killed += 1; } - if verified_test_types >= 2 { + if verified_test_types >= 2 || hits > 1 { counts.multi_type += 1; } } @@ -2921,6 +3116,8 @@ pub fn line_annotations( distinct_tests: builder.distinct_tests, mutant_verified_tests: builder.mutant_verified_tests, mutant_killed_tests: builder.mutant_killed_tests, + stochastic_mutant_verified_tests: builder.stochastic_mutant_verified_tests, + invariant_mutant_verified_tests: builder.invariant_mutant_verified_tests, line_hits: builder.line_hits, line_coverage: builder.line_coverage, mutant_coverage: builder.mutant_coverage, @@ -3005,6 +3202,8 @@ fn empty_annotation(line: u32) -> UiLineAnnotation { distinct_tests: 0, mutant_verified_tests: 0, mutant_killed_tests: 0, + stochastic_mutant_verified_tests: 0, + invariant_mutant_verified_tests: 0, line_hits: None, line_coverage: None, mutant_coverage: None, @@ -3196,7 +3395,7 @@ fn apply_test_exposure( r#" WITH ranked_exposure AS ( SELECT path, line, branch_id, test_id, test_type, is_verified, - is_mutation_verified, is_mutation_killed, + is_mutation_verified, is_mutation_killed, mutation_kind, ROW_NUMBER() OVER ( PARTITION BY path, line, COALESCE(branch_id, ''), test_id, test_type ORDER BY timestamp DESC, id DESC @@ -3211,7 +3410,17 @@ fn apply_test_exposure( ) SELECT line, test_type, COUNT(DISTINCT test_id), COUNT(DISTINCT CASE WHEN is_mutation_verified = 1 THEN test_id END), - COUNT(DISTINCT CASE WHEN is_mutation_killed = 1 THEN test_id END) + COUNT(DISTINCT CASE WHEN is_mutation_killed = 1 THEN test_id END), + COUNT(DISTINCT CASE + WHEN is_mutation_verified = 1 + AND lower(COALESCE(mutation_kind, '')) = 'stochastic' + THEN test_id + END), + COUNT(DISTINCT CASE + WHEN is_mutation_verified = 1 + AND lower(COALESCE(mutation_kind, '')) IN ('invariant', 'contract') + THEN test_id + END) FROM latest_exposure WHERE is_verified = 1 
GROUP BY line, test_type @@ -3224,10 +3433,20 @@ fn apply_test_exposure( row.get::<_, i64>(2)?, row.get::<_, i64>(3)?, row.get::<_, i64>(4)?, + row.get::<_, i64>(5)?, + row.get::<_, i64>(6)?, )) })?; for row in rows { - let (line, test_type, tests, mutation_verified, mutation_killed) = row?; + let ( + line, + test_type, + tests, + mutation_verified, + mutation_killed, + stochastic_mutation_verified, + invariant_mutation_verified, + ) = row?; let entry = lines.entry(line).or_default(); if paint_line_coverage { entry.covered = true; @@ -3237,6 +3456,8 @@ fn apply_test_exposure( entry.distinct_tests += tests; entry.mutant_verified_tests += mutation_verified; entry.mutant_killed_tests += mutation_killed; + entry.stochastic_mutant_verified_tests += stochastic_mutation_verified; + entry.invariant_mutant_verified_tests += invariant_mutation_verified; entry.mutant_tested |= mutation_verified > 0 || mutation_killed > 0; } Ok(()) @@ -4147,105 +4368,138 @@ fn render_index_page( .map(|path| source_payload_with_overlays(storage, repo, path, commit, overlays)) .transpose(); + let source_sidebar = matches!(&payload, Ok(Some(_))); + let sidebar = match &payload { + Ok(Some(payload)) => render_source_sidebar(payload, ¤t_directory, filter), + _ => render_dashboard_sidebar(DashboardSidebarArgs { + dashboard: &dashboard, + current_directory: ¤t_directory, + filter, + files: &files, + child_directories: &child_directories, + child_files: &child_files, + filtered_files: &filtered, + selected_path: selected_path.as_deref(), + }), + }; + let main = match &payload { + Ok(Some(payload)) => render_source_view(payload, filter, &branch_context), + Ok(None) => render_dashboard( + &dashboard, + ¤t_directory, + &child_directories, + &table_files, + filter, + sort, + &branch_context, + ), + Err(error) => render_source_unavailable(&error.to_string()), + }; + let app = AppTemplate { + source_sidebar, + sidebar: &sidebar, + main: &main, + } + .render() + .context("render lineage app template")?; + 
render_page("Lineage", &app) +} + +fn render_page(title: &str, body: &str) -> Result { + IndexPageTemplate { title, body } + .render() + .context("render lineage index template") +} + +struct DashboardSidebarArgs<'a> { + dashboard: &'a UiDashboard, + current_directory: &'a str, + filter: &'a str, + files: &'a [UiFile], + child_directories: &'a [UiDirectory], + child_files: &'a [&'a UiFile], + filtered_files: &'a [&'a UiFile], + selected_path: Option<&'a str>, +} + +fn render_dashboard_sidebar(args: DashboardSidebarArgs<'_>) -> String { + let summary = format!( + "{} files{} | {:.1}% covered", + args.dashboard.files, + directory_label_suffix(args.current_directory), + args.dashboard.coverage_percent + ); + let nav = render_sidebar_navigation(args.current_directory, args.filter); + let search_options = + render_search_options(args.files, args.child_directories, args.current_directory); + let file_links = render_sidebar_file_links(&args); + render_template_string( + DashboardSidebarTemplate { + summary: &summary, + nav: &nav, + current_directory: args.current_directory, + show_directory_input: !args.current_directory.is_empty(), + filter: args.filter, + search_options: &search_options, + files: &file_links, + }, + "dashboard sidebar template", + ) +} + +fn render_sidebar_file_links(args: &DashboardSidebarArgs<'_>) -> String { let mut out = String::new(); - out.push_str("
'); - match &payload { - Ok(Some(payload)) => { - out.push_str("

Lineage

"); - out.push_str(&html_escape(&payload.path)); - out.push_str("
"); - out.push_str(&render_sidebar_navigation(¤t_directory, filter)); - out.push_str("
"); - let outline = render_source_outline(payload); - if outline.is_empty() { - out.push_str(""); - } else { - out.push_str(&outline); - } - } - _ => { - out.push_str("

Lineage

"); - out.push_str(&format!( - "{} files{} | {:.1}% covered", - dashboard.files, - directory_label_suffix(¤t_directory), - dashboard.coverage_percent + if args.filter.trim().is_empty() { + if !args.current_directory.is_empty() { + out.push_str(&render_parent_directory_link( + args.current_directory, + args.filter, )); - out.push_str("
"); - out.push_str(&render_sidebar_navigation(¤t_directory, filter)); - out.push_str("
"); - out.push_str("
"); - if !current_directory.is_empty() { - out.push_str(""); - } - out.push_str(""); - out.push_str(&render_search_options(&files, &child_directories, ¤t_directory)); - out.push_str("
"); - out.push_str(""); } - } - out.push_str("
"); - match payload { - Ok(Some(payload)) => out.push_str(&render_source_view(&payload, filter, &branch_context)), - Ok(None) => { - out.push_str(&render_dashboard( - &dashboard, - ¤t_directory, - &child_directories, - &table_files, - filter, - sort, - &branch_context, - )); + for directory in args.child_directories { + out.push_str(&render_directory_link(directory, false, args.filter)); } - Err(error) => { - out.push_str("
Source unavailable
"); - out.push_str("
"); - out.push_str(&html_escape(&error.to_string())); - out.push_str("
"); - out.push_str("
The selected path is not available in the current checkout. Regenerate coverage for HEAD or open a historical commit view.
"); + for file in args.child_files { + let active = args.selected_path == Some(file.path.as_str()); + out.push_str(&render_file_link(file, active, args.filter)); + } + if args.child_directories.is_empty() && args.child_files.is_empty() { + out.push_str("
No tracked files in this directory.
"); + } + } else { + for file in args.filtered_files { + let active = args.selected_path == Some(file.path.as_str()); + out.push_str(&render_file_link(file, active, args.filter)); + } + if args.filtered_files.is_empty() { + out.push_str("
No matching files in this directory.
"); } } - out.push_str("
"); - render_page("Lineage", &out) + out } -fn render_page(title: &str, body: &str) -> Result { - IndexPageTemplate { title, body } - .render() - .context("render lineage index template") +fn render_source_sidebar(payload: &UiSourcePayload, current_directory: &str, filter: &str) -> String { + let nav = render_sidebar_navigation(current_directory, filter); + let outline = render_source_outline(payload); + render_template_string( + SourceSidebarTemplate { + path: &payload.path, + nav: &nav, + outline: &outline, + show_empty_outline: outline.is_empty(), + }, + "source sidebar template", + ) +} + +fn render_source_unavailable(error: &str) -> String { + render_template_string(SourceUnavailableTemplate { error }, "source unavailable template") +} + +fn render_template_string(template: T, name: &str) -> String { + template.render().unwrap_or_else(|error| { + panic!("failed to render {name}: {error}"); + }) } fn filtered_files<'a>(files: &'a [UiFile], filter: &str) -> Vec<&'a UiFile> { @@ -4556,55 +4810,83 @@ fn render_directory_link(directory: &UiDirectory, active: bool, filter: &str) -> out } -fn render_coverage_bar( - tracked_lines: i64, - covered_lines: i64, - line_coverage: f64, - mutant_killed_covered_lines: i64, - dark_arm_findings: i64, -) -> String { - let (strong, weak) = coverage_bar_widths( - tracked_lines, - covered_lines, - line_coverage, - mutant_killed_covered_lines, - dark_arm_findings, - ); +fn render_line_quality_bar(bar: LineQualityBar) -> String { + let segments = line_quality_segments(bar); let title = format!( - "{:.1}% covered; {:.1}% mutant-killed/no-partial confidence; {:.1}% weak covered tail", - (strong + weak).min(100.0), - strong, - weak + "{:.1}% covered; {} total, {} covered, {} multi-covered, {} partial, {} missed, {} mutant-backed", + bar.coverage_percent.clamp(0.0, 100.0), + bar.tracked_lines.max(0), + bar.covered_lines.clamp(0, bar.tracked_lines.max(0)), + bar.multi_type_lines.max(0), + bar.partial_lines.max(0), + 
missed_line_count(bar.tracked_lines, bar.covered_lines), + bar.mutant_backed_lines.max(0) ); format!( - "", + concat!( + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ), html_escape(&title), - strong, - weak + segments.multi, + segments.covered, + segments.partial, + segments.missed, + segments.mutant_multi, + segments.mutant_covered, + segments.mutant_partial, + segments.mutant_gap ) } -fn coverage_bar_widths( - tracked_lines: i64, - covered_lines: i64, - line_coverage: f64, - mutant_killed_covered_lines: i64, - dark_arm_findings: i64, -) -> (f64, f64) { - if tracked_lines <= 0 { - let covered = line_coverage.clamp(0.0, 100.0); - return (0.0, covered); - } - let covered_lines = covered_lines.clamp(0, tracked_lines); - let dark_arm_lines = dark_arm_findings.clamp(0, covered_lines); - let missing_mutant_lines = covered_lines - .saturating_sub(mutant_killed_covered_lines.clamp(0, covered_lines)); - let weak_lines = missing_mutant_lines.max(dark_arm_lines).min(covered_lines); - let strong_lines = covered_lines.saturating_sub(weak_lines); - ( - percent(strong_lines, tracked_lines), - percent(weak_lines, tracked_lines), - ) +fn line_quality_segments(bar: LineQualityBar) -> LineQualitySegments { + let tracked_lines = bar.tracked_lines.max(0); + if tracked_lines == 0 { + let covered: f64 = bar.coverage_percent.clamp(0.0, 100.0); + return LineQualitySegments { + multi: 0.0, + covered, + partial: 0.0, + missed: (100.0 - covered).max(0.0), + mutant_multi: 0.0, + mutant_covered: 0.0, + mutant_partial: 0.0, + mutant_gap: 100.0, + }; + } + let covered_lines = bar.covered_lines.clamp(0, tracked_lines); + let partial_lines = bar.partial_lines.clamp(0, covered_lines); + let full_covered_lines = covered_lines.saturating_sub(partial_lines); + let multi_type_lines = bar.multi_type_lines.clamp(0, full_covered_lines); + let covered_single_lines = full_covered_lines.saturating_sub(multi_type_lines); + let missed_lines = 
tracked_lines.saturating_sub(covered_lines); + let mutant_backed_lines = bar.mutant_backed_lines.clamp(0, covered_lines); + let mutant_multi_lines = mutant_backed_lines.min(multi_type_lines); + let remaining_mutant = mutant_backed_lines.saturating_sub(mutant_multi_lines); + let mutant_covered_lines = remaining_mutant.min(covered_single_lines); + let remaining_mutant = remaining_mutant.saturating_sub(mutant_covered_lines); + let mutant_partial_lines = remaining_mutant.min(partial_lines); + let mutant_painted_lines = mutant_multi_lines + mutant_covered_lines + mutant_partial_lines; + LineQualitySegments { + multi: percent(multi_type_lines, tracked_lines), + covered: percent(covered_single_lines, tracked_lines), + partial: percent(partial_lines, tracked_lines), + missed: percent(missed_lines, tracked_lines), + mutant_multi: percent(mutant_multi_lines, tracked_lines), + mutant_covered: percent(mutant_covered_lines, tracked_lines), + mutant_partial: percent(mutant_partial_lines, tracked_lines), + mutant_gap: percent(tracked_lines.saturating_sub(mutant_painted_lines), tracked_lines), + } } fn render_dashboard( @@ -4617,163 +4899,136 @@ fn render_dashboard( branch_context: &UiBranchContext, ) -> String { let directory = normalize_directory(directory); - let mut out = String::new(); - out.push_str("
"); - if directory.is_empty() { - out.push_str("Coverage Dashboard"); - } else { - out.push_str("Directory: "); - out.push_str(&html_escape(&directory)); - out.push('/'); - } - out.push_str("
Current Lineage database snapshot"); - if !directory.is_empty() { - out.push_str(" scoped to "); - out.push_str(&html_escape(&directory)); - out.push('/'); - } - out.push_str("
"); - out.push_str("
root"); - if !directory.is_empty() { - out.push_str("up"); - } - out.push_str("
"); - out.push_str("
"); let coverage_context = dashboard_coverage_context(dashboard, directory.as_str(), files); - out.push_str(&render_branch_context(branch_context, &coverage_context, filter)); - out.push_str("
"); - out.push_str(&render_metric( - "Line coverage", - &format!("{:.1}%", dashboard.coverage_percent), - &format!( - "{} / {} tracked lines covered", - dashboard.covered_lines, dashboard.tracked_lines - ), - )); - out.push_str(&render_metric( - "Hazard evidence", - &format!("{:.1}%", dashboard.hazard_evidence_percent), - &format!( - "{} / {} active hazards have required systems evidence", - dashboard.evidence_covered_hazards, dashboard.active_hazards - ), - )); - out.push_str(&render_metric( - "Hazard verification", - &format!("{:.1}%", dashboard.hazard_coverage_percent), - &format!( - "{} / {} active hazards have evidence plus invariant mutants", - dashboard.covered_hazards, dashboard.active_hazards - ), - )); - out.push_str(&render_metric( - "Mutant-backed lines", - &format!("{:.1}%", dashboard.mutant_verified_covered_percent), - &format!( - "{} / {} covered lines have mutant-verified evidence", - dashboard.mutant_verified_covered_lines, dashboard.covered_lines - ), - )); - out.push_str(&render_metric( - "Stochastic mutants", - &format!("{:.1}%", dashboard.stochastic_mutant_verified_covered_percent), - &format!( - "{} / {} covered lines are stochastic-mutant verified", - dashboard.stochastic_mutant_verified_covered_lines, dashboard.covered_lines - ), - )); - out.push_str(&render_metric( - "Invariant mutants", - &format!("{:.1}%", dashboard.invariant_mutant_verified_covered_percent), - &format!( - "{} / {} covered lines are invariant-mutant verified", - dashboard.invariant_mutant_verified_covered_lines, dashboard.covered_lines - ), - )); - out.push_str(&render_metric( - "Multi-type lines", - &format!("{:.1}%", dashboard.multi_type_covered_percent), - &format!( - "{} / {} covered lines have multiple verified test types", - dashboard.multi_type_covered_lines, dashboard.covered_lines - ), - )); - out.push_str(&render_metric( - "SARIF findings", - &dashboard.sarif_findings.to_string(), - "persisted first-party and ecosystem analysis findings", - )); - 
out.push_str(&render_metric( - "Files", - &dashboard.files.to_string(), - &format!("{} files currently report coverage", dashboard.files_with_coverage), - )); - out.push_str("
"); - out.push_str(&render_warning_banner(&dashboard.warnings)); - - out.push_str("

Highest Risk Units

"); - out.push_str(&render_unit_hotspots(&dashboard.top_units, filter)); - out.push_str("
"); - - out.push_str("

Highest Architectural Risks

"); - out.push_str(&render_architecture_risks( - &dashboard.top_architecture_risks, - filter, - )); - out.push_str("
"); - - out.push_str("

Code tree

"); - out.push_str(&render_code_tree_table( + let branch_context = render_branch_context(branch_context, &coverage_context, filter); + let warnings = render_warning_banner(&dashboard.warnings); + let active_hazards = render_active_hazards_section(dashboard); + let highest_hazard_files = render_highest_hazard_files_section(dashboard, filter); + let highest_risk_units = render_dashboard_disclosure( + "Highest Risk Units", + false, + &render_unit_hotspots(&dashboard.top_units, filter), + ); + let highest_architecture_risks = render_dashboard_disclosure( + "Highest Architectural Risks", + false, + &render_architecture_risks(&dashboard.top_architecture_risks, filter), + ); + let code_tree_heading = format!( + "Code tree ({} files - {} SARIF findings)", + dashboard.files, dashboard.sarif_findings + ); + let code_tree = render_code_tree_table( dashboard, &directory, files, filter, sort, - )); - out.push_str("
"); + ); + render_template_string( + DashboardTemplate { + branch_context: &branch_context, + warnings: &warnings, + active_hazards: &active_hazards, + highest_hazard_files: &highest_hazard_files, + highest_risk_units: &highest_risk_units, + highest_architecture_risks: &highest_architecture_risks, + code_tree_heading: &code_tree_heading, + code_tree: &code_tree, + }, + "dashboard template", + ) +} + +fn render_dashboard_disclosure(title: &str, open: bool, body: &str) -> String { + render_template_string( + DashboardDisclosureTemplate { title, open, body }, + "dashboard disclosure template", + ) +} - out.push_str("

Active Hazards

"); +fn render_active_hazards_section(dashboard: &UiDashboard) -> String { + let mut body = String::new(); if dashboard.active_hazards == 0 { - out.push_str("

No active systems hazards are recorded.

"); + body.push_str("

No active systems hazards are recorded.

"); } else { - out.push_str("
"); - out.push_str("

"); - out.push_str(&format!( - "{} hazards have required systems evidence; {} also have invariant-mutant proof.", - dashboard.evidence_covered_hazards, + body.push_str(&render_dashboard_ratio_bar_row( + "Hazard verification", + dashboard.active_hazards, dashboard.covered_hazards, + &format!( + "{} total hazards / {} covered / {} with required systems evidence", + dashboard.active_hazards, + dashboard.covered_hazards, + dashboard.evidence_covered_hazards + ), + "active hazards", + "covered hazards", + "hazard-bar", )); - out.push_str("

"); } - out.push_str("
"); + render_dashboard_disclosure("Active Hazards", dashboard.active_hazards > 0, &body) +} - out.push_str("

Highest Hazard Files

"); - if dashboard.top_hazard_files.is_empty() { - out.push_str("

No hazard-heavy files to show.

"); - } else { - out.push_str(""); - } - out.push_str("
"); - out.push_str("
"); - out +fn render_highest_hazard_files_section(dashboard: &UiDashboard, filter: &str) -> String { + let files = dashboard + .top_hazard_files + .iter() + .map(|file| DashboardHazardFileItem { + href: page_href(&file.path, None, filter), + path: file.path.clone(), + detail: file_detail_text(file), + hazards: file.hazards, + }) + .collect::>(); + let body = render_template_string( + DashboardHazardFilesTemplate { files: &files }, + "dashboard hazard files template", + ); + render_dashboard_disclosure( + "Highest Hazard Files", + dashboard.active_hazards > 0 && !dashboard.top_hazard_files.is_empty(), + &body, + ) +} + +fn render_dashboard_ratio_bar_row( + label: &str, + total: i64, + covered: i64, + detail: &str, + total_label: &str, + covered_label: &str, + bar_class: &str, +) -> String { + let bar = render_ratio_bar(total, covered, bar_class); + render_template_string( + DashboardRatioBarTemplate { + label, + detail, + bar: &bar, + total: total.max(0), + total_label, + covered: covered.max(0), + covered_label, + }, + "dashboard ratio bar template", + ) +} + +fn render_ratio_bar(total: i64, covered: i64, bar_class: &str) -> String { + let total = total.max(0); + let covered = covered.clamp(0, total); + let covered_percent = percent(covered, total); + let missed_percent = 100.0 - covered_percent; + format!( + "", + html_escape(bar_class), + covered, + total, + covered_percent, + missed_percent.max(0.0) + ) } fn dashboard_coverage_context( @@ -4792,44 +5047,113 @@ fn dashboard_coverage_context( covered_lines: dashboard.covered_lines, partial_lines, missed_lines: missed_line_count(dashboard.tracked_lines, dashboard.covered_lines), + multi_type_lines: dashboard.multi_type_covered_lines, + mutant_backed_lines: dashboard.mutant_verified_covered_lines, + stochastic_mutant_backed_lines: dashboard.stochastic_mutant_verified_covered_lines, + invariant_mutant_backed_lines: dashboard.invariant_mutant_verified_covered_lines, coverage_percent: dashboard.coverage_percent, } } fn 
source_coverage_context(payload: &UiSourcePayload) -> UiCoverageContext { + let has_exact_line_hits = payload + .annotations + .iter() + .any(|annotation| annotation.line_hits.is_some()); let tracked_lines = payload .annotations .iter() .filter(|annotation| { - annotation.line_hits.is_some() - || annotation.line_coverage.is_some() - || annotation.covered - || !annotation.test_types.is_empty() - || !annotation.findings.is_empty() - || !annotation.hazards.is_empty() + if has_exact_line_hits { + annotation.line_hits.is_some() + } else { + annotation.line_coverage.is_some() + || annotation.covered + || !annotation.test_types.is_empty() + || !annotation.findings.is_empty() + || !annotation.hazards.is_empty() + } }) .count() as i64; let covered_lines = payload .annotations .iter() - .filter(|annotation| annotation.line_hits.unwrap_or(if annotation.covered { 1 } else { 0 }) > 0) + .filter(|annotation| { + if has_exact_line_hits { + annotation.line_hits.unwrap_or(0) > 0 + } else { + annotation.line_hits.unwrap_or(if annotation.covered { 1 } else { 0 }) > 0 + } + }) .count() as i64; let partial_lines = payload .annotations .iter() - .filter(|annotation| annotation_has_dark_arms(annotation)) + .filter(|annotation| { + (!has_exact_line_hits || annotation.line_hits.is_some()) + && annotation_has_dark_arms(annotation) + }) .count() as i64; let partial_lines = partial_lines.clamp(0, covered_lines); + let multi_type_lines = payload + .annotations + .iter() + .filter(|annotation| { + annotation_counts_for_coverage_context(annotation, has_exact_line_hits) + && (annotation.line_hits.unwrap_or(0) > 1 + || annotation.test_types.len() >= 2 + || annotation.distinct_tests >= 2) + }) + .count() as i64; + let mutant_backed_lines = payload + .annotations + .iter() + .filter(|annotation| { + annotation_counts_for_coverage_context(annotation, has_exact_line_hits) + && annotation.mutant_verified_tests > 0 + }) + .count() as i64; + let stochastic_mutant_backed_lines = payload + .annotations + 
.iter() + .filter(|annotation| { + annotation_counts_for_coverage_context(annotation, has_exact_line_hits) + && annotation.stochastic_mutant_verified_tests > 0 + }) + .count() as i64; + let invariant_mutant_backed_lines = payload + .annotations + .iter() + .filter(|annotation| { + annotation_counts_for_coverage_context(annotation, has_exact_line_hits) + && annotation.invariant_mutant_verified_tests > 0 + }) + .count() as i64; UiCoverageContext { path: payload.path.clone(), tracked_lines, covered_lines, partial_lines, missed_lines: missed_line_count(tracked_lines, covered_lines), + multi_type_lines: multi_type_lines.clamp(0, covered_lines), + mutant_backed_lines: mutant_backed_lines.clamp(0, covered_lines), + stochastic_mutant_backed_lines: stochastic_mutant_backed_lines.clamp(0, covered_lines), + invariant_mutant_backed_lines: invariant_mutant_backed_lines.clamp(0, covered_lines), coverage_percent: percent(covered_lines, tracked_lines), } } +fn annotation_counts_for_coverage_context( + annotation: &UiLineAnnotation, + has_exact_line_hits: bool, +) -> bool { + if has_exact_line_hits { + annotation.line_hits.unwrap_or(0) > 0 + } else { + annotation.line_hits.unwrap_or(if annotation.covered { 1 } else { 0 }) > 0 + } +} + fn partial_line_count(covered_lines: i64, partial_findings: i64) -> i64 { partial_findings.clamp(0, covered_lines.max(0)) } @@ -4843,41 +5167,33 @@ fn render_branch_context( coverage: &UiCoverageContext, filter: &str, ) -> String { - let mut out = String::new(); - out.push_str("
"); - out.push_str("
Branch Context
"); - out.push_str(&html_escape(&context.branch)); - out.push_str("Source: latest commit "); - out.push_str(&html_escape(&context.commit)); - out.push_str("
Coverage on branch"); - out.push_str(&format!("{:.2}%", coverage.coverage_percent)); - out.push_str(""); - out.push_str(&format!( - "{} of {} lines covered; {} partial, {} missed", - coverage.covered_lines, - coverage.tracked_lines, - coverage.partial_lines, - coverage.missed_lines - )); - out.push_str(""); - out.push_str(&render_coverage_bar( - coverage.tracked_lines, - coverage.covered_lines, - coverage.coverage_percent, - coverage.covered_lines.saturating_sub(coverage.partial_lines), - coverage.partial_lines, - )); - out.push_str("
"); - out.push_str("
"); - out.push_str(&render_path_breadcrumb(&coverage.path, filter)); - out.push_str("
"); - out.push_str("uncovered"); - out.push_str("partial"); - out.push_str("!hazard"); - out.push_str("covered"); - out.push_str("
"); - out.push_str("
"); - out + let line_quality_bar = render_line_quality_bar(LineQualityBar { + tracked_lines: coverage.tracked_lines, + covered_lines: coverage.covered_lines, + partial_lines: coverage.partial_lines, + multi_type_lines: coverage.multi_type_lines, + mutant_backed_lines: coverage.mutant_backed_lines, + coverage_percent: coverage.coverage_percent, + }); + let breadcrumbs = render_path_breadcrumb(&coverage.path, filter); + let coverage_percent = format!("{:.2}", coverage.coverage_percent); + render_template_string( + BranchContextTemplate { + branch: &context.branch, + commit: &context.commit, + coverage_percent: &coverage_percent, + covered_lines: coverage.covered_lines, + tracked_lines: coverage.tracked_lines, + partial_lines: coverage.partial_lines, + missed_lines: coverage.missed_lines, + mutant_backed_lines: coverage.mutant_backed_lines.max(0), + stochastic_mutant_backed_lines: coverage.stochastic_mutant_backed_lines.max(0), + invariant_mutant_backed_lines: coverage.invariant_mutant_backed_lines.max(0), + line_quality_bar: &line_quality_bar, + breadcrumbs: &breadcrumbs, + }, + "branch context template", + ) } fn render_path_breadcrumb(path: &str, filter: &str) -> String { @@ -4920,48 +5236,46 @@ fn render_code_tree_table( filter: &str, sort: CoverageSort, ) -> String { - let mut out = String::new(); - out.push_str(""); - out.push_str(""); - out.push_str(""); - out.push_str(""); - out.push_str(""); + let name_header = render_sort_link("File list", CoverageSort::Path, sort, directory, filter); + let total_header = render_sort_link("Total", CoverageSort::Total, sort, directory, filter); + let covered_header = render_sort_link("Covered", CoverageSort::Covered, sort, directory, filter); + let partial_header = render_sort_link("Partial", CoverageSort::Partial, sort, directory, filter); + let missed_header = render_sort_link("Missed", CoverageSort::Missed, sort, directory, filter); + let percent_header = render_sort_link("%", CoverageSort::Percent, sort, directory, 
filter); + let mut rows = String::new(); for file in files { - out.push_str(&render_file_coverage_row(file, directory, filter)); - } - if files.is_empty() { - out.push_str(""); + rows.push_str(&render_file_coverage_row(file, directory, filter)); } - out.push_str(""); let partial = files .iter() .map(|file| partial_line_count(file.covered_lines, file.dark_arm_findings)) .sum::(); let partial = partial.clamp(0, dashboard.covered_lines); - out.push_str(&render_coverage_table_row( + let subtotal = render_coverage_table_row( None, "Subtotal", "", dashboard.tracked_lines, dashboard.covered_lines, partial, - dashboard.mutant_killed_covered_lines, + dashboard.multi_type_covered_lines, + dashboard.mutant_verified_covered_lines, dashboard.coverage_percent, - )); - out.push_str("
"); - out.push_str(&render_sort_link("File list", CoverageSort::Path, sort, directory, filter)); - out.push_str(""); - out.push_str(&render_sort_link("Total", CoverageSort::Total, sort, directory, filter)); - out.push_str(""); - out.push_str(&render_sort_link("Covered", CoverageSort::Covered, sort, directory, filter)); - out.push_str(""); - out.push_str(&render_sort_link("Partial", CoverageSort::Partial, sort, directory, filter)); - out.push_str(""); - out.push_str(&render_sort_link("Missed", CoverageSort::Missed, sort, directory, filter)); - out.push_str("Coverage"); - out.push_str(&render_sort_link("%", CoverageSort::Percent, sort, directory, filter)); - out.push_str("
No tracked files in this directory.
"); - out + ); + render_template_string( + CoverageTableTemplate { + name_header: &name_header, + total_header: &total_header, + covered_header: &covered_header, + partial_header: &partial_header, + missed_header: &missed_header, + percent_header: &percent_header, + rows: &rows, + empty: files.is_empty(), + subtotal: &subtotal, + }, + "coverage table template", + ) } fn render_sort_link( @@ -5007,31 +5321,21 @@ fn render_file_coverage_row(file: &UiFile, directory: &str, filter: &str) -> Str file.tracked_lines, file.covered_lines, file.dark_arm_findings, - file.mutant_killed_covered_lines, + file.multi_type_covered_lines, + file.mutant_verified_covered_lines, file.line_coverage, ) } fn render_unit_hotspots(units: &[UiUnitHotspot], filter: &str) -> String { - if units.is_empty() { - return "

No function or class hotspots to show.

".to_string(); - } - - let mut out = String::new(); - out.push_str(""); - out + ), + score: format!("{:.1}", unit.score), + }) + .collect::>(); + render_template_string( + HotspotListTemplate { + wrapper_class: "unit-hotspots", + empty_message: "No function or class hotspots to show.", + items: &items, + }, + "unit hotspot template", + ) } fn render_architecture_risks(risks: &[UiArchitectureRisk], filter: &str) -> String { - if risks.is_empty() { - return "

No Espalier architectural risks to show.

".to_string(); - } - - let mut out = String::new(); - out.push_str(""); - out + ), + score: format!("{:.1}", risk.score), + }) + .collect::>(); + render_template_string( + HotspotListTemplate { + wrapper_class: "unit-hotspots architecture-hotspots", + empty_message: "No Espalier architectural risks to show.", + items: &items, + }, + "architecture hotspot template", + ) } fn unit_kind_label(kind: &str, name: &str) -> String { @@ -5100,7 +5403,8 @@ fn render_coverage_table_row( tracked_lines: i64, covered_lines: i64, partial_findings: i64, - mutant_killed_covered_lines: i64, + multi_type_lines: i64, + mutant_backed_lines: i64, line_coverage: f64, ) -> String { let partial = partial_line_count(covered_lines, partial_findings); @@ -5139,13 +5443,14 @@ fn render_coverage_table_row( out.push_str(""); out.push_str(&missed.to_string()); out.push_str(""); - out.push_str(&render_coverage_bar( + out.push_str(&render_line_quality_bar(LineQualityBar { tracked_lines, covered_lines, - percent_value, - mutant_killed_covered_lines, - partial, - )); + partial_lines: partial, + multi_type_lines, + mutant_backed_lines, + coverage_percent: percent_value, + })); out.push_str(""); out.push_str(&format!("{percent_value:.2}%")); out.push_str(""); @@ -5163,8 +5468,8 @@ fn file_display_path(path: &str, directory: &str) -> String { } } -fn file_detail(file: &UiFile) -> String { - html_escape(&format!( +fn file_detail_text(file: &UiFile) -> String { + format!( "{} units | {} / {} lines | {} hazards | {} SARIF | {} tests | {} mutant-killed tests", file.units, file.covered_lines, @@ -5173,51 +5478,29 @@ fn file_detail(file: &UiFile) -> String { file.sarif_findings, file.distinct_tests, file.mutant_killed_tests - )) -} - -fn render_metric(label: &str, value: &str, detail: &str) -> String { - let mut out = String::new(); - out.push_str("
"); - out.push_str(&html_escape(label)); - out.push_str("
"); - out.push_str(&html_escape(value)); - out.push_str("

"); - out.push_str(&html_escape(detail)); - out.push_str("

"); - out + ) } fn render_warning_banner(warnings: &[UiWarning]) -> String { - if warnings.is_empty() { - return String::new(); - } - - let mut out = String::new(); - out.push_str("
"); - for (index, warning) in warnings.iter().enumerate() { + let items = warnings + .iter() + .enumerate() + .map(|(index, warning)| { let key = warning_dismiss_key(warning); let input_id = format!("warning-dismiss-{index}-{}", stable_slug(&key)); - out.push_str("
"); - out.push_str(&html_escape(&warning.label)); - out.push_str("

"); - out.push_str(&html_escape(&warning.detail)); - out.push_str("

"); - } - out.push_str("
"); - out + WarningBannerItem { + input_id, + key, + level: warning.level.clone(), + label: warning.label.clone(), + detail: warning.detail.clone(), + } + }) + .collect::>(); + render_template_string( + WarningBannerTemplate { warnings: &items }, + "warning banner template", + ) } fn warning_dismiss_key(warning: &UiWarning) -> String { @@ -5280,43 +5563,21 @@ fn render_source_view( .map(|annotation| annotation.findings.len()) .sum(); - let mut out = String::new(); - out.push_str("
"); - out.push_str( - "", - ); - out.push_str(""); - out.push_str(""); - out.push_str(""); - out.push_str( - "", - ); - out.push_str( - "", - ); - out.push_str("
"); - out.push_str(&html_escape(&payload.path)); - out.push_str("
"); - out.push_str(&format!( + let summary = format!( "{} covered lines | {} mutant lines | {} hazards | {} partial | {} SARIF", covered, mutant, hazards, dark_arms, findings - )); - out.push_str("
"); - out.push_str( - "
", ); - out.push_str(&render_layers_menu()); - out.push_str("
"); - out.push_str(&render_branch_context( + let layers_menu = render_layers_menu(); + let branch_context = render_branch_context( branch_context, &source_coverage_context(payload), filter, - )); - out.push_str(&render_warning_banner(&payload.warnings)); - out.push_str("
"); + ); + let warnings = render_warning_banner(&payload.warnings); + let mut code_lines = String::new(); for (index, line) in payload.lines.iter().enumerate() { let line_no = (index + 1) as u32; - out.push_str(&render_code_line( + code_lines.push_str(&render_code_line( &payload.path, line_no, line, @@ -5325,21 +5586,23 @@ fn render_source_view( comment_fold_lines.get(&line_no), )); } - out.push_str("
"); - out.push_str(&render_history(payload, filter)); - out.push_str("
"); - out + let history = render_history(payload, filter); + render_template_string( + SourceViewTemplate { + path: &payload.path, + summary: &summary, + layers_menu: &layers_menu, + branch_context: &branch_context, + warnings: &warnings, + code_lines: &code_lines, + history: &history, + }, + "source view template", + ) } fn render_layers_menu() -> String { - let mut out = String::new(); - out.push_str("
Layers
"); - out.push_str(""); - out.push_str(""); - out.push_str(""); - out.push_str(""); - out.push_str("
"); - out + render_template_string(LayersMenuTemplate, "layers menu template") } fn render_history(payload: &UiSourcePayload, filter: &str) -> String { @@ -7463,7 +7726,7 @@ mod tests { .unwrap(); storage .insert_event(&Event { - unit_id: unit.id, + unit_id: unit.id.clone(), commit_hash: "abc".into(), event_type: EventType::Change, path: "src/demo.rb".into(), @@ -7480,17 +7743,62 @@ mod tests { .record_coverage_line("abc", 10, "src/demo.rb", 1, 0) .unwrap(); storage - .record_coverage_line("abc", 10, "src/demo.rb", 2, 1) + .record_coverage_line("abc", 10, "src/demo.rb", 2, 2) + .unwrap(); + storage + .insert_test_exposure_event(&TestExposureEvent { + unit_id: unit.id.clone(), + commit_hash: "abc".into(), + timestamp: 10, + path: "src/demo.rb".into(), + function: Some("run".into()), + line: Some(3), + branch_id: None, + test_id: "spec/demo_spec.rb:1".into(), + test_type: "unit".into(), + mutation_status: None, + mutation_kind: None, + is_mutation_verified: false, + is_mutation_killed: false, + is_verified: true, + payload_json: "{}".into(), + }) + .unwrap(); + storage + .insert_test_exposure_event(&TestExposureEvent { + unit_id: unit.id.clone(), + commit_hash: "abc".into(), + timestamp: 10, + path: "src/demo.rb".into(), + function: Some("run".into()), + line: Some(2), + branch_id: None, + test_id: "spec/demo_spec.rb:2".into(), + test_type: "unit".into(), + mutation_status: Some("killed".into()), + mutation_kind: Some("invariant".into()), + is_mutation_verified: true, + is_mutation_killed: true, + is_verified: true, + payload_json: "{}".into(), + }) .unwrap(); - let payload = source_payload(&storage, dir.path(), "src/demo.rb", None).unwrap(); let line_one = payload.annotations.iter().find(|line| line.line == 1).unwrap(); let line_two = payload.annotations.iter().find(|line| line.line == 2).unwrap(); + let line_three = payload.annotations.iter().find(|line| line.line == 3).unwrap(); + let coverage = source_coverage_context(&payload); assert!(!line_one.covered); 
assert_eq!(line_one.line_hits, Some(0)); assert!(line_two.covered); - assert_eq!(line_two.line_hits, Some(1)); + assert_eq!(line_two.line_hits, Some(2)); + assert!(line_three.test_types.contains(&"unit".to_string())); + assert_eq!(coverage.tracked_lines, 2); + assert_eq!(coverage.covered_lines, 1); + assert_eq!(coverage.missed_lines, 1); + assert_eq!(coverage.multi_type_lines, 1); + assert_eq!(coverage.mutant_backed_lines, 1); } #[test] @@ -7664,6 +7972,8 @@ mod tests { distinct_tests: 9, mutant_verified_tests: 0, mutant_killed_tests: 0, + stochastic_mutant_verified_tests: 0, + invariant_mutant_verified_tests: 0, line_hits: Some(3), line_coverage: Some(100.0), mutant_coverage: None, @@ -8031,7 +8341,7 @@ mod tests { assert!(html.contains("func")); assert!(html.contains("class=\"outline-hotspot\"")); assert!(html.contains("run")); - assert!(html.contains("class=\"coverage-bar\"")); + assert!(html.contains("class=\"coverage-bar line-quality-bar\"")); } #[test] @@ -8043,16 +8353,165 @@ mod tests { } #[test] - fn coverage_bar_splits_strong_and_weak_covered_lines() { - let (strong, weak) = coverage_bar_widths(10, 8, 80.0, 5, 1); + fn line_quality_segments_split_coverage_and_mutant_backing() { + let segments = line_quality_segments(LineQualityBar { + tracked_lines: 10, + covered_lines: 8, + partial_lines: 2, + multi_type_lines: 3, + mutant_backed_lines: 4, + coverage_percent: 80.0, + }); - assert_eq!(strong, 50.0); - assert_eq!(weak, 30.0); + assert_eq!(segments.multi, 30.0); + assert_eq!(segments.covered, 30.0); + assert_eq!(segments.partial, 20.0); + assert_eq!(segments.missed, 20.0); + assert_eq!(segments.mutant_multi, 30.0); + assert_eq!(segments.mutant_covered, 10.0); + assert_eq!(segments.mutant_partial, 0.0); + assert_eq!(segments.mutant_gap, 60.0); + + let html = render_line_quality_bar(LineQualityBar { + tracked_lines: 10, + covered_lines: 8, + partial_lines: 2, + multi_type_lines: 3, + mutant_backed_lines: 4, + coverage_percent: 80.0, + }); - let (strong, weak) 
= coverage_bar_widths(10, 8, 80.0, 8, 2); + assert!(html.contains("line-quality-bar")); + assert!(html.contains("coverage-track")); + assert!(html.contains("mutant-track")); + assert!(html.contains("coverage-partial")); + } - assert_eq!(strong, 60.0); - assert_eq!(weak, 20.0); + #[test] + fn dashboard_renders_collapsible_risks_hazards_first_and_stacked_bars() { + let dashboard = UiDashboard { + files: 2, + tracked_lines: 10, + covered_lines: 8, + coverage_percent: 80.0, + active_hazards: 2, + sarif_findings: 7, + evidence_covered_hazards: 2, + hazard_evidence_percent: 100.0, + covered_hazards: 1, + hazard_coverage_percent: 50.0, + mutant_verified_covered_lines: 4, + mutant_verified_covered_percent: 50.0, + mutant_killed_covered_lines: 4, + mutant_killed_covered_percent: 50.0, + stochastic_mutant_verified_covered_lines: 1, + stochastic_mutant_verified_covered_percent: 12.5, + stochastic_mutant_killed_covered_lines: 1, + stochastic_mutant_killed_covered_percent: 12.5, + invariant_mutant_verified_covered_lines: 2, + invariant_mutant_verified_covered_percent: 25.0, + invariant_mutant_killed_covered_lines: 2, + invariant_mutant_killed_covered_percent: 25.0, + multi_type_covered_lines: 3, + multi_type_covered_percent: 37.5, + files_with_coverage: 2, + top_hazard_files: vec![UiFile { + hazards: 2, + ..ui_file_for_sort("zig/runtime/a.zig", 10, 8, 1) + }], + top_units: Vec::new(), + top_architecture_risks: Vec::new(), + warnings: Vec::new(), + }; + let files = dashboard.top_hazard_files.iter().collect::>(); + let branch_context = UiBranchContext { + branch: "feature".to_string(), + commit: "abcdef123456".to_string(), + }; + let html = render_dashboard( + &dashboard, + "", + &[], + &files, + "", + CoverageSort::Path, + &branch_context, + ); + + assert!(html.contains("
")); + assert!(html.contains("

Active Hazards

")); + assert!(html.contains("

Highest Risk Units

")); + assert!(html.contains("

Highest Architectural Risks

")); + assert!(html.contains("class=\"coverage-bar line-quality-bar\"")); + assert!(html.contains("8 of 10 lines covered; 1 partial, 2 missed")); + assert!(!html.contains(">8 covered lines")); + assert!(html.contains("4 mutant-backed / 1 stochastic / 2 invariant")); + assert!(html.contains("class=\"ratio-bar hazard-bar\"")); + assert!(html.contains("Code tree (2 files - 7 SARIF findings)")); + assert!(!html.contains("class=\"metric\"")); + assert!(!html.contains("class=\"dashboard-bars\"")); + assert!(!html.contains("dashboard-line-quality")); + assert!(!html.contains("Lines")); + assert!(!html.contains("Mutants")); + assert_eq!(html.matches("class=\"ratio-bar hazard-bar\"").count(), 1); + assert_eq!(html.matches("class=\"ratio-bar mutant-bar\"").count(), 0); + assert!( + html.find("4 mutant-backed / 1 stochastic / 2 invariant").unwrap() + < html.find("Active Hazards").unwrap(), + "mutant detail should live in the top branch-context bar, not between dashboard sections" + ); + assert!( + html.find("Active Hazards").unwrap() < html.find("Code tree").unwrap(), + "hazards should render above code tree" + ); + assert!( + html.find("Highest Hazard Files").unwrap() < html.find("Highest Risk Units").unwrap(), + "hazard files should render above risk sections" + ); + + let no_hazard = UiDashboard { + active_hazards: 0, + covered_hazards: 0, + evidence_covered_hazards: 0, + top_hazard_files: Vec::new(), + ..dashboard + }; + let hazards = render_active_hazards_section(&no_hazard); + assert!(hazards.contains("
")); + assert!(!hazards.contains(" open")); + assert!(hazards.contains("No active systems hazards are recorded.")); + } + + #[test] + fn branch_context_legend_lists_coverage_states_without_hazard_marker() { + let context = UiBranchContext { + branch: "feature".to_string(), + commit: "abcdef123456".to_string(), + }; + let coverage = UiCoverageContext { + path: "src/demo.rb".to_string(), + tracked_lines: 4, + covered_lines: 3, + partial_lines: 1, + missed_lines: 1, + multi_type_lines: 1, + mutant_backed_lines: 1, + stochastic_mutant_backed_lines: 1, + invariant_mutant_backed_lines: 0, + coverage_percent: 75.0, + }; + let html = render_branch_context(&context, &coverage, ""); + + assert!(html.contains("coverage-multi\" style=\"width:25.000%")); + assert!(html.contains("Multi-covered")); + assert!(html.contains(">covered")); + assert!(html.contains(">partial")); + assert!(html.contains(">missed")); + assert!(!html.contains("legend-alert")); + assert!(!html.contains(">hazard")); + assert!(html.find("Multi-covered").unwrap() < html.find(">covered").unwrap()); + assert!(html.find(">covered").unwrap() < html.find(">partial").unwrap()); + assert!(html.find(">partial").unwrap() < html.find(">missed").unwrap()); } #[test] @@ -8446,7 +8905,7 @@ mod tests { ); storage.upsert_logical_unit(&unit, 10).unwrap(); storage - .record_coverage_line("abc", 10, "src/a.rb", 1, 1) + .record_coverage_line("abc", 10, "src/a.rb", 1, 2) .unwrap(); storage .record_coverage_line("abc", 10, "src/a.rb", 2, 0) @@ -8491,11 +8950,13 @@ mod tests { assert_eq!(files[0].tracked_lines, 2); assert_eq!(files[0].covered_lines, 1); assert_eq!(files[0].mutant_killed_covered_lines, 1); + assert_eq!(files[0].multi_type_covered_lines, 1); let dashboard = dashboard_summary(&storage).unwrap(); assert_eq!(dashboard.files, 1); assert_eq!(dashboard.tracked_lines, 2); assert_eq!(dashboard.covered_lines, 1); + assert_eq!(dashboard.multi_type_covered_lines, 1); assert_eq!(dashboard.mutant_killed_covered_percent, 100.0); 
} @@ -8565,6 +9026,8 @@ flags: distinct_tests: 0, mutant_verified_tests: 0, mutant_killed_tests: 0, + stochastic_mutant_verified_tests: 0, + invariant_mutant_verified_tests: 0, line_hits: Some(1), line_coverage: None, mutant_coverage: None, @@ -8631,6 +9094,8 @@ flags: distinct_tests: 0, mutant_verified_tests: 0, mutant_killed_tests: 0, + stochastic_mutant_verified_tests: 0, + invariant_mutant_verified_tests: 0, line_hits: Some(1), line_coverage: None, mutant_coverage: None, diff --git a/gems/lineage/ui/assets/app.css b/gems/lineage/ui/assets/app.css index 3c593af98..d4fda4f64 100644 --- a/gems/lineage/ui/assets/app.css +++ b/gems/lineage/ui/assets/app.css @@ -93,19 +93,28 @@ } .coverage-pill { color: #166534; background: rgba(34, 197, 94, 0.08); } .coverage-bar { - display: flex; - height: 8px; + display: grid; + grid-template-rows: 1fr 1fr; + height: 12px; min-width: 130px; - border-radius: 999px; - background: rgba(148, 163, 184, 0.18); + border: 1px solid rgba(100, 116, 139, 0.22); + border-radius: 0; + background: #fff; overflow: hidden; } .coverage-bar span { display: block; height: 100%; } - .coverage-strong { background: rgba(22, 101, 52, 0.54); } - .coverage-weak { background: rgba(34, 197, 94, 0.18); } + .coverage-track, + .mutant-track { + display: flex !important; + min-width: 0; + } + .coverage-multi { background: rgba(20, 83, 45, 0.72); } + .coverage-covered { background: rgba(34, 197, 94, 0.24); } + .coverage-partial { background: rgba(31, 41, 55, 0.22); } + .coverage-missed { background: transparent; } .outline { border-top: 1px solid var(--line); padding: 8px; @@ -347,6 +356,21 @@ min-width: 220px; margin-top: 4px; } + .branch-summary-foot { + display: flex; + justify-content: space-between; + gap: 12px; + color: var(--muted); + font-size: 12px; + line-height: 1.2; + min-width: 220px; + } + .branch-summary-foot span { + min-width: 0; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } .coverage-on-branch strong { color: 
#166534; font-size: 26px; @@ -390,40 +414,14 @@ .legend-swatch { inline-size: 14px; block-size: 8px; - border-radius: 999px; + border-radius: 0; display: inline-block; border: 1px solid rgba(100, 116, 139, 0.18); } - .legend-uncovered { background: rgba(148, 163, 184, 0.18); } + .legend-multi { background: rgba(20, 83, 45, 0.72); } + .legend-missed { background: #fff; } .legend-partial { background: rgba(31, 41, 55, 0.22); } - .legend-covered { background: rgba(34, 197, 94, 0.14); } - .legend-alert { - display: inline-flex; - align-items: center; - justify-content: center; - inline-size: 14px; - block-size: 14px; - border-radius: 999px; - background: #7f1d1d; - color: #fff; - font-size: 10px; - font-style: normal; - font-weight: 700; - } - .metric-grid { - display: grid; - grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); - gap: 10px; - } - .metric { - border: 1px solid var(--line); - border-radius: 6px; - background: #fff; - padding: 12px; - } - .metric div { color: var(--muted); font-size: 12px; } - .metric strong { display: block; margin-top: 4px; font-size: 24px; letter-spacing: 0; } - .metric p { margin: 4px 0 0; color: var(--muted); font-size: 12px; } + .legend-covered { background: rgba(34, 197, 94, 0.24); } .warning-banner { display: grid; gap: 8px; @@ -470,17 +468,80 @@ border-top: 1px solid var(--line); padding-top: 14px; } - .hazard-bar { + .dashboard-disclosure { + padding-top: 0; + } + .dashboard-disclosure summary { + display: flex; + align-items: center; + gap: 6px; + min-height: 28px; + padding-top: 14px; + cursor: pointer; + list-style: none; + } + .dashboard-disclosure summary::-webkit-details-marker { display: none; } + .dashboard-disclosure h2 { margin: 0; } + .dashboard-disclosure-arrow::before { + content: ">"; + display: inline-block; + inline-size: 12px; + text-align: center; + transform: rotate(0deg); + transition: transform 120ms ease; + } + .dashboard-disclosure[open] .dashboard-disclosure-arrow::before { + transform: 
rotate(90deg); + } + .dashboard-section-body { + margin-top: 10px; + } + .dashboard-bar-row { + display: grid; + gap: 4px; + } + .dashboard-bar-head, + .dashboard-bar-foot { + display: flex; + justify-content: space-between; + gap: 12px; + color: var(--muted); + font-size: 12px; + } + .dashboard-bar-head strong { + color: var(--text); + } + .dashboard-bar-head span, + .dashboard-bar-foot span { + min-width: 0; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } + .ratio-bar { + display: flex; height: 8px; - max-width: 520px; - border-radius: 999px; - background: rgba(180, 35, 24, 0.14); + min-width: 100%; + border: 1px solid rgba(100, 116, 139, 0.22); + border-radius: 0; + background: #fff; overflow: hidden; } - .hazard-bar span { + .ratio-bar span { display: block; height: 100%; - background: #166534; + } + .ratio-covered { + background: rgba(34, 197, 94, 0.36); + } + .ratio-missed { + background: transparent; + } + .hazard-bar .ratio-covered { + background: rgba(22, 101, 52, 0.54); + } + .hazard-bar { + max-width: 520px; } .dashboard-files { display: grid; diff --git a/gems/lineage/ui/templates/app.html b/gems/lineage/ui/templates/app.html new file mode 100644 index 000000000..a9b4d3c1f --- /dev/null +++ b/gems/lineage/ui/templates/app.html @@ -0,0 +1,8 @@ +
+ + {{ sidebar|safe }} + +
+ {{ main|safe }} +
+
diff --git a/gems/lineage/ui/templates/branch_context.html b/gems/lineage/ui/templates/branch_context.html new file mode 100644 index 000000000..bc59acf81 --- /dev/null +++ b/gems/lineage/ui/templates/branch_context.html @@ -0,0 +1,31 @@ +
+
+
+
Branch Context
+ {{ branch }} + Source: latest commit {{ commit }} +
+
+ Coverage on branch + {{ coverage_percent }}% + {{ covered_lines }} of {{ tracked_lines }} lines covered; {{ partial_lines }} partial, {{ missed_lines }} missed + + {{ line_quality_bar|safe }} + +
+ {{ mutant_backed_lines }} mutant-backed / {{ stochastic_mutant_backed_lines }} stochastic / {{ invariant_mutant_backed_lines }} invariant +
+
+
+
+
+ {{ breadcrumbs|safe }} +
+
+ Multi-covered + covered + partial + missed +
+
+
diff --git a/gems/lineage/ui/templates/coverage_table.html b/gems/lineage/ui/templates/coverage_table.html new file mode 100644 index 000000000..6cd81db9a --- /dev/null +++ b/gems/lineage/ui/templates/coverage_table.html @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + {% if empty %} + + {% else %} + {{ rows|safe }} + {% endif %} + + + {{ subtotal|safe }} + +
{{ name_header|safe }}{{ total_header|safe }}{{ covered_header|safe }}{{ partial_header|safe }}{{ missed_header|safe }}Coverage{{ percent_header|safe }}
No tracked files in this directory.
diff --git a/gems/lineage/ui/templates/dashboard.html b/gems/lineage/ui/templates/dashboard.html new file mode 100644 index 000000000..ff9195a32 --- /dev/null +++ b/gems/lineage/ui/templates/dashboard.html @@ -0,0 +1,14 @@ +
+
+ {{ branch_context|safe }} + {{ warnings|safe }} + {{ active_hazards|safe }} + {{ highest_hazard_files|safe }} + {{ highest_risk_units|safe }} + {{ highest_architecture_risks|safe }} +
+

{{ code_tree_heading }}

+ {{ code_tree|safe }} +
+
+
diff --git a/gems/lineage/ui/templates/dashboard_disclosure.html b/gems/lineage/ui/templates/dashboard_disclosure.html new file mode 100644 index 000000000..3ac90f859 --- /dev/null +++ b/gems/lineage/ui/templates/dashboard_disclosure.html @@ -0,0 +1,9 @@ +
+ + +

{{ title }}

+
+
+ {{ body|safe }} +
+
diff --git a/gems/lineage/ui/templates/dashboard_hazard_files.html b/gems/lineage/ui/templates/dashboard_hazard_files.html new file mode 100644 index 000000000..70321575a --- /dev/null +++ b/gems/lineage/ui/templates/dashboard_hazard_files.html @@ -0,0 +1,15 @@ +{% if files.len() == 0 %} +

No hazard-heavy files to show.

+{% else %} +
+ {% for file in files %} + + + {{ file.path }} + {{ file.detail }} + + {{ file.hazards }} + + {% endfor %} +
+{% endif %} diff --git a/gems/lineage/ui/templates/dashboard_ratio_bar.html b/gems/lineage/ui/templates/dashboard_ratio_bar.html new file mode 100644 index 000000000..a248b2d0d --- /dev/null +++ b/gems/lineage/ui/templates/dashboard_ratio_bar.html @@ -0,0 +1,11 @@ +
+
+ {{ label }} + {{ detail }} +
+ {{ bar|safe }} +
+ {{ total }} {{ total_label }} + {{ covered }} {{ covered_label }} +
+
diff --git a/gems/lineage/ui/templates/dashboard_sidebar.html b/gems/lineage/ui/templates/dashboard_sidebar.html new file mode 100644 index 000000000..a7602addc --- /dev/null +++ b/gems/lineage/ui/templates/dashboard_sidebar.html @@ -0,0 +1,16 @@ +
+

Lineage

+
{{ summary }}
+ {{ nav|safe }} +
+
+ {% if show_directory_input %} + + {% endif %} + + + {{ search_options|safe }} +
+ diff --git a/gems/lineage/ui/templates/hotspot_list.html b/gems/lineage/ui/templates/hotspot_list.html new file mode 100644 index 000000000..9d95634b4 --- /dev/null +++ b/gems/lineage/ui/templates/hotspot_list.html @@ -0,0 +1,17 @@ +{% if items.len() == 0 %} +

{{ empty_message }}

+{% else %} + +{% endif %} diff --git a/gems/lineage/ui/templates/layers_menu.html b/gems/lineage/ui/templates/layers_menu.html new file mode 100644 index 000000000..7c568919f --- /dev/null +++ b/gems/lineage/ui/templates/layers_menu.html @@ -0,0 +1,31 @@ +
+ + + Layers + +
+ + + + +
+
diff --git a/gems/lineage/ui/templates/source_sidebar.html b/gems/lineage/ui/templates/source_sidebar.html new file mode 100644 index 000000000..dca03e135 --- /dev/null +++ b/gems/lineage/ui/templates/source_sidebar.html @@ -0,0 +1,13 @@ +
+

Lineage

+
{{ path }}
+ {{ nav|safe }} +
+{% if show_empty_outline %} + +{% else %} + {{ outline|safe }} +{% endif %} diff --git a/gems/lineage/ui/templates/source_unavailable.html b/gems/lineage/ui/templates/source_unavailable.html new file mode 100644 index 000000000..e7f498908 --- /dev/null +++ b/gems/lineage/ui/templates/source_unavailable.html @@ -0,0 +1,9 @@ +
+
+
Source unavailable
+
{{ error }}
+
+
+
+
The selected path is not available in the current checkout. Regenerate coverage for HEAD or open a historical commit view.
+
diff --git a/gems/lineage/ui/templates/source_view.html b/gems/lineage/ui/templates/source_view.html new file mode 100644 index 000000000..8b322a635 --- /dev/null +++ b/gems/lineage/ui/templates/source_view.html @@ -0,0 +1,29 @@ +
+ + + + + + +
+
+
{{ path }}
+
{{ summary }}
+
+
+
+ + +
+ {{ layers_menu|safe }} +
+
+
+ {{ branch_context|safe }} + {{ warnings|safe }} +
+ {{ code_lines|safe }} +
+
+ {{ history|safe }} +
diff --git a/gems/lineage/ui/templates/warning_banner.html b/gems/lineage/ui/templates/warning_banner.html new file mode 100644 index 000000000..edaa8ec69 --- /dev/null +++ b/gems/lineage/ui/templates/warning_banner.html @@ -0,0 +1,12 @@ +{% if warnings.len() > 0 %} +
+ {% for warning in warnings %} + +
+ + {{ warning.label }} +

{{ warning.detail }}

+
+ {% endfor %} +
+{% endif %} From 71f13732d94bba528e418e6261ae844b81d927bb Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Tue, 16 Jun 2026 17:37:09 +0000 Subject: [PATCH 06/52] Add native Decomplex co-update detector runner --- .gitignore | 3 + gems/decomplex/exe/decomplex | 56 ++ gems/decomplex/lib/decomplex.rb | 1 + .../lib/decomplex/detector_runner.rb | 89 ++++ .../lib/decomplex/native/state_writes.rb | 81 +++ gems/decomplex/rust/Cargo.lock | 375 ++++++++++++++ gems/decomplex/rust/Cargo.toml | 21 + .../rust/src/decomplex/detectors/co_update.rs | 11 + .../rust/src/decomplex/detectors/mod.rs | 1 + gems/decomplex/rust/src/decomplex/mod.rs | 2 + .../rust/src/decomplex/syntax/mod.rs | 14 + .../rust/src/decomplex/syntax/ruby.rs | 486 ++++++++++++++++++ gems/decomplex/rust/src/main.rs | 53 ++ gems/decomplex/test/detector_runner_test.rb | 69 +++ .../test/fixtures/co_update_sample.rb | 23 + 15 files changed, 1285 insertions(+) create mode 100644 gems/decomplex/lib/decomplex/detector_runner.rb create mode 100644 gems/decomplex/lib/decomplex/native/state_writes.rb create mode 100644 gems/decomplex/rust/Cargo.lock create mode 100644 gems/decomplex/rust/Cargo.toml create mode 100644 gems/decomplex/rust/src/decomplex/detectors/co_update.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/mod.rs create mode 100644 gems/decomplex/rust/src/decomplex/mod.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/mod.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/ruby.rs create mode 100644 gems/decomplex/rust/src/main.rs create mode 100644 gems/decomplex/test/detector_runner_test.rb create mode 100644 gems/decomplex/test/fixtures/co_update_sample.rb diff --git a/.gitignore b/.gitignore index 9cf345e49..8ae93f4e8 100644 --- a/.gitignore +++ b/.gitignore @@ -159,3 +159,6 @@ transpile-tests/fuzz/*.cht # Generated architecture reports gems/espalier/architecture.yml + +# Decomplex native Rust build artifacts +gems/decomplex/rust/target/ diff --git 
a/gems/decomplex/exe/decomplex b/gems/decomplex/exe/decomplex index e0a7616fa..a02badfc0 100755 --- a/gems/decomplex/exe/decomplex +++ b/gems/decomplex/exe/decomplex @@ -60,6 +60,59 @@ if ARGV[0] == "delta" exit 0 end +if ARGV[0] == "detector" + args = ARGV[1..] + detector = args&.shift + abort "usage: decomplex detector DETECTOR --engine=ruby|rust --json FILE..." unless detector + + engine = "ruby" + json = false + compare = false + benchmark = false + args = args.reject do |arg| + case arg + when /\A--engine=(.+)\z/ + engine = Regexp.last_match(1) + true + when "--json" + json = true + true + when "--compare-engines" + compare = true + true + when "--benchmark" + benchmark = true + true + else + false + end + end + files = collect_files(args) + abort no_files_message if files.empty? + + if compare + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare(detector, files) + unless ok + warn "decomplex detector #{detector} output differs between ruby and rust engines" + warn "--- ruby" + warn ruby_json + warn "--- rust" + warn rust_json + exit 1 + end + puts ruby_json + elsif json + started = Process.clock_gettime(Process::CLOCK_MONOTONIC) + output = Decomplex::DetectorRunner.canonical_json(detector, files, engine: engine) + elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started + warn format("decomplex detector=%s engine=%s files=%d elapsed=%.6fs", detector, engine, files.size, elapsed) if benchmark + print output + else + abort "decomplex detector currently requires --json or --compare-engines" + end + exit 0 +end + if ARGV[0] == "state-mesh" require_relative "../lib/decomplex/state_mesh" args = ARGV[1..] @@ -234,6 +287,8 @@ if ARGV.empty? || ARGV[0] == "-h" || ARGV[0] == "--help" decomplex FILE_OR_DIR [FILE_OR_DIR ...] decomplex report [--output=FILE] [--emit-json=FILE] [--sarif=FILE] [--exclude=GLOB] FILE_OR_DIR ... + decomplex detector DETECTOR --engine=ruby|rust --json [--benchmark] FILE_OR_DIR ... 
+ decomplex detector DETECTOR --compare-engines FILE_OR_DIR ... decomplex state-mesh [--output=FILE] [--exclude=GLOB] FILE_OR_DIR ... decomplex state-branches [--output=FILE] [--exclude=GLOB] FILE_OR_DIR ... decomplex temporal-ordering [--output=FILE] [--exclude=GLOB] FILE_OR_DIR ... @@ -247,6 +302,7 @@ if ARGV.empty? || ARGV[0] == "-h" || ARGV[0] == "--help" Subcommands: report Full markdown report with all detectors + detector Single-detector canonical JSON for migration/benchmarking state-mesh JSON graph of reader/writer hierarchy by field state-branches Markdown table of branches whose predicate reads state diff --git a/gems/decomplex/lib/decomplex.rb b/gems/decomplex/lib/decomplex.rb index 263910c35..dd72c819e 100644 --- a/gems/decomplex/lib/decomplex.rb +++ b/gems/decomplex/lib/decomplex.rb @@ -9,6 +9,7 @@ require_relative "decomplex/site_extractor" require_relative "decomplex/miner" require_relative "decomplex/co_update" +require_relative "decomplex/detector_runner" require_relative "decomplex/predicate_alias" require_relative "decomplex/path_condition" require_relative "decomplex/semantic_alias" diff --git a/gems/decomplex/lib/decomplex/detector_runner.rb b/gems/decomplex/lib/decomplex/detector_runner.rb new file mode 100644 index 000000000..1b74cc825 --- /dev/null +++ b/gems/decomplex/lib/decomplex/detector_runner.rb @@ -0,0 +1,89 @@ +# frozen_string_literal: true + +require "json" +require_relative "co_update" +require_relative "native/state_writes" + +module Decomplex + # Runs one detector in isolation and emits deterministic machine output. + # + # This is intentionally narrower than Report: it gives parser/runtime + # migration work an apples-to-apples target that excludes report wording, + # timing, SARIF metadata, and other nondeterministic details. 
+ module DetectorRunner + DETECTORS = { + "co-update" => :co_update + }.freeze + ENGINES = %w[ruby rust].freeze + + module_function + + def run(detector, files, engine: "ruby") + canonical = canonical_detector(detector) + validate_engine!(engine) + + case canonical + when :co_update + co_update(files, engine: engine) + else + raise ArgumentError, "unsupported decomplex detector: #{detector}" + end + end + + def canonical_json(detector, files, engine: "ruby") + JSON.generate(canonicalize(run(detector, files, engine: engine))) << "\n" + end + + def compare(detector, files) + ruby_json = canonical_json(detector, files, engine: "ruby") + rust_json = canonical_json(detector, files, engine: "rust") + [ruby_json == rust_json, ruby_json, rust_json] + end + + def detector_names + DETECTORS.keys + end + + private_class_method def self.canonical_detector(detector) + DETECTORS.fetch(detector.to_s) do + raise ArgumentError, "unsupported decomplex detector: #{detector}" + end + end + + private_class_method def self.validate_engine!(engine) + return if ENGINES.include?(engine.to_s) + + raise ArgumentError, "unsupported decomplex detector engine: #{engine}" + end + + private_class_method def self.co_update(files, engine:) + report = + if engine.to_s == "rust" + CoUpdate::Report.new(Native::StateWrites.extract(files)) + else + CoUpdate.scan(files) + end + + { + "co_written_pairs" => report.co_written_pairs, + "neglected_updates" => report.neglected_updates + } + end + + private_class_method def self.canonicalize(value) + case value + when Hash + value.keys.map(&:to_s).sort.each_with_object({}) do |key, out| + original = value.key?(key) ? 
key : value.keys.find { |candidate| candidate.to_s == key } + out[key] = canonicalize(value.fetch(original)) + end + when Array + value.map { |item| canonicalize(item) } + when Symbol + value.to_s + else + value + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/state_writes.rb b/gems/decomplex/lib/decomplex/native/state_writes.rb new file mode 100644 index 000000000..910807dd0 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/state_writes.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +require "json" +require "open3" +require_relative "../co_update" + +module Decomplex + module Native + # Bridge from the Ruby detector layer to the native Decomplex fact extractor. + # The native binary emits syntax facts only; Ruby still owns detector scoring + # and canonical output for the migration proof. + module StateWrites + module_function + + def extract(files) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + payload = run_native(paths) + JSON.parse(payload).map do |row| + CoUpdate::Write.new( + attr: row.fetch("field"), + recv: row.fetch("receiver"), + file: row.fetch("file"), + defn: row.fetch("function"), + line: row.fetch("line"), + span: row.fetch("span"), + ) + end + end + + def binary_path + env = ENV["DECOMPLEX_RUST_BIN"] + return env if env && !env.empty? + + crate_root = File.expand_path("../../../rust", __dir__) + exe = Gem.win_platform? ? "decomplex-rust.exe" : "decomplex-rust" + File.join(crate_root, "target", "release", exe) + end + + def crate_root + File.expand_path("../../../rust", __dir__) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? 
+ + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + + private_class_method def self.run_native(paths) + command = + if fresh_binary?(binary_path) + [binary_path, "state-writes", "--language", "ruby", *paths] + else + ["cargo", "run", "--quiet", "--release", "--manifest-path", + File.join(crate_root, "Cargo.toml"), "--", + "state-writes", "--language", "ruby", *paths] + end + stdout, stderr, status = Open3.capture3(*command) + return stdout if status.success? + + raise "decomplex rust state-writes failed: #{stderr.empty? ? stdout : stderr}" + rescue Errno::ENOENT => e + raise "decomplex rust state-writes requires cargo or DECOMPLEX_RUST_BIN: #{e.message}" + end + + private_class_method def self.fresh_binary?(path) + return false unless File.executable?(path) + return true if ENV["DECOMPLEX_RUST_BIN"] && !ENV["DECOMPLEX_RUST_BIN"].empty? + + binary_mtime = File.mtime(path) + rust_sources.all? { |source| File.mtime(source) <= binary_mtime } + end + + private_class_method def self.rust_sources + Dir[File.join(crate_root, "Cargo.toml"), File.join(crate_root, "src", "**", "*.rs")] + end + end + end +end diff --git a/gems/decomplex/rust/Cargo.lock b/gems/decomplex/rust/Cargo.lock new file mode 100644 index 000000000..0be14f618 --- /dev/null +++ b/gems/decomplex/rust/Cargo.lock @@ -0,0 +1,375 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "bitflags" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" + +[[package]] +name = "cc" +version = "1.2.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dad887fd958be91b5098c0248def011f4523ab786cd411be668777e55063501f" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "decomplex-rust" +version = "0.1.0" +dependencies = [ + "anyhow", + "serde", + "serde_json", + "tempfile", + "tree-sitter", + "tree-sitter-language", + "tree-sitter-ruby", +] + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "itoa" +version = "1.0.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + +[[package]] +name = "memchr" +version = "2.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4" + +[[package]] +name = 
"rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.59.0", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "shlex" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" + +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "syn" +version = "2.0.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = 
"tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "tree-sitter" +version = "0.24.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c199356c799a8945965bb5f2c55b2ad9d9aa7c4b4f6e587fe9dea0bc715e5f9c" + +[[package]] +name = "tree-sitter-ruby" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/gems/decomplex/rust/Cargo.toml b/gems/decomplex/rust/Cargo.toml new file mode 100644 index 000000000..245ae5322 --- /dev/null +++ b/gems/decomplex/rust/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "decomplex-rust" +version = "0.1.0" +edition = "2021" +description = "Native fact extraction slices for Decomplex" +license = "MIT" + +[[bin]] +name = "decomplex-rust" +path = "src/main.rs" + +[dependencies] +anyhow = "1.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +tree-sitter = "=0.24.7" +tree-sitter-language = "=0.1.3" +tree-sitter-ruby = "=0.23.1" + +[dev-dependencies] +tempfile = "=3.10.1" diff --git a/gems/decomplex/rust/src/decomplex/detectors/co_update.rs b/gems/decomplex/rust/src/decomplex/detectors/co_update.rs new file mode 100644 index 000000000..510853c40 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/co_update.rs @@ -0,0 +1,11 @@ +use crate::decomplex::syntax::{ruby, StateWrite}; +use anyhow::Result; +use std::path::PathBuf; + +pub fn state_writes_for_files(files: &[PathBuf]) -> Result> { + let mut facts = Vec::new(); + for file in files { + facts.extend(ruby::state_writes_for_file(file)?); + } + Ok(facts) +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/mod.rs b/gems/decomplex/rust/src/decomplex/detectors/mod.rs new file mode 100644 index 000000000..0c7589a70 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/mod.rs @@ -0,0 +1 @@ +pub mod co_update; diff --git 
a/gems/decomplex/rust/src/decomplex/mod.rs b/gems/decomplex/rust/src/decomplex/mod.rs new file mode 100644 index 000000000..0b5596ae2 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/mod.rs @@ -0,0 +1,2 @@ +pub mod detectors; +pub mod syntax; diff --git a/gems/decomplex/rust/src/decomplex/syntax/mod.rs b/gems/decomplex/rust/src/decomplex/syntax/mod.rs new file mode 100644 index 000000000..d12feaf3f --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/mod.rs @@ -0,0 +1,14 @@ +pub mod ruby; + +use serde::Serialize; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct StateWrite { + pub field: String, + pub receiver: String, + pub file: String, + pub function: String, + pub line: usize, + pub span: [usize; 4], + pub owner: String, +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/ruby.rs b/gems/decomplex/rust/src/decomplex/syntax/ruby.rs new file mode 100644 index 000000000..50a0364c7 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/ruby.rs @@ -0,0 +1,486 @@ +use super::StateWrite; +use anyhow::{Context, Result}; +use std::collections::HashSet; +use std::fs; +use std::path::Path; +use tree_sitter::{Language, Node, Parser}; + +pub fn state_writes_for_file(file: &Path) -> Result> { + let source = fs::read_to_string(file) + .with_context(|| format!("failed to read {}", file.display()))?; + let mut parser = Parser::new(); + parser + .set_language(&ruby_language()) + .with_context(|| "failed to initialize tree-sitter ruby parser")?; + let tree = parser + .parse(&source, None) + .with_context(|| format!("tree-sitter produced no tree for {}", file.display()))?; + + let mut out = Vec::new(); + let mut seen = HashSet::new(); + let context = ContextState::new(file_owner(file)); + walk(tree.root_node(), &source, file, &context, &mut out, &mut seen); + Ok(out) +} + +fn ruby_language() -> Language { + tree_sitter_ruby::LANGUAGE.into() +} + +#[derive(Clone, Debug, Eq, PartialEq)] +struct ContextState { + file_owner: String, + owner: 
Option, + function: Option, +} + +impl ContextState { + fn new(file_owner: String) -> Self { + Self { + file_owner, + owner: None, + function: None, + } + } + + fn current_owner(&self) -> String { + self.owner + .clone() + .unwrap_or_else(|| self.file_owner.clone()) + } + + fn current_function(&self) -> String { + self.function + .clone() + .unwrap_or_else(|| "(top-level)".to_string()) + } +} + +fn walk( + node: Node<'_>, + source: &str, + file: &Path, + context: &ContextState, + out: &mut Vec, + seen: &mut HashSet, +) { + let next_context = push_function_context(node, push_owner_context(node, source, context), source); + record_state_write(node, source, file, &next_context, out, seen); + + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk(child, source, file, &next_context, out, seen); + } +} + +fn push_owner_context(node: Node<'_>, source: &str, context: &ContextState) -> ContextState { + let Some(owner) = owner_name_from_declaration(node, source) else { + return context.clone(); + }; + let parent_owner = context.owner.clone(); + let full_owner = if let Some(parent) = parent_owner { + if parent != owner && !owner.contains("::") { + format!("{parent}::{owner}") + } else { + owner + } + } else { + owner + }; + let mut next = context.clone(); + next.owner = Some(full_owner); + next +} + +fn push_function_context(node: Node<'_>, mut context: ContextState, source: &str) -> ContextState { + let Some(function) = function_name(node, source) else { + return context; + }; + let owner = context.current_owner(); + context.function = Some(function); + context.owner = Some(owner); + context +} + +fn record_state_write( + node: Node<'_>, + source: &str, + file: &Path, + context: &ContextState, + out: &mut Vec, + seen: &mut HashSet, +) { + if node.kind() == "operator_assignment" { + return; + } + + let Some(assignment) = assignment_target(node) else { + return; + }; + let Some(target) = state_target(assignment.lhs, source) else { + return; + }; + 
if target.field == "[]" || target.field.starts_with('$') { + return; + } + + let file_name = file.to_string_lossy().to_string(); + let owner = context.current_owner(); + let function = context.current_function(); + let line = line(assignment.source); + let key = format!( + "{}\0{}\0{}\0{}\0{}\0{}", + file_name, owner, function, line, target.receiver, target.field + ); + if !seen.insert(key) { + return; + } + + out.push(StateWrite { + field: target.field, + receiver: target.receiver, + file: file_name, + function, + line, + span: span(assignment.source), + owner, + }); +} + +#[derive(Clone, Debug, Eq, PartialEq)] +struct AssignmentTarget<'tree> { + lhs: Node<'tree>, + source: Node<'tree>, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +struct Target { + receiver: String, + field: String, +} + +fn assignment_target(node: Node<'_>) -> Option> { + match node.kind() { + "assignment" | "assignment_expression" | "assignment_statement" => { + let lhs = node + .child_by_field_name("left") + .or_else(|| first_named_child(node))?; + Some(AssignmentTarget { lhs, source: node }) + } + "instance_variable" | "global_variable" if assignment_lhs_node(node) => { + Some(AssignmentTarget { + lhs: node, + source: node.parent().unwrap_or(node), + }) + } + _ => None, + } +} + +fn assignment_lhs_node(node: Node<'_>) -> bool { + if previous_sibling_raw_text(node).as_deref() == Some(":") { + return false; + } + matches!( + next_sibling_raw_text(node).as_deref(), + Some("=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&&=" | "||=") + ) +} + +fn state_target(lhs: Node<'_>, source: &str) -> Option { + if previous_sibling_text(lhs, source).as_deref() == Some(":") { + return None; + } + + match lhs.kind() { + "call" => { + let receiver = lhs.child_by_field_name("receiver")?; + let method = lhs.child_by_field_name("method")?; + Some(Target { + receiver: normalize_text(node_text(receiver, source)), + field: strip_assignment_suffix(node_text(method, source)), + }) + } + "field" + | "field_access" + | 
"selector_expression" + | "member_expression" + | "member_access_expression" + | "attribute" + | "field_expression" + | "navigation_expression" + | "directly_assignable_expression" + | "expression_list" => { + let object = lhs + .child_by_field_name("object") + .or_else(|| lhs.child_by_field_name("receiver")) + .or_else(|| lhs.child_by_field_name("expression")) + .or_else(|| lhs.child_by_field_name("operand")) + .or_else(|| lhs.child_by_field_name("value")) + .or_else(|| lhs.child_by_field_name("argument")) + .or_else(|| first_named_child_except(lhs, "navigation_suffix"))?; + let field = lhs + .child_by_field_name("field") + .or_else(|| lhs.child_by_field_name("property")) + .or_else(|| lhs.child_by_field_name("name")) + .or_else(|| lhs.child_by_field_name("suffix")) + .or_else(|| first_named_child_with_kind(lhs, "navigation_suffix")) + .or_else(|| last_named_child(lhs))?; + let field_text = member_field_text(field, source)?; + Some(Target { + receiver: normalize_text(node_text(object, source)), + field: strip_assignment_suffix(&field_text), + }) + } + "instance_variable" | "global_variable" => Some(Target { + receiver: "self".to_string(), + field: node_text(lhs, source).to_string(), + }), + _ => None, + } +} + +fn function_name(node: Node<'_>, source: &str) -> Option { + match node.kind() { + "method" => node + .child_by_field_name("name") + .map(|name| node_text(name, source).to_string()) + .or_else(|| first_named_text(node, source, &["identifier", "constant", "property_identifier"])), + "singleton_method" => { + let name = node + .child_by_field_name("name") + .map(|name| node_text(name, source).to_string()) + .or_else(|| { + named_children(node) + .into_iter() + .rev() + .find(|child| { + matches!( + child.kind(), + "identifier" | "field_identifier" | "property_identifier" + ) + }) + .map(|child| node_text(child, source).to_string()) + })?; + Some(format!("self.{name}")) + } + "body_statement" if first_child_kind(node) == Some("def") => 
hidden_ruby_method_name(node, source), + "argument_list" if first_child_kind(node) == Some("def") => inline_def_name(node, source), + _ => None, + } +} + +fn owner_name_from_declaration(node: Node<'_>, source: &str) -> Option { + if node.kind() == "body_statement" && matches!(first_child_kind(node), Some("class" | "module")) { + return first_named_text(node, source, &["constant", "identifier", "type_identifier"]); + } + + match node.kind() { + "class" | "module" => node + .child_by_field_name("name") + .map(|name| node_text(name, source).to_string()) + .or_else(|| first_named_text(node, source, &["constant", "identifier", "type_identifier"])), + _ => None, + } +} + +fn hidden_ruby_method_name(node: Node<'_>, source: &str) -> Option { + let children = named_children(node); + let receiver_index = children + .iter() + .position(|child| matches!(child.kind(), "self" | "constant")); + let search: Vec> = if let Some(index) = receiver_index { + children.into_iter().skip(index + 1).collect() + } else { + children + }; + let name = search + .into_iter() + .find(|child| matches!(child.kind(), "identifier" | "field_identifier" | "property_identifier")) + .map(|child| node_text(child, source).to_string())?; + if receiver_index.is_some() { + Some(format!("self.{name}")) + } else { + Some(name) + } +} + +fn inline_def_name(node: Node<'_>, source: &str) -> Option { + hidden_ruby_method_name(node, source) +} + +fn file_owner(file: &Path) -> String { + file.file_stem() + .and_then(|stem| stem.to_str()) + .filter(|stem| !stem.is_empty()) + .unwrap_or("(file)") + .to_string() +} + +fn first_named_text(node: Node<'_>, source: &str, kinds: &[&str]) -> Option { + named_children(node) + .into_iter() + .find(|child| kinds.iter().any(|kind| *kind == child.kind())) + .map(|child| node_text(child, source).to_string()) +} + +fn first_named_child(node: Node<'_>) -> Option> { + let mut cursor = node.walk(); + let child = node.named_children(&mut cursor).next(); + child +} + +fn 
last_named_child(node: Node<'_>) -> Option> { + named_children(node).into_iter().last() +} + +fn first_named_child_except<'tree>(node: Node<'tree>, excluded_kind: &str) -> Option> { + named_children(node) + .into_iter() + .find(|child| child.kind() != excluded_kind) +} + +fn first_named_child_with_kind<'tree>(node: Node<'tree>, kind: &str) -> Option> { + named_children(node) + .into_iter() + .find(|child| child.kind() == kind) +} + +fn named_children(node: Node<'_>) -> Vec> { + let mut cursor = node.walk(); + node.named_children(&mut cursor).collect() +} + +fn first_child_kind(node: Node<'_>) -> Option<&str> { + let mut cursor = node.walk(); + let kind = node.children(&mut cursor).next().map(|child| child.kind()); + kind +} + +fn previous_sibling_text(node: Node<'_>, source: &str) -> Option { + node.prev_sibling() + .map(|sibling| node_text(sibling, source).to_string()) +} + +fn previous_sibling_raw_text(node: Node<'_>) -> Option { + node.prev_sibling() + .map(|sibling| sibling.kind().to_string()) +} + +fn next_sibling_raw_text(node: Node<'_>) -> Option { + node.next_sibling().map(|sibling| sibling.kind().to_string()) +} + +fn member_field_text(field: Node<'_>, source: &str) -> Option { + if field.kind() == "navigation_suffix" { + let suffix = field + .child_by_field_name("suffix") + .or_else(|| { + named_children(field) + .into_iter() + .find(|child| matches!(child.kind(), "identifier" | "simple_identifier" | "field_identifier" | "property_identifier")) + }) + .or_else(|| last_named_child(field))?; + let text = node_text(suffix, source).trim_start_matches(['.', '?']); + return (!text.is_empty()).then(|| text.to_string()); + } + + Some(node_text(field, source).trim_start_matches(['.', '?']).to_string()) +} + +fn strip_assignment_suffix(text: &str) -> String { + text.strip_suffix('=').unwrap_or(text).to_string() +} + +fn node_text<'a>(node: Node<'_>, source: &'a str) -> &'a str { + node.utf8_text(source.as_bytes()).unwrap_or("") +} + +fn normalize_text(text: &str) 
-> String { + text.split_whitespace().collect::>().join(" ") +} + +fn span(node: Node<'_>) -> [usize; 4] { + let start = node.start_position(); + let end = node.end_position(); + [start.row + 1, start.column, end.row + 1, end.column] +} + +fn line(node: Node<'_>) -> usize { + node.start_position().row + 1 +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use tempfile::NamedTempFile; + + fn extract(source: &str) -> Vec { + let mut file = NamedTempFile::new().expect("tempfile"); + file.write_all(source.as_bytes()).expect("write source"); + state_writes_for_file(file.path()).expect("state writes") + } + + #[test] + fn extracts_ruby_attribute_and_instance_writes() { + let writes = extract( + r#" +class Box + def a(n) + n.storage = :heap + n.provenance = :heap + @field = 1 + @counter += 1 + n.count += 1 + e[:kind] = 1 + end + def self.b(x); x.value = 1; end +end +"#, + ); + + let summary: Vec<(&str, &str, &str, &str)> = writes + .iter() + .map(|write| { + ( + write.owner.as_str(), + write.function.as_str(), + write.receiver.as_str(), + write.field.as_str(), + ) + }) + .collect(); + + assert_eq!( + summary, + vec![ + ("Box", "a", "n", "storage"), + ("Box", "a", "n", "provenance"), + ("Box", "a", "self", "@field"), + ("Box", "a", "self", "@counter"), + ("Box", "self.b", "x", "value"), + ] + ); + } + + #[test] + fn extracts_nested_owner_names() { + let writes = extract( + r#" +module Outer + class Inner + def set(node) + node.state = :ready + end + end +end +"#, + ); + + assert_eq!(writes.len(), 1); + assert_eq!(writes[0].owner, "Outer::Inner"); + assert_eq!(writes[0].function, "set"); + assert_eq!(writes[0].field, "state"); + } +} diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs new file mode 100644 index 000000000..a0db40476 --- /dev/null +++ b/gems/decomplex/rust/src/main.rs @@ -0,0 +1,53 @@ +mod decomplex; + +use anyhow::{bail, Context, Result}; +use decomplex::detectors::co_update; +use std::path::PathBuf; + +fn 
main() -> Result<()> { + let command = parse_args(std::env::args().skip(1).collect())?; + match command { + Command::StateWrites { language, files } => { + if language != "ruby" { + bail!("state-writes currently supports --language ruby only"); + } + let facts = co_update::state_writes_for_files(&files) + .with_context(|| "failed to extract state-write facts")?; + println!("{}", serde_json::to_string(&facts)?); + } + } + Ok(()) +} + +enum Command { + StateWrites { language: String, files: Vec }, +} + +fn parse_args(args: Vec) -> Result { + let mut cursor = args.into_iter(); + let Some(command) = cursor.next() else { + bail!("usage: decomplex-rust state-writes --language ruby FILE..."); + }; + match command.as_str() { + "state-writes" => { + let mut language = String::from("ruby"); + let mut files = Vec::new(); + while let Some(arg) = cursor.next() { + if arg == "--language" { + language = cursor + .next() + .with_context(|| "--language requires a value")?; + } else if let Some(value) = arg.strip_prefix("--language=") { + language = value.to_string(); + } else { + files.push(PathBuf::from(arg)); + } + } + if files.is_empty() { + bail!("state-writes requires at least one file"); + } + Ok(Command::StateWrites { language, files }) + } + _ => bail!("unknown decomplex-rust command: {command}"), + } +} diff --git a/gems/decomplex/test/detector_runner_test.rb b/gems/decomplex/test/detector_runner_test.rb new file mode 100644 index 000000000..21a1f2032 --- /dev/null +++ b/gems/decomplex/test/detector_runner_test.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + +require "minitest/autorun" +require "open3" +require_relative "../lib/decomplex" + +class DetectorRunnerTest < Minitest::Test + FIXTURE = "gems/decomplex/test/fixtures/co_update_sample.rb" + + def test_co_update_ruby_engine_canonical_json_is_frozen + expected = <<~JSON + 
{"co_written_pairs":[{"pair":["provenance","storage"],"sites":["gems/decomplex/test/fixtures/co_update_sample.rb:stable_one","gems/decomplex/test/fixtures/co_update_sample.rb:stable_two","gems/decomplex/test/fixtures/co_update_sample.rb:stable_three"],"support":3}],"neglected_updates":[{"at":"gems/decomplex/test/fixtures/co_update_sample.rb:misses_provenance:17","has":"storage","missing":"provenance","pair":["provenance","storage"],"recv":"node","spans":{"gems/decomplex/test/fixtures/co_update_sample.rb:misses_provenance:17":[17,2,17,22]},"support":3}]} + JSON + + assert_equal expected, Decomplex::DetectorRunner.canonical_json("co-update", [FIXTURE], engine: "ruby") + end + + def test_co_update_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("co-update", [FIXTURE]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + + def test_detector_cli_compare_engines_outputs_canonical_json + skip "cargo is not available" unless cargo_available? + + stdout, stderr, status = Open3.capture3( + "ruby", + "gems/decomplex/exe/decomplex", + "detector", + "co-update", + "--compare-engines", + FIXTURE + ) + + assert status.success?, stderr + assert_equal Decomplex::DetectorRunner.canonical_json("co-update", [FIXTURE], engine: "ruby"), stdout + end + + def test_detector_cli_benchmark_keeps_json_stdout_canonical + stdout, stderr, status = Open3.capture3( + "ruby", + "gems/decomplex/exe/decomplex", + "detector", + "co-update", + "--engine=ruby", + "--json", + "--benchmark", + FIXTURE + ) + + assert status.success?, stderr + assert_equal Decomplex::DetectorRunner.canonical_json("co-update", [FIXTURE], engine: "ruby"), stdout + assert_match(/decomplex detector=co-update engine=ruby files=1 elapsed=\d+\.\d+s/, stderr) + end + + private + + def cargo_available? 
+ system("cargo", "--version", out: File::NULL, err: File::NULL) + end + + def diff_message(left, right) + "ruby and rust detector output differed\n--- ruby\n#{left}\n--- rust\n#{right}" + end +end diff --git a/gems/decomplex/test/fixtures/co_update_sample.rb b/gems/decomplex/test/fixtures/co_update_sample.rb new file mode 100644 index 000000000..fc183143b --- /dev/null +++ b/gems/decomplex/test/fixtures/co_update_sample.rb @@ -0,0 +1,23 @@ +def stable_one(node) + node.storage = :heap + node.provenance = :heap +end + +def stable_two(node) + node.storage = :heap + node.provenance = :heap +end + +def stable_three(node) + node.storage = :heap + node.provenance = :heap +end + +def misses_provenance(node) + node.storage = :heap +end + +def ignored_index_write(entry) + entry[:storage] = :heap + entry[:provenance] = :heap +end From adf782b32540d03881ed3040ede3c5e24b93f499 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Tue, 16 Jun 2026 17:37:43 +0000 Subject: [PATCH 07/52] Refine Lineage source review UI --- gems/lineage/src/ui.rs | 575 +++++++++++++++++++++++++++------ gems/lineage/ui/assets/app.css | 78 ++++- 2 files changed, 536 insertions(+), 117 deletions(-) diff --git a/gems/lineage/src/ui.rs b/gems/lineage/src/ui.rs index 9c4f58162..4eb9ce37d 100644 --- a/gems/lineage/src/ui.rs +++ b/gems/lineage/src/ui.rs @@ -4360,7 +4360,6 @@ fn render_index_page( )?; let child_directories = directory_index(&files, ¤t_directory); let child_files = files_in_directory(&files, ¤t_directory); - let table_files = sorted_table_files(&files, filter, ¤t_directory, sort); let filtered = filtered_files_in_directory(&files, filter, ¤t_directory); let branch_context = branch_context(repo); let payload = selected_path @@ -4388,7 +4387,7 @@ fn render_index_page( &dashboard, ¤t_directory, &child_directories, - &table_files, + &child_files, filter, sort, &branch_context, @@ -4521,38 +4520,6 @@ fn filtered_files_in_directory<'a>( .collect() } -fn sorted_table_files<'a>( - files: &'a [UiFile], - 
filter: &str, - directory: &str, - sort: CoverageSort, -) -> Vec<&'a UiFile> { - let mut files = filtered_files_in_directory(files, filter, directory); - files.sort_by(|left, right| match sort { - CoverageSort::Path => left.path.cmp(&right.path), - CoverageSort::Total => right - .tracked_lines - .cmp(&left.tracked_lines) - .then_with(|| left.path.cmp(&right.path)), - CoverageSort::Covered => right - .covered_lines - .cmp(&left.covered_lines) - .then_with(|| left.path.cmp(&right.path)), - CoverageSort::Partial => partial_line_count(right.covered_lines, right.dark_arm_findings) - .cmp(&partial_line_count(left.covered_lines, left.dark_arm_findings)) - .then_with(|| left.path.cmp(&right.path)), - CoverageSort::Missed => missed_line_count(right.tracked_lines, right.covered_lines) - .cmp(&missed_line_count(left.tracked_lines, left.covered_lines)) - .then_with(|| left.path.cmp(&right.path)), - CoverageSort::Percent => right - .line_coverage - .partial_cmp(&left.line_coverage) - .unwrap_or(std::cmp::Ordering::Equal) - .then_with(|| left.path.cmp(&right.path)), - }); - files -} - fn files_in_directory<'a>(files: &'a [UiFile], directory: &str) -> Vec<&'a UiFile> { let directory = normalize_directory(directory); files @@ -4656,44 +4623,277 @@ fn render_source_outline(payload: &UiSourcePayload) -> String { return String::new(); } + let containers = outline_containers(&payload.symbols); + let functions = outline_functions(&payload.symbols, &containers); let mut out = String::new(); out.push_str(""); - out + (None, outline_short_name(&symbol.name)) +} + +fn outline_short_name(name: &str) -> String { + normalize_outline_owner(name) + .rsplit('.') + .next() + .unwrap_or(name) + .trim_start_matches("self.") + .to_string() +} + +fn normalize_outline_owner(name: &str) -> String { + name.replace("::", ".") +} + +fn resolve_outline_owner(owner: &str, containers: &[OutlineContainer<'_>]) -> Option { + let normalized = normalize_outline_owner(owner); + containers + .iter() + 
.find(|container| container.full_name == normalized) + .or_else(|| { + containers + .iter() + .find(|container| container.full_name.ends_with(&format!(".{normalized}"))) + }) + .or_else(|| containers.iter().find(|container| container.display_name == normalized)) + .map(|container| container.full_name.clone()) +} + +fn containing_outline_owner( + symbol: &UiSourceSymbol, + containers: &[OutlineContainer<'_>], +) -> Option { + containers + .iter() + .filter(|container| outline_contains(container.symbol, symbol)) + .max_by_key(|container| container.depth) + .map(|container| container.full_name.clone()) +} + +fn root_outline_entries<'a>( + containers: &'a [OutlineContainer<'a>], + functions: &'a [OutlineFunction<'a>], +) -> Vec> { + sorted_outline_entries( + containers + .iter() + .filter(|container| container.parent.is_none()) + .map(OutlineEntry::Container) + .chain( + functions + .iter() + .filter(|function| function.owner.is_none()) + .map(OutlineEntry::Function), + ), + ) +} + +fn child_outline_entries<'a>( + owner: &str, + containers: &'a [OutlineContainer<'a>], + functions: &'a [OutlineFunction<'a>], +) -> Vec> { + sorted_outline_entries( + containers + .iter() + .filter(|container| container.parent.as_deref() == Some(owner)) + .map(OutlineEntry::Container) + .chain( + functions + .iter() + .filter(|function| function.owner.as_deref() == Some(owner)) + .map(OutlineEntry::Function), + ), + ) +} + +fn sorted_outline_entries<'a>( + entries: impl Iterator>, +) -> Vec> { + let mut entries = entries.collect::>(); + entries.sort_by(|left, right| { + left.start_line() + .cmp(&right.start_line()) + .then_with(|| left.kind_rank().cmp(&right.kind_rank())) + }); + entries +} + +fn render_outline_entry( + out: &mut String, + entry: OutlineEntry<'_>, + containers: &[OutlineContainer<'_>], + functions: &[OutlineFunction<'_>], +) { + match entry { + OutlineEntry::Container(container) => { + render_outline_symbol_link(out, container.symbol, &container.display_name, 
container.depth); + for child in child_outline_entries(&container.full_name, containers, functions) { + render_outline_entry(out, child, containers, functions); + } + } + OutlineEntry::Function(function) => { + render_outline_symbol_link( + out, + function.symbol, + &function.display_name, + function.depth, + ); + } + } +} + +fn render_outline_symbol_link( + out: &mut String, + symbol: &UiSourceSymbol, + display_name: &str, + depth: usize, +) { + out.push_str(""); + if symbol.impure { + out.push_str(""); + } + out.push_str(""); + out.push_str(&html_escape(&outline_kind_label(symbol))); + out.push_str(""); + out.push_str(&html_escape(display_name)); + out.push_str(""); } fn outline_kind_label(symbol: &UiSourceSymbol) -> String { @@ -4892,7 +5092,7 @@ fn line_quality_segments(bar: LineQualityBar) -> LineQualitySegments { fn render_dashboard( dashboard: &UiDashboard, directory: &str, - _directories: &[UiDirectory], + directories: &[UiDirectory], files: &[&UiFile], filter: &str, sort: CoverageSort, @@ -4915,12 +5115,15 @@ fn render_dashboard( &render_architecture_risks(&dashboard.top_architecture_risks, filter), ); let code_tree_heading = format!( - "Code tree ({} files - {} SARIF findings)", - dashboard.files, dashboard.sarif_findings + "Directory entries ({} dirs - {} files - {} SARIF findings)", + directories.len(), + files.len(), + dashboard.sarif_findings ); let code_tree = render_code_tree_table( dashboard, &directory, + directories, files, filter, sort, @@ -5232,20 +5435,22 @@ fn render_path_breadcrumb(path: &str, filter: &str) -> String { fn render_code_tree_table( dashboard: &UiDashboard, directory: &str, + directories: &[UiDirectory], files: &[&UiFile], filter: &str, sort: CoverageSort, ) -> String { - let name_header = render_sort_link("File list", CoverageSort::Path, sort, directory, filter); + let name_header = render_sort_link("Name", CoverageSort::Path, sort, directory, filter); let total_header = render_sort_link("Total", CoverageSort::Total, sort, 
directory, filter); let covered_header = render_sort_link("Covered", CoverageSort::Covered, sort, directory, filter); let partial_header = render_sort_link("Partial", CoverageSort::Partial, sort, directory, filter); let missed_header = render_sort_link("Missed", CoverageSort::Missed, sort, directory, filter); let percent_header = render_sort_link("%", CoverageSort::Percent, sort, directory, filter); let mut rows = String::new(); - for file in files { - rows.push_str(&render_file_coverage_row(file, directory, filter)); + for entry in sorted_code_tree_entries(directories, files, sort) { + rows.push_str(&render_code_tree_row(&entry, directory, filter)); } + let empty = directories.is_empty() && files.is_empty(); let partial = files .iter() .map(|file| partial_line_count(file.covered_lines, file.dark_arm_findings)) @@ -5253,6 +5458,7 @@ fn render_code_tree_table( let partial = partial.clamp(0, dashboard.covered_lines); let subtotal = render_coverage_table_row( None, + "", "Subtotal", "", dashboard.tracked_lines, @@ -5271,13 +5477,121 @@ fn render_code_tree_table( missed_header: &missed_header, percent_header: &percent_header, rows: &rows, - empty: files.is_empty(), + empty, subtotal: &subtotal, }, "coverage table template", ) } +#[derive(Debug, Clone, PartialEq)] +enum CodeTreeEntry<'a> { + Directory(&'a UiDirectory), + File(&'a UiFile), +} + +impl CodeTreeEntry<'_> { + fn name(&self) -> &str { + match self { + CodeTreeEntry::Directory(directory) => &directory.path, + CodeTreeEntry::File(file) => &file.path, + } + } + + fn tracked_lines(&self) -> i64 { + match self { + CodeTreeEntry::Directory(directory) => directory.tracked_lines, + CodeTreeEntry::File(file) => file.tracked_lines, + } + } + + fn covered_lines(&self) -> i64 { + match self { + CodeTreeEntry::Directory(directory) => directory.covered_lines, + CodeTreeEntry::File(file) => file.covered_lines, + } + } + + fn partial_findings(&self) -> i64 { + match self { + CodeTreeEntry::Directory(directory) => 
directory.dark_arm_findings, + CodeTreeEntry::File(file) => file.dark_arm_findings, + } + } + + fn missed_lines(&self) -> i64 { + missed_line_count(self.tracked_lines(), self.covered_lines()) + } + + fn line_coverage(&self) -> f64 { + match self { + CodeTreeEntry::Directory(directory) => directory.line_coverage, + CodeTreeEntry::File(file) => file.line_coverage, + } + } + + fn path_for_tiebreak(&self) -> &str { + self.name() + } +} + +fn sorted_code_tree_entries<'a>( + directories: &'a [UiDirectory], + files: &'a [&'a UiFile], + sort: CoverageSort, +) -> Vec> { + let mut entries = directories + .iter() + .map(CodeTreeEntry::Directory) + .chain(files.iter().copied().map(CodeTreeEntry::File)) + .collect::>(); + entries.sort_by(|left, right| match sort { + CoverageSort::Path => code_tree_entry_kind_rank(left) + .cmp(&code_tree_entry_kind_rank(right)) + .then_with(|| left.path_for_tiebreak().cmp(right.path_for_tiebreak())), + CoverageSort::Total => right + .tracked_lines() + .cmp(&left.tracked_lines()) + .then_with(|| code_tree_entry_kind_rank(left).cmp(&code_tree_entry_kind_rank(right))) + .then_with(|| left.path_for_tiebreak().cmp(right.path_for_tiebreak())), + CoverageSort::Covered => right + .covered_lines() + .cmp(&left.covered_lines()) + .then_with(|| code_tree_entry_kind_rank(left).cmp(&code_tree_entry_kind_rank(right))) + .then_with(|| left.path_for_tiebreak().cmp(right.path_for_tiebreak())), + CoverageSort::Partial => partial_line_count(right.covered_lines(), right.partial_findings()) + .cmp(&partial_line_count(left.covered_lines(), left.partial_findings())) + .then_with(|| code_tree_entry_kind_rank(left).cmp(&code_tree_entry_kind_rank(right))) + .then_with(|| left.path_for_tiebreak().cmp(right.path_for_tiebreak())), + CoverageSort::Missed => right + .missed_lines() + .cmp(&left.missed_lines()) + .then_with(|| code_tree_entry_kind_rank(left).cmp(&code_tree_entry_kind_rank(right))) + .then_with(|| left.path_for_tiebreak().cmp(right.path_for_tiebreak())), + 
CoverageSort::Percent => right + .line_coverage() + .partial_cmp(&left.line_coverage()) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| code_tree_entry_kind_rank(left).cmp(&code_tree_entry_kind_rank(right))) + .then_with(|| left.path_for_tiebreak().cmp(right.path_for_tiebreak())), + }); + entries +} + +fn code_tree_entry_kind_rank(entry: &CodeTreeEntry<'_>) -> u8 { + match entry { + CodeTreeEntry::Directory(_) => 0, + CodeTreeEntry::File(_) => 1, + } +} + +fn render_code_tree_row(entry: &CodeTreeEntry<'_>, directory: &str, filter: &str) -> String { + match entry { + CodeTreeEntry::Directory(child) => render_directory_coverage_row(child, directory, filter), + CodeTreeEntry::File(file) => render_file_coverage_row(file, directory, filter), + } +} + fn render_sort_link( label: &str, target: CoverageSort, @@ -5316,6 +5630,7 @@ fn render_file_coverage_row(file: &UiFile, directory: &str, filter: &str) -> Str ); render_coverage_table_row( Some(&page_href(&file.path, None, filter)), + "fa-regular fa-file-lines", &display_path, &detail, file.tracked_lines, @@ -5327,6 +5642,34 @@ fn render_file_coverage_row(file: &UiFile, directory: &str, filter: &str) -> Str ) } +fn render_directory_coverage_row(directory: &UiDirectory, parent: &str, filter: &str) -> String { + let mut display_path = file_display_path(&directory.path, parent); + if !display_path.ends_with('/') { + display_path.push('/'); + } + let detail = format!( + "{} files, {} units, {} hazards, {} SARIF, {} tests, {} mutant killed", + directory.files, + directory.units, + directory.hazards, + directory.sarif_findings, + directory.distinct_tests, + directory.mutant_killed_tests + ); + render_coverage_table_row( + Some(&directory_href(&directory.path, filter)), + "fa-regular fa-folder", + &display_path, + &detail, + directory.tracked_lines, + directory.covered_lines, + directory.dark_arm_findings, + 0, + 0, + directory.line_coverage, + ) +} + fn render_unit_hotspots(units: &[UiUnitHotspot], filter: &str) -> String 
{ let items = units .iter() @@ -5398,6 +5741,7 @@ fn unit_kind_label(kind: &str, name: &str) -> String { fn render_coverage_table_row( href: Option<&str>, + icon_class: &str, name: &str, detail: &str, tracked_lines: i64, @@ -5421,9 +5765,11 @@ fn render_coverage_table_row( if let Some(href) = href { out.push_str(""); + out.push_str("\" class=\"coverage-name-link\">"); out.push_str(&html_escape(name)); - out.push_str(""); + out.push_str(""); } else { out.push_str(""); out.push_str(&html_escape(name)); @@ -7391,6 +7737,44 @@ mod tests { assert_eq!(line.dark_arm_spans[0].span, Some([2, 2, 2, 6])); } + #[test] + fn source_outline_groups_qualified_methods_under_containers() { + let payload = UiSourcePayload { + path: "gems/slopcop/lib/slopcop/dark_arm_overlay.rb".into(), + commit: None, + lines: Vec::new(), + versions: Vec::new(), + symbols: vec![ + empty_source_symbol("module", "SlopCop", 1, 20), + empty_source_symbol("class", "DarkArmOverlay", 3, 19), + empty_source_symbol("function", "SlopCop.DarkArmOverlay.build", 4, 5), + empty_source_symbol("function", "SlopCop.DarkArmOverlay.to_json", 7, 8), + empty_source_symbol("function", "SlopCop.DarkArmOverlay.to_sarif", 11, 12), + ], + blame: Vec::new(), + annotations: Vec::new(), + warnings: Vec::new(), + }; + + let outline = render_source_outline(&payload); + + assert!(outline.contains("outline-depth-0")); + assert!(outline.contains("outline-depth-1")); + assert!(outline.contains("outline-depth-2")); + assert!(outline.contains("SlopCop")); + assert!(outline.contains("DarkArmOverlay")); + assert!(outline.contains("build")); + assert!(outline.contains("to_json")); + assert!(outline.contains("to_sarif")); + assert!(!outline.contains("SlopCop.DarkArmOverlay.build")); + assert!( + outline.find(">build").unwrap() < outline.find(">to_json").unwrap() + ); + assert!( + outline.find(">to_json").unwrap() < outline.find(">to_sarif").unwrap() + ); + } + #[test] fn source_payload_includes_persisted_sarif_findings() { let dir = 
tempdir().unwrap(); @@ -8447,7 +8831,8 @@ mod tests { assert!(!html.contains(">8 covered lines")); assert!(html.contains("4 mutant-backed / 1 stochastic / 2 invariant")); assert!(html.contains("class=\"ratio-bar hazard-bar\"")); - assert!(html.contains("Code tree (2 files - 7 SARIF findings)")); + assert!(html.contains("Directory entries (0 dirs - 1 files - 7 SARIF findings)")); + assert!(html.contains("class=\"coverage-name-link\">>(); - let by_missed = sorted_table_files(&files, "", "src", CoverageSort::Missed) + let by_missed = sorted_code_tree_entries(&directories, &files, CoverageSort::Missed) .into_iter() - .map(|file| file.path.as_str()) + .map(|entry| entry.name().to_string()) .collect::>(); - let by_percent = sorted_table_files(&files, "", "src", CoverageSort::Percent) + let by_percent = sorted_code_tree_entries(&directories, &files, CoverageSort::Percent) .into_iter() - .map(|file| file.path.as_str()) - .collect::>(); - let filtered = sorted_table_files(&files, "deeper", "src", CoverageSort::Path) - .into_iter() - .map(|file| file.path.as_str()) + .map(|entry| entry.name().to_string()) .collect::>(); - assert_eq!( - by_path, - vec!["src/a.rb", "src/internal/b.rb", "src/internal/deeper/c.rb"] - ); - assert_eq!( - by_missed, - vec!["src/internal/b.rb", "src/a.rb", "src/internal/deeper/c.rb"] - ); - assert_eq!( - by_percent, - vec!["src/internal/deeper/c.rb", "src/a.rb", "src/internal/b.rb"] - ); - assert_eq!(filtered, vec!["src/internal/deeper/c.rb"]); + assert_eq!(by_path, vec!["src/internal", "src/a.rb"]); + assert_eq!(by_missed, vec!["src/internal", "src/a.rb"]); + assert_eq!(by_percent, vec!["src/a.rb", "src/internal"]); } #[test] diff --git a/gems/lineage/ui/assets/app.css b/gems/lineage/ui/assets/app.css index d4fda4f64..e55cb610a 100644 --- a/gems/lineage/ui/assets/app.css +++ b/gems/lineage/ui/assets/app.css @@ -10,8 +10,14 @@ --hazard: #b42318; --dark-arm: #374151; --dark-arm-bg: rgba(31, 41, 55, 0.22); + --link: #1d4ed8; } * { box-sizing: 
border-box; } + a { + color: var(--link); + text-decoration: none; + } + a:hover { text-decoration: underline; } body { margin: 0; background: var(--bg); @@ -41,7 +47,7 @@ h2 { margin: 0 0 10px; font-size: 13px; letter-spacing: 0; } .subtle { color: var(--muted); font-size: 12px; } .nav-links { display: flex; flex-wrap: wrap; gap: 8px; margin-top: 6px; } - .home-link { color: #1d4ed8; font-size: 12px; text-decoration: none; } + .home-link { color: var(--link); font-size: 12px; text-decoration: none; } .toolbar { display: flex; gap: 8px; padding: 10px 14px; border-bottom: 1px solid var(--line); } input { width: 100%; @@ -69,11 +75,12 @@ gap: 8px; border-radius: 6px; padding: 7px 8px; - color: var(--text); + color: var(--link); text-decoration: none; } .file:hover, .file.active { background: #eef2f7; } - .dir-up { color: var(--muted); } + .file:hover .file-path { text-decoration: underline; } + .dir-up { color: var(--link); } .file-path { overflow: hidden; text-overflow: ellipsis; @@ -138,12 +145,16 @@ grid-template-columns: 30px 36px minmax(0, 1fr); gap: 6px; padding: 4px 2px; - color: var(--text); + color: var(--link); text-decoration: none; font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; font-size: 11px; } - .outline a:hover { background: #eef2f7; } + .outline a:hover { + background: #eef2f7; + text-decoration: none; + } + .outline a:hover .outline-name { text-decoration: underline; } .outline-kind { color: var(--muted); } .outline-rail { display: grid; @@ -171,9 +182,17 @@ .hotspot-light-red .outline-hotspot { background: rgba(248, 113, 113, 0.66); } .hotspot-red .outline-hotspot { background: rgba(185, 28, 28, 0.78); } .hotspot-deep-red .outline-hotspot { background: rgba(127, 29, 29, 0.92); } - .pure-symbol .outline-name { color: #166534; } - .impure-symbol .outline-name { color: #7f1d1d; } - .outline-name { min-width: 0; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; } + .outline-depth-1 { padding-left: 14px; } + 
.outline-depth-2 { padding-left: 28px; } + .outline-depth-3 { padding-left: 42px; } + .outline-depth-4 { padding-left: 56px; } + .outline-name { + color: var(--link); + min-width: 0; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } main { min-width: 0; min-height: 0; overflow: hidden; display: flex; flex-direction: column; } .topbar { display: grid; @@ -639,14 +658,20 @@ gap: 5px; align-items: center; justify-content: flex-end; - color: inherit; + color: var(--link); text-decoration: none; } .name-col .sort-link { justify-content: flex-start; } - .sort-link:hover { color: #1d4ed8; } - .active-sort { color: var(--text); } + .sort-link:hover { + color: var(--link); + text-decoration: underline; + } + .active-sort { + color: var(--link); + font-weight: 700; + } .sort-marker { border: 1px solid var(--line); border-radius: 999px; @@ -672,13 +697,30 @@ display: grid; gap: 2px; } - .coverage-name a, + .coverage-name a { + color: var(--link); + text-decoration: none; + font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; + } .coverage-name span { color: var(--text); text-decoration: none; font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; } - .coverage-name a:hover { color: #1d4ed8; } + .coverage-name a:hover { text-decoration: underline; } + .coverage-name-link { + display: inline-flex; + align-items: center; + gap: 6px; + min-width: 0; + } + .coverage-name-link span { + color: inherit; + min-width: 0; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } .coverage-name small { color: var(--muted); font-weight: 400; @@ -930,10 +972,11 @@ } .finding-panel a, .finding-panel strong { - color: #334155; font-weight: 700; text-decoration: none; } + .finding-panel a { color: var(--link); } + .finding-panel strong { color: #334155; } .finding-panel a:hover { text-decoration: underline; } .finding-tier { color: var(--muted); @@ -1003,12 +1046,15 @@ min-height: 30px; padding: 6px 16px; border-bottom: 
1px solid var(--line); - color: var(--text); + color: var(--link); text-decoration: none; font-size: 12px; } .history-row:last-child { border-bottom: 0; } - .history-row:hover { background: #f8fafc; } + .history-row:hover { + background: #f8fafc; + text-decoration: underline; + } .history-row code, .history-row span:nth-child(4), .history-row span:nth-child(5) { From 8ec541ddf6ef0ba7ba8863ec4d41cbeef692d0aa Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Tue, 16 Jun 2026 18:59:03 +0000 Subject: [PATCH 08/52] Port decomplex detectors to mirrored Rust architecture Co-authored-by: Codex --- gems/decomplex/exe/decomplex | 20 +- .../lib/decomplex/detector_runner.rb | 56 +- .../lib/decomplex/native/co_update.rb | 25 + .../decomplex/lib/decomplex/native/command.rb | 54 + .../lib/decomplex/native/flay_similarity.rb | 41 + .../lib/decomplex/native/predicate_aliases.rb | 25 + .../lib/decomplex/native/state_writes.rb | 48 +- gems/decomplex/rust/src/decomplex/ast.rs | 165 +++ .../rust/src/decomplex/detectors/co_update.rs | 194 ++- .../decomplex/detectors/flay_similarity.rs | 1131 +++++++++++++++++ .../rust/src/decomplex/detectors/mod.rs | 2 + .../decomplex/detectors/predicate_alias.rs | 108 ++ gems/decomplex/rust/src/decomplex/mod.rs | 1 + gems/decomplex/rust/src/decomplex/syntax.rs | 88 ++ .../rust/src/decomplex/syntax/mod.rs | 14 - .../rust/src/decomplex/syntax/ruby.rs | 220 +++- gems/decomplex/rust/src/main.rs | 111 +- 17 files changed, 2157 insertions(+), 146 deletions(-) create mode 100644 gems/decomplex/lib/decomplex/native/co_update.rb create mode 100644 gems/decomplex/lib/decomplex/native/command.rb create mode 100644 gems/decomplex/lib/decomplex/native/flay_similarity.rb create mode 100644 gems/decomplex/lib/decomplex/native/predicate_aliases.rb create mode 100644 gems/decomplex/rust/src/decomplex/ast.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs 
create mode 100644 gems/decomplex/rust/src/decomplex/syntax.rs delete mode 100644 gems/decomplex/rust/src/decomplex/syntax/mod.rs diff --git a/gems/decomplex/exe/decomplex b/gems/decomplex/exe/decomplex index a02badfc0..d33e5325f 100755 --- a/gems/decomplex/exe/decomplex +++ b/gems/decomplex/exe/decomplex @@ -69,6 +69,7 @@ if ARGV[0] == "detector" json = false compare = false benchmark = false + detector_options = {} args = args.reject do |arg| case arg when /\A--engine=(.+)\z/ @@ -83,6 +84,12 @@ if ARGV[0] == "detector" when "--benchmark" benchmark = true true + when /\A--mass=(\d+)\z/ + detector_options[:mass] = Integer(Regexp.last_match(1)) + true + when /\A--fuzzy=(\d+)\z/ + detector_options[:fuzzy] = Integer(Regexp.last_match(1)) + true else false end @@ -91,7 +98,7 @@ if ARGV[0] == "detector" abort no_files_message if files.empty? if compare - ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare(detector, files) + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare(detector, files, **detector_options) unless ok warn "decomplex detector #{detector} output differs between ruby and rust engines" warn "--- ruby" @@ -103,9 +110,12 @@ if ARGV[0] == "detector" puts ruby_json elsif json started = Process.clock_gettime(Process::CLOCK_MONOTONIC) - output = Decomplex::DetectorRunner.canonical_json(detector, files, engine: engine) + output = Decomplex::DetectorRunner.canonical_json(detector, files, engine: engine, **detector_options) elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started - warn format("decomplex detector=%s engine=%s files=%d elapsed=%.6fs", detector, engine, files.size, elapsed) if benchmark + if benchmark + warn format("decomplex detector=%s engine=%s files=%d elapsed=%.6fs", + detector, engine, files.size, elapsed) + end print output else abort "decomplex detector currently requires --json or --compare-engines" @@ -287,8 +297,8 @@ if ARGV.empty? 
|| ARGV[0] == "-h" || ARGV[0] == "--help" decomplex FILE_OR_DIR [FILE_OR_DIR ...] decomplex report [--output=FILE] [--emit-json=FILE] [--sarif=FILE] [--exclude=GLOB] FILE_OR_DIR ... - decomplex detector DETECTOR --engine=ruby|rust --json [--benchmark] FILE_OR_DIR ... - decomplex detector DETECTOR --compare-engines FILE_OR_DIR ... + decomplex detector DETECTOR --engine=ruby|rust --json [--benchmark] [--mass=N] [--fuzzy=N] FILE_OR_DIR ... + decomplex detector DETECTOR --compare-engines [--mass=N] [--fuzzy=N] FILE_OR_DIR ... decomplex state-mesh [--output=FILE] [--exclude=GLOB] FILE_OR_DIR ... decomplex state-branches [--output=FILE] [--exclude=GLOB] FILE_OR_DIR ... decomplex temporal-ordering [--output=FILE] [--exclude=GLOB] FILE_OR_DIR ... diff --git a/gems/decomplex/lib/decomplex/detector_runner.rb b/gems/decomplex/lib/decomplex/detector_runner.rb index 1b74cc825..a7b384e56 100644 --- a/gems/decomplex/lib/decomplex/detector_runner.rb +++ b/gems/decomplex/lib/decomplex/detector_runner.rb @@ -2,7 +2,11 @@ require "json" require_relative "co_update" -require_relative "native/state_writes" +require_relative "flay_similarity" +require_relative "native/co_update" +require_relative "native/predicate_aliases" +require_relative "native/flay_similarity" +require_relative "predicate_alias" module Decomplex # Runs one detector in isolation and emits deterministic machine output. @@ -12,31 +16,39 @@ module Decomplex # timing, SARIF metadata, and other nondeterministic details. 
module DetectorRunner DETECTORS = { - "co-update" => :co_update + "co-update" => :co_update, + "predicate-alias" => :predicate_alias, + "predicate-aliases" => :predicate_alias, + "flay-similarity" => :flay_similarity, + "structural-similarity" => :flay_similarity }.freeze ENGINES = %w[ruby rust].freeze module_function - def run(detector, files, engine: "ruby") + def run(detector, files, engine: "ruby", mass: FlaySimilarity::DEFAULT_MASS, fuzzy: FlaySimilarity::DEFAULT_FUZZY) canonical = canonical_detector(detector) validate_engine!(engine) case canonical when :co_update co_update(files, engine: engine) + when :predicate_alias + predicate_alias(files, engine: engine) + when :flay_similarity + flay_similarity(files, engine: engine, mass: mass, fuzzy: fuzzy) else raise ArgumentError, "unsupported decomplex detector: #{detector}" end end - def canonical_json(detector, files, engine: "ruby") - JSON.generate(canonicalize(run(detector, files, engine: engine))) << "\n" + def canonical_json(detector, files, engine: "ruby", **options) + JSON.generate(canonicalize(run(detector, files, engine: engine, **options))) << "\n" end - def compare(detector, files) - ruby_json = canonical_json(detector, files, engine: "ruby") - rust_json = canonical_json(detector, files, engine: "rust") + def compare(detector, files, **options) + ruby_json = canonical_json(detector, files, engine: "ruby", **options) + rust_json = canonical_json(detector, files, engine: "rust", **options) [ruby_json == rust_json, ruby_json, rust_json] end @@ -57,12 +69,9 @@ def detector_names end private_class_method def self.co_update(files, engine:) - report = - if engine.to_s == "rust" - CoUpdate::Report.new(Native::StateWrites.extract(files)) - else - CoUpdate.scan(files) - end + return Native::CoUpdate.scan(files) if engine.to_s == "rust" + + report = CoUpdate.scan(files) { "co_written_pairs" => report.co_written_pairs, @@ -70,6 +79,25 @@ def detector_names } end + private_class_method def 
self.predicate_alias(files, engine:) + return Native::PredicateAliases.scan(files) if engine.to_s == "rust" + + report = PredicateAlias.scan(files) + + { "alias_clusters" => report.alias_clusters } + end + + private_class_method def self.flay_similarity(files, engine:, mass:, fuzzy:) + findings = + if engine.to_s == "rust" + Native::FlaySimilarity.scan(files, mass: mass, fuzzy: fuzzy) + else + FlaySimilarity.scan(files, mass: mass, fuzzy: fuzzy) + end + + { "findings" => findings } + end + private_class_method def self.canonicalize(value) case value when Hash diff --git a/gems/decomplex/lib/decomplex/native/co_update.rb b/gems/decomplex/lib/decomplex/native/co_update.rb new file mode 100644 index 000000000..98ae8bc4a --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/co_update.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module CoUpdate + module_function + + def scan(files) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("co-update", "--language", "ruby", *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/command.rb b/gems/decomplex/lib/decomplex/native/command.rb new file mode 100644 index 000000000..c2eb1151a --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/command.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +require "open3" + +module Decomplex + module Native + # Shared launcher for the native Decomplex migration slices. + module Command + module_function + + def run(*args) + stdout, stderr, status = Open3.capture3(*native_command(args)) + return stdout if status.success? + + raise "decomplex rust #{args.first} failed: #{stderr.empty? ? 
stdout : stderr}" + rescue Errno::ENOENT => e + raise "decomplex rust #{args.first} requires cargo or DECOMPLEX_RUST_BIN: #{e.message}" + end + + def binary_path + env = ENV["DECOMPLEX_RUST_BIN"] + return env if env && !env.empty? + + exe = Gem.win_platform? ? "decomplex-rust.exe" : "decomplex-rust" + File.join(crate_root, "target", "release", exe) + end + + def crate_root + File.expand_path("../../../rust", __dir__) + end + + private_class_method def self.native_command(args) + if fresh_binary?(binary_path) + [binary_path, *args] + else + ["cargo", "run", "--quiet", "--release", "--manifest-path", + File.join(crate_root, "Cargo.toml"), "--", *args] + end + end + + private_class_method def self.fresh_binary?(path) + return false unless File.executable?(path) + return true if ENV["DECOMPLEX_RUST_BIN"] && !ENV["DECOMPLEX_RUST_BIN"].empty? + + binary_mtime = File.mtime(path) + rust_sources.all? { |source| File.mtime(source) <= binary_mtime } + end + + private_class_method def self.rust_sources + Dir[File.join(crate_root, "Cargo.toml"), File.join(crate_root, "src", "**", "*.rs")] + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/flay_similarity.rb b/gems/decomplex/lib/decomplex/native/flay_similarity.rb new file mode 100644 index 000000000..33298b360 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/flay_similarity.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module FlaySimilarity + module_function + + def scan(files, mass:, fuzzy:) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse( + Command.run( + "flay-similarity", + "--language", "ruby", + "--mass", mass.to_i.to_s, + "--fuzzy", fuzzy.to_i.to_s, + *paths + ), + symbolize_names: true + ).map { |finding| normalize_finding(finding) } + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if 
bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + + private_class_method def self.normalize_finding(finding) + finding.merge( + clone_type: finding.fetch(:clone_type).to_sym, + spans: finding.fetch(:spans).transform_values { |span| Array(span).map(&:to_i) } + ) + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/predicate_aliases.rb b/gems/decomplex/lib/decomplex/native/predicate_aliases.rb new file mode 100644 index 000000000..b69ab8c99 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/predicate_aliases.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module PredicateAliases + module_function + + def scan(files) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("predicate-aliases", "--language", "ruby", *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/state_writes.rb b/gems/decomplex/lib/decomplex/native/state_writes.rb index 910807dd0..a36b1fb83 100644 --- a/gems/decomplex/lib/decomplex/native/state_writes.rb +++ b/gems/decomplex/lib/decomplex/native/state_writes.rb @@ -1,14 +1,14 @@ # frozen_string_literal: true require "json" -require "open3" require_relative "../co_update" +require_relative "command" module Decomplex module Native - # Bridge from the Ruby detector layer to the native Decomplex fact extractor. - # The native binary emits syntax facts only; Ruby still owns detector scoring - # and canonical output for the migration proof. + # Bridge from the Ruby detector layer to the native Decomplex state-write + # fact extractor. 
The full co-update detector now runs in native Rust too; + # this module remains for focused fact debugging. module StateWrites module_function @@ -28,19 +28,6 @@ def extract(files) end end - def binary_path - env = ENV["DECOMPLEX_RUST_BIN"] - return env if env && !env.empty? - - crate_root = File.expand_path("../../../rust", __dir__) - exe = Gem.win_platform? ? "decomplex-rust.exe" : "decomplex-rust" - File.join(crate_root, "target", "release", exe) - end - - def crate_root - File.expand_path("../../../rust", __dir__) - end - private_class_method def self.validate_ruby_files!(paths) bad = paths.reject { |path| File.extname(path) == ".rb" } return if bad.empty? @@ -49,32 +36,7 @@ def crate_root end private_class_method def self.run_native(paths) - command = - if fresh_binary?(binary_path) - [binary_path, "state-writes", "--language", "ruby", *paths] - else - ["cargo", "run", "--quiet", "--release", "--manifest-path", - File.join(crate_root, "Cargo.toml"), "--", - "state-writes", "--language", "ruby", *paths] - end - stdout, stderr, status = Open3.capture3(*command) - return stdout if status.success? - - raise "decomplex rust state-writes failed: #{stderr.empty? ? stdout : stderr}" - rescue Errno::ENOENT => e - raise "decomplex rust state-writes requires cargo or DECOMPLEX_RUST_BIN: #{e.message}" - end - - private_class_method def self.fresh_binary?(path) - return false unless File.executable?(path) - return true if ENV["DECOMPLEX_RUST_BIN"] && !ENV["DECOMPLEX_RUST_BIN"].empty? - - binary_mtime = File.mtime(path) - rust_sources.all? 
{ |source| File.mtime(source) <= binary_mtime } - end - - private_class_method def self.rust_sources - Dir[File.join(crate_root, "Cargo.toml"), File.join(crate_root, "src", "**", "*.rs")] + Command.run("state-writes", "--language", "ruby", *paths) end end end diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs new file mode 100644 index 000000000..8507d7c15 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -0,0 +1,165 @@ +use serde::Serialize; +use tree_sitter::Node; + +pub type Span = [usize; 4]; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct RawNode { + pub kind: String, + pub text: String, + pub span: Span, + pub named: bool, + pub children: Vec, +} + +impl RawNode { + pub fn from_tree_sitter(node: Node<'_>, source: &str) -> Self { + let mut cursor = node.walk(); + let mut children: Vec = node + .children(&mut cursor) + .map(|child| Self::from_tree_sitter(child, source)) + .collect(); + + if node.kind() == "argument_list" + && !node_text(node, source).trim_start().starts_with('(') + && children.len() == 1 + && children[0].kind == "scope_resolution" + { + children = children[0].children.clone(); + } + + if node.kind() == "call" { + let mut flattened = Vec::new(); + for child in children { + if child.kind == "argument_list" + && !child.text.trim_start().starts_with('(') + && child.children.len() == 1 + && child.children[0].kind != "scope_resolution" + { + flattened.extend(child.children); + } else { + flattened.push(child); + } + } + children = flattened; + } + + if node.kind() == "bare_string" { + children.clear(); + } + + if matches!(node.kind(), "return" | "next" | "break" | "yield") { + let mut flattened = Vec::new(); + for child in children { + if child.kind == "argument_list" { + flattened.extend(child.children); + } else { + flattened.push(child); + } + } + children = flattened; + } + + if node.kind() == "pattern" + && children.len() == 1 + && children[0].kind == "scope_resolution" 
+ { + children = children[0].children.clone(); + } + + if node.kind() == "when" { + let mut flattened = Vec::new(); + for child in children { + if child.kind == "pattern" + && child.children.len() == 1 + && child.children[0].kind != "scope_resolution" + { + flattened.extend(child.children); + } else { + flattened.push(child); + } + } + children = flattened; + } + + if node.kind() == "body_statement" && children.len() == 1 && children[0].kind == "array" { + children = children[0].children.clone(); + } + if node.kind() == "body_statement" && children.len() == 1 && children[0].kind == "call" { + children = children[0].children.clone(); + } + if node.kind() == "body_statement" && children.len() == 1 && children[0].kind == "conditional" { + children = children[0].children.clone(); + } + if node.kind() == "body_statement" && children.len() == 1 && children[0].kind == "module" { + children = children[0].children.clone(); + } + if node.kind() == "body_statement" && children.len() == 1 && children[0].kind == "binary" { + children = children[0].children.clone(); + } + if node.kind() == "body_statement" + && children.len() == 1 + && children[0].kind == "assignment" + && children[0] + .children + .first() + .map(|child| child.kind == "element_reference") + .unwrap_or(false) + { + children = children[0].children.clone(); + } + if node.kind() == "block_body" && children.len() == 1 && children[0].kind == "call" { + children = children[0].children.clone(); + } + if node.kind() == "block_body" && children.len() == 1 && children[0].kind == "assignment" { + children = children[0].children.clone(); + } + if node.kind() == "block_body" + && children.len() == 1 + && matches!(children[0].kind.as_str(), "array" | "binary" | "string" | "unary") + { + children = children[0].children.clone(); + } + + Self { + kind: node.kind().to_string(), + text: node_text(node, source).to_string(), + span: span(node), + named: node.is_named(), + children, + } + } + + pub fn named_children(&self) -> 
Vec<&RawNode> { + self.children.iter().filter(|child| child.named).collect() + } + + pub fn walk<'a>(&'a self, out: &mut Vec<&'a RawNode>) { + out.push(self); + for child in &self.children { + child.walk(out); + } + } + + pub fn line(&self) -> usize { + self.span[0] + } +} + +pub fn normalize_text(text: &str) -> String { + text.split_whitespace().collect::>().join(" ") +} + +pub fn span(node: Node<'_>) -> Span { + let start = node.start_position(); + let end = node.end_position(); + [start.row + 1, start.column, end.row + 1, end.column] +} + +pub fn line(node: Node<'_>) -> usize { + node.start_position().row + 1 +} + +pub fn node_text<'a>(node: Node<'_>, source: &'a str) -> &'a str { + node.utf8_text(source.as_bytes()).unwrap_or("") +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/co_update.rs b/gems/decomplex/rust/src/decomplex/detectors/co_update.rs index 510853c40..06c0c0944 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/co_update.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/co_update.rs @@ -1,11 +1,193 @@ -use crate::decomplex::syntax::{ruby, StateWrite}; +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, Document, Language, StateWrite}; use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; use std::path::PathBuf; -pub fn state_writes_for_files(files: &[PathBuf]) -> Result> { - let mut facts = Vec::new(); - for file in files { - facts.extend(ruby::state_writes_for_file(file)?); +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct CoUpdateReport { + pub co_written_pairs: Vec, + pub neglected_updates: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct CoWrittenPair { + pub pair: [String; 2], + pub sites: Vec, + pub support: usize, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct NeglectedUpdate { + pub at: String, + pub has: String, + pub missing: String, + pub pair: [String; 2], + pub recv: String, + pub spans: BTreeMap, + pub 
support: usize, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents, 3)) +} + +pub fn state_writes_for_files(files: &[PathBuf], language: Language) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(documents + .iter() + .flat_map(|document| document.state_writes.clone()) + .collect()) +} + +pub fn scan_documents(documents: &[Document], min_support: usize) -> CoUpdateReport { + let writes = documents + .iter() + .flat_map(|document| document.state_writes.clone()) + .collect::>(); + let pairs = co_written_pairs(&writes, min_support); + let neglected = neglected_updates(&writes, &pairs); + CoUpdateReport { + co_written_pairs: pairs, + neglected_updates: neglected, + } +} + +fn co_written_pairs(writes: &[StateWrite], min_support: usize) -> Vec { + let by_unit = writes_by_unit(writes); + let mut counts: Vec<([String; 2], Vec<[String; 2]>)> = Vec::new(); + for ((file, function), unit_writes) in by_unit { + let attrs = unit_writes + .iter() + .map(|write| write.field.clone()) + .collect::>() + .into_iter() + .collect::>(); + for left in 0..attrs.len() { + for right in (left + 1)..attrs.len() { + let pair = [attrs[left].clone(), attrs[right].clone()]; + if let Some((_, units)) = counts.iter_mut().find(|(existing, _)| *existing == pair) { + units.push([file.clone(), function.clone()]); + } else { + counts.push((pair, vec![[file.clone(), function.clone()]])); + } + } + } + } + + let mut out = counts + .into_iter() + .filter_map(|(pair, units)| { + if units.len() < min_support { + return None; + } + let support = units.len(); + Some(CoWrittenPair { + pair, + sites: units + .into_iter() + .map(|unit| format!("{}:{}", unit[0], unit[1])) + .collect(), + support, + }) + }) + .collect::>(); + out.sort_by(|left, right| right.support.cmp(&left.support)); + out +} + +fn neglected_updates(writes: &[StateWrite], pairs: &[CoWrittenPair]) -> Vec { + let 
by_unit = writes_by_unit(writes); + let mut out = Vec::new(); + for ((file, function), unit_writes) in by_unit { + let attrs = unit_writes + .iter() + .map(|write| write.field.as_str()) + .collect::>(); + for pair in pairs { + let left = pair.pair[0].as_str(); + let right = pair.pair[1].as_str(); + let maybe = if attrs.contains(left) && !attrs.contains(right) { + Some((left, right)) + } else if attrs.contains(right) && !attrs.contains(left) { + Some((right, left)) + } else { + None + }; + let Some((has, missing)) = maybe else { + continue; + }; + let Some(write) = unit_writes.iter().find(|write| write.field == has) else { + continue; + }; + let at = format!("{file}:{function}:{}", write.line); + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), write.span); + out.push(NeglectedUpdate { + at, + has: has.to_string(), + missing: missing.to_string(), + pair: pair.pair.clone(), + recv: write.receiver.clone(), + spans, + support: pair.support, + }); + } + } + out.sort_by(|left, right| right.support.cmp(&left.support)); + out +} + +fn writes_by_unit(writes: &[StateWrite]) -> Vec<((String, String), Vec)> { + let mut by_unit: Vec<((String, String), Vec)> = Vec::new(); + for write in writes { + let key = (write.file.clone(), write.function.clone()); + if let Some((_, unit_writes)) = by_unit.iter_mut().find(|(existing, _)| *existing == key) { + unit_writes.push(write.clone()); + } else { + by_unit.push((key, vec![write.clone()])); + } + } + by_unit +} + +#[cfg(test)] +mod tests { + use super::*; + + fn write(file: &str, function: &str, attr: &str, line: usize) -> StateWrite { + StateWrite { + field: attr.to_string(), + receiver: "node".to_string(), + file: file.to_string(), + function: function.to_string(), + line, + span: [line, 0, line, 1], + owner: "Box".to_string(), + } + } + + #[test] + fn reports_frequent_pairs_and_neglected_updates() { + let writes = vec![ + write("a.rb", "one", "storage", 1), + write("a.rb", "one", "provenance", 2), + write("a.rb", "two", 
"storage", 3), + write("a.rb", "two", "provenance", 4), + write("b.rb", "three", "storage", 5), + write("b.rb", "three", "provenance", 6), + write("c.rb", "broken", "storage", 7), + ]; + let pairs = co_written_pairs(&writes, 3); + assert_eq!(pairs.len(), 1); + assert_eq!(pairs[0].pair, ["provenance".to_string(), "storage".to_string()]); + assert_eq!(pairs[0].support, 3); + + let neglected = neglected_updates(&writes, &pairs); + assert_eq!(neglected.len(), 1); + assert_eq!(neglected[0].missing, "provenance"); + assert_eq!(neglected[0].at, "c.rb:broken:7"); } - Ok(facts) } diff --git a/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs b/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs new file mode 100644 index 000000000..ff42c39d7 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs @@ -0,0 +1,1131 @@ +use crate::decomplex::ast::{normalize_text, RawNode, Span}; +use crate::decomplex::syntax::{self, Document, FunctionDef, Language, SimilarityFinding}; +use anyhow::Result; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::path::PathBuf; + +const MAX_FUZZY_CHILDREN: usize = 14; +const IDENTIFIER_KINDS: &[&str] = &[ + "identifier", + "constant", + "type_identifier", + "field_identifier", + "property_identifier", + "shorthand_property_identifier_pattern", + "variable_name", +]; +const LITERAL_KINDS: &[&str] = &[ + "string", + "string_content", + "string_literal", + "interpreted_string_literal", + "raw_string_literal", + "integer", + "float", + "int", + "number", + "rational", + "imaginary", + "character", + "char_literal", + "symbol", + "simple_symbol", + "true", + "false", + "nil", + "none", + "null", +]; +const SKIP_CANDIDATE_KINDS: &[&str] = &[ + "comment", + "identifier", + "constant", + "type_identifier", + "field_identifier", + "property_identifier", + "parameters", + "formal_parameters", + "parameter_list", + "argument_list", + "arguments", + "block_parameters", + "method_parameters", + 
"scope_resolution", +]; +const CLONE_CANDIDATE_KINDS: &[&str] = &[ + "array", + "assignment", + "assignment_statement", + "block", + "case", + "case_clause", + "class", + "class_definition", + "class_declaration", + "do_block", + "enum_declaration", + "for", + "for_statement", + "hash", + "if", + "if_statement", + "match_expression", + "match_statement", + "method", + "method_definition", + "module", + "operator_assignment", + "singleton_method", + "struct_declaration", + "switch_case", + "switch_expression", + "switch_statement", + "unless", + "until", + "while", + "while_statement", +]; +const BODY_KINDS: &[&str] = &[ + "body", + "block", + "body_statement", + "declaration_list", + "statement_block", + "compound_statement", + "suite", + "do_block", +]; +const CALL_KINDS: &[&str] = &[ + "call", + "call_expression", + "method_invocation", + "invocation_expression", +]; + +#[derive(Clone, Debug)] +struct MethodSpan { + name: String, + first_line: usize, + last_line: usize, +} + +#[derive(Clone, Debug)] +struct Candidate { + file: String, + line: usize, + span: Span, + method_name: String, + node_name: String, + mass: usize, + fingerprint: String, + raw: String, + child_fingerprints: Vec, + child_masses: Vec, +} + +pub fn scan_files( + files: &[PathBuf], + language: Language, + mass: usize, + fuzzy: usize, +) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents, mass, fuzzy)) +} + +pub fn scan_documents( + documents: &[Document], + mass: usize, + fuzzy: usize, +) -> Vec { + let mut scanner = Scanner::new(mass, fuzzy); + scanner.scan(documents) +} + +struct Scanner { + mass: usize, + fuzzy: usize, + method_spans: HashMap>, + source_lines: HashMap>, +} + +impl Scanner { + fn new(mass: usize, fuzzy: usize) -> Self { + Self { + mass, + fuzzy, + method_spans: HashMap::new(), + source_lines: HashMap::new(), + } + } + + fn scan(&mut self, documents: &[Document]) -> Vec { + let mut candidates = Vec::new(); + for document in 
documents { + candidates.extend(self.candidates_for_document(document)); + } + let mut findings = self.type2_findings(&candidates); + findings.extend(self.type3_findings(&candidates)); + findings.sort_by(|left, right| { + ( + clone_type_rank(&left.clone_type), + std::cmp::Reverse(left.mass), + left.node.clone(), + left.at.clone(), + ) + .cmp(&( + clone_type_rank(&right.clone_type), + std::cmp::Reverse(right.mass), + right.node.clone(), + right.at.clone(), + )) + }); + self.prune_nested_findings(findings) + } + + fn candidates_for_document(&mut self, document: &Document) -> Vec { + self.source_lines + .insert(document.file.clone(), document.lines.clone()); + self.method_spans + .insert(document.file.clone(), collect_method_spans(&document.function_defs)); + + let mut out = Vec::new(); + let mut seen = HashSet::new(); + for function in &document.function_defs { + if let Some(candidate) = self.candidate_for(&document.file, &function.body, Some("defn")) { + self.add_candidate(&mut out, &mut seen, candidate); + } + } + + let mut nodes = Vec::new(); + document.root.walk(&mut nodes); + for node in nodes { + if candidate_node(node) { + if let Some(candidate) = self.candidate_for(&document.file, node, None) { + self.add_candidate(&mut out, &mut seen, candidate); + } + } + } + out + } + + fn add_candidate(&self, out: &mut Vec, seen: &mut HashSet, candidate: Candidate) { + if candidate.mass < self.effective_mass_floor() || typed_struct_schema_text(&candidate.raw) { + return; + } + let key = format!( + "{}\0{}\0{:?}\0{}\0{}", + candidate.file, candidate.line, candidate.span, candidate.node_name, candidate.fingerprint + ); + if seen.insert(key) { + out.push(candidate); + } + } + + fn candidate_for( + &self, + file: &str, + node: &RawNode, + node_name: Option<&str>, + ) -> Option { + let (node_fingerprint, mass) = fingerprint(node, &mut HashSet::new()); + if node_fingerprint.is_empty() { + return None; + } + let line = node.line(); + let method = self.method_span_for(file, 
line); + let children = fuzzy_children_for(node); + let mut child_fingerprints = Vec::new(); + let mut child_masses = Vec::new(); + for child in children { + let (child_fp, child_mass) = fingerprint(child, &mut HashSet::new()); + if !child_fp.is_empty() && child_mass > 0 { + child_fingerprints.push(child_fp); + child_masses.push(child_mass); + } + } + Some(Candidate { + file: file.to_string(), + line, + span: node.span, + method_name: method.name, + node_name: node_name + .map(ToString::to_string) + .unwrap_or_else(|| flay_node_name(node).to_string()), + mass, + fingerprint: node_fingerprint, + raw: normalize_text(&node.text), + child_fingerprints, + child_masses, + }) + } + + fn type2_findings(&self, candidates: &[Candidate]) -> Vec { + let mut groups: HashMap<&str, Vec> = HashMap::new(); + for candidate in candidates { + groups + .entry(candidate.fingerprint.as_str()) + .or_default() + .push(candidate.clone()); + } + let mut out = Vec::new(); + for cluster in groups.values() { + let cluster = uniq_sites(cluster.clone()); + if cluster.len() < 2 { + continue; + } + let raw_count = cluster.iter().map(|candidate| candidate.raw.as_str()).collect::>().len(); + if raw_count < 2 || self.typed_struct_schema_cluster(&cluster) { + continue; + } + let mass = cluster.iter().map(|candidate| candidate.mass).min().unwrap_or(0); + out.push(self.finding_for(&cluster, "type2", mass)); + } + out + } + + fn type3_findings(&self, candidates: &[Candidate]) -> Vec { + if self.fuzzy == 0 { + return Vec::new(); + } + let mut groups: HashMap> = HashMap::new(); + for candidate in candidates { + for (signature, signature_mass) in self.fuzzy_signatures(candidate) { + if signature_mass >= self.effective_mass_floor() { + groups + .entry(signature) + .or_default() + .push((candidate.clone(), signature_mass)); + } + } + } + + let mut seen = HashSet::new(); + let mut out = Vec::new(); + for rows in groups.values() { + let cluster = uniq_sites(rows.iter().map(|(candidate, _)| 
candidate.clone()).collect()); + if cluster.len() < 2 { + continue; + } + let fingerprint_count = cluster + .iter() + .map(|candidate| candidate.fingerprint.as_str()) + .collect::>() + .len(); + if fingerprint_count < 2 || self.typed_struct_schema_cluster(&cluster) { + continue; + } + let mut key = cluster + .iter() + .map(|candidate| format!("{}\0{}\0{}", candidate.file, candidate.line, candidate.node_name)) + .collect::>(); + key.sort(); + let key = key.join("\0"); + if !seen.insert(key) { + continue; + } + let mass = rows.iter().map(|(_, signature_mass)| *signature_mass).max().unwrap_or(0); + out.push(self.finding_for(&cluster, "type3", mass)); + } + out + } + + fn finding_for(&self, cluster: &[Candidate], clone_type: &str, mass: usize) -> SimilarityFinding { + let mut sites = cluster.iter().map(site_for).collect::>(); + sites.sort(); + SimilarityFinding { + at: sites.first().cloned().unwrap_or_default(), + sites, + spans: self.spans_for(cluster), + clone_type: clone_type.to_string(), + node: most_common_node(cluster), + mass, + locations: { + let mut locations = cluster + .iter() + .map(|candidate| format!("{}:{}", candidate.file, candidate.line)) + .collect::>(); + locations.sort(); + locations + }, + } + } + + fn spans_for(&self, cluster: &[Candidate]) -> BTreeMap { + let mut spans = BTreeMap::new(); + for candidate in cluster { + let value = if candidate.node_name == "defn" { + let method = self.method_span_for(&candidate.file, candidate.line); + [method.first_line, 0, method.last_line, 1] + } else { + candidate.span + }; + spans.insert(site_for(candidate), value); + } + spans + } + + fn prune_nested_findings(&self, findings: Vec) -> Vec { + let mut kept = Vec::new(); + for finding in findings { + if kept.iter().any(|larger| nested_finding(&finding, larger)) { + continue; + } + kept.push(finding); + } + kept + } + + fn fuzzy_signatures(&self, candidate: &Candidate) -> Vec<(String, usize)> { + let children = &candidate.child_fingerprints; + if children.len() 
< 2 || children.len() > MAX_FUZZY_CHILDREN { + return Vec::new(); + } + let max_delete = self.fuzzy.min(children.len() - 1); + let mut signatures = Vec::new(); + for delete_count in 0..=max_delete { + for deleted in combinations(children.len(), delete_count) { + let deleted = deleted.into_iter().collect::>(); + let mut kept = Vec::new(); + let mut mass = 0; + for (index, fingerprint) in children.iter().enumerate() { + if deleted.contains(&index) { + continue; + } + kept.push(fingerprint.as_str()); + mass += candidate.child_masses[index]; + } + signatures.push((format!("{}({})", candidate.node_name, kept.join("|")), mass)); + } + } + signatures + } + + fn typed_struct_schema_cluster(&self, cluster: &[Candidate]) -> bool { + cluster.iter().all(|candidate| { + self.typed_struct_schema_line(&candidate.file, candidate.line) + || typed_struct_schema_text(&candidate.raw) + }) + } + + fn typed_struct_schema_line(&self, file: &str, line_no: usize) -> bool { + self.source_lines + .get(file) + .and_then(|lines| lines.get(line_no.saturating_sub(1))) + .map(|line| { + let stripped = line.trim_start(); + stripped.starts_with("const :") || stripped.starts_with("prop :") + }) + .unwrap_or(false) + } + + fn method_span_for(&self, file: &str, line_no: usize) -> MethodSpan { + self.method_spans + .get(file) + .and_then(|spans| { + spans + .iter() + .find(|span| span.first_line <= line_no && line_no <= span.last_line) + }) + .cloned() + .unwrap_or_else(|| MethodSpan { + name: "(top-level)".to_string(), + first_line: line_no, + last_line: line_no, + }) + } + + fn effective_mass_floor(&self) -> usize { + self.mass.max(((self.mass as f64) * 23.0 / 8.0).ceil() as usize) + } +} + +fn collect_method_spans(functions: &[FunctionDef]) -> Vec { + let mut spans = functions + .iter() + .map(|function| MethodSpan { + name: function.name.clone(), + first_line: function.span[0], + last_line: function.span[2], + }) + .collect::>(); + spans.sort_by_key(|method| (method.first_line, 
std::cmp::Reverse(method.last_line))); + spans +} + +fn candidate_node(node: &RawNode) -> bool { + node.named + && !SKIP_CANDIDATE_KINDS.contains(&node.kind.as_str()) + && CLONE_CANDIDATE_KINDS.contains(&node.kind.as_str()) + && !typed_struct_schema_text(&node.text) + && !node.named_children().is_empty() +} + +fn fuzzy_children_for(node: &RawNode) -> Vec<&RawNode> { + let source_node = body_node(node).unwrap_or(node); + let mut children = source_node.named_children(); + if children.is_empty() { + children = node.named_children(); + } + children + .into_iter() + .filter(|child| { + !SKIP_CANDIDATE_KINDS.contains(&child.kind.as_str()) + && !typed_struct_schema_text(&child.text) + }) + .collect() +} + +fn body_node(node: &RawNode) -> Option<&RawNode> { + node.children + .iter() + .find(|child| BODY_KINDS.contains(&child.kind.as_str())) +} + +fn fingerprint(node: &RawNode, active: &mut HashSet) -> (String, usize) { + let key = node_key(node); + if active.contains(&key) || node.kind == "comment" { + return (String::new(), 0); + } + active.insert(key.clone()); + let out = if CALL_KINDS.contains(&node.kind.as_str()) && call_message(node).is_some() { + fingerprint_call(node, active) + } else if node.children.is_empty() { + let token = terminal_token(node); + if token.is_empty() { + (String::new(), 0) + } else { + (token, 1) + } + } else { + let mut child_parts = Vec::new(); + let mut mass = 1; + for child in &node.children { + let (child_fp, child_mass) = fingerprint(child, active); + if child_fp.is_empty() { + continue; + } + child_parts.push(child_fp); + mass += child_mass; + } + if child_parts.is_empty() { + (terminal_token(node), 1) + } else { + (format!("{}({})", node.kind, child_parts.join(" ")), mass) + } + }; + active.remove(&key); + out +} + +fn fingerprint_call(node: &RawNode, active: &mut HashSet) -> (String, usize) { + let message = call_message(node).unwrap_or_default(); + let mut child_parts = Vec::new(); + let mut mass = 1; + for child in &node.children { + 
let (child_fp, child_mass) = fingerprint(child, active); + if child_fp.is_empty() { + continue; + } + child_parts.push(child_fp); + mass += child_mass; + } + ( + format!("{}<{}>({})", node.kind, message, child_parts.join(" ")), + mass, + ) +} + +fn call_message(node: &RawNode) -> Option { + if !node + .children + .iter() + .any(|child| matches!(child.kind.as_str(), "argument_list" | "arguments")) + { + return None; + } + let argument_start = node + .children + .iter() + .find(|child| matches!(child.kind.as_str(), "argument_list" | "arguments")) + .map(|child| (child.span[0], child.span[1])); + let named_before_args = node + .named_children() + .into_iter() + .filter(|child| { + argument_start + .map(|start| (child.span[0], child.span[1]) < start) + .unwrap_or(true) + }) + .collect::>(); + named_before_args + .last() + .and_then(|callee| callee_message(callee)) +} + +fn callee_message(node: &RawNode) -> Option { + if IDENTIFIER_KINDS.contains(&node.kind.as_str()) { + return Some(node.text.clone()); + } + node.named_children() + .into_iter() + .rev() + .find(|child| IDENTIFIER_KINDS.contains(&child.kind.as_str())) + .map(|child| child.text.clone()) +} + +fn terminal_token(node: &RawNode) -> String { + let kind = node.kind.as_str(); + if IDENTIFIER_KINDS.contains(&kind) { + return "id".to_string(); + } + if LITERAL_KINDS.contains(&kind) { + return literal_token(kind).to_string(); + } + let text = normalize_text(&node.text); + if text.is_empty() { + return String::new(); + } + if identifier_text(&text) { + return "id".to_string(); + } + if literal_text(&text) { + return "lit".to_string(); + } + format!("{kind}:{text}") +} + +fn literal_token(kind: &str) -> &str { + match kind { + "true" | "false" => "bool", + "nil" | "none" | "null" => "nil", + _ => "lit", + } +} + +fn identifier_text(text: &str) -> bool { + let mut chars = text.chars(); + let Some(first) = chars.next() else { + return false; + }; + (first == '_' || first.is_ascii_alphabetic()) + && chars.all(|char| 
char == '_' || char == '!' || char == '?' || char == '=' || char.is_ascii_alphanumeric()) +} + +fn literal_text(text: &str) -> bool { + if symbol_literal_text(text) || quoted_literal_text(text, '"') || quoted_literal_text(text, '\'') { + return true; + } + text.parse::().is_ok() +} + +fn symbol_literal_text(text: &str) -> bool { + let mut chars = text.chars(); + if chars.next() != Some(':') { + return false; + } + let Some(first) = chars.next() else { + return false; + }; + (first == '_' || first.is_ascii_alphabetic()) + && chars.all(|char| char == '_' || char.is_ascii_alphanumeric()) +} + +fn quoted_literal_text(text: &str, quote: char) -> bool { + text.len() >= 2 && text.starts_with(quote) && text.ends_with(quote) +} + +fn flay_node_name(node: &RawNode) -> &str { + match node.kind.as_str() { + "method" | "function_definition" | "function_declaration" | "method_definition" | "function_item" => "defn", + "singleton_method" => "defs", + other => other, + } +} + +fn uniq_sites(candidates: Vec) -> Vec { + let mut seen = HashSet::new(); + let mut out = Vec::new(); + for candidate in candidates { + let key = format!("{}\0{}\0{}", candidate.file, candidate.line, candidate.node_name); + if seen.insert(key) { + out.push(candidate); + } + } + out +} + +fn most_common_node(cluster: &[Candidate]) -> String { + let mut order = Vec::new(); + let mut tally: HashMap<&str, usize> = HashMap::new(); + for candidate in cluster { + if !tally.contains_key(candidate.node_name.as_str()) { + order.push(candidate.node_name.as_str()); + } + *tally.entry(candidate.node_name.as_str()).or_default() += 1; + } + let mut best = ""; + let mut best_count = 0; + for node in order { + let count = tally.get(node).copied().unwrap_or(0); + if count > best_count { + best = node; + best_count = count; + } + } + best.to_string() +} + +fn site_for(candidate: &Candidate) -> String { + format!("{}:{}:{}", candidate.file, candidate.method_name, candidate.line) +} + +fn nested_finding(inner: &SimilarityFinding, 
outer: &SimilarityFinding) -> bool { + if outer.mass <= inner.mass { + return false; + } + inner.spans.iter().all(|(site, span)| { + let file = site_file(site); + outer.spans.iter().any(|(outer_site, outer_span)| { + site_file(outer_site) == file && contains_span(*outer_span, *span) + }) + }) +} + +fn contains_span(outer: Span, inner: Span) -> bool { + let outer_start = (outer[0], outer[1]); + let outer_end = (outer[2], outer[3]); + let inner_start = (inner[0], inner[1]); + let inner_end = (inner[2], inner[3]); + outer_start <= inner_start && outer_end >= inner_end +} + +fn site_file(site: &str) -> String { + let mut parts = site.split(':').collect::>(); + if parts.len() >= 2 { + parts.truncate(parts.len() - 2); + } + parts.join(":") +} + +fn typed_struct_schema_text(text: &str) -> bool { + text.contains("< T::Struct") + || text.contains(" usize { + if clone_type == "type2" { + 0 + } else { + 1 + } +} + +fn node_key(node: &RawNode) -> String { + format!( + "{}\0{}\0{}\0{}\0{}\0{}", + node.kind, + node.span[0], + node.span[1], + node.span[2], + node.span[3], + node.text.len() + ) +} + +fn combinations(size: usize, count: usize) -> Vec> { + fn step(start: usize, size: usize, count: usize, current: &mut Vec, out: &mut Vec>) { + if current.len() == count { + out.push(current.clone()); + return; + } + for index in start..size { + current.push(index); + step(index + 1, size, count, current, out); + current.pop(); + } + } + let mut out = Vec::new(); + step(0, size, count, &mut Vec::new(), &mut out); + out +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use tempfile::NamedTempFile; + + fn scan(source: &str, mass: usize, fuzzy: usize) -> Vec { + let mut file = NamedTempFile::new().expect("tempfile"); + file.write_all(source.as_bytes()).expect("write source"); + scan_files(&[file.path().to_path_buf()], Language::Ruby, mass, fuzzy).expect("scan") + } + + fn document(source: &str) -> Document { + let mut file = NamedTempFile::new().expect("tempfile"); + 
file.write_all(source.as_bytes()).expect("write source"); + syntax::parse_file(file.path().to_path_buf(), Language::Ruby).expect("document") + } + + #[test] + fn detects_type2_similarity_for_renamed_ruby_methods() { + let out = scan( + r#" +def a(node) + return false unless node.respond_to?(:type) + node.type == :heap || node.type == :frame +end + +def b(entry) + return false unless entry.respond_to?(:kind) + entry.kind == :heap || entry.kind == :frame +end +"#, + 8, + 1, + ); + assert!(out + .iter() + .any(|finding| finding.clone_type == "type2" && finding.node == "defn")); + } + + #[test] + fn detects_type3_similarity_for_missing_child() { + let out = scan( + r#" +def a(node) + alpha(node.left) + beta(node.right) + gamma(node.name) + delta(node.type) +end + +def b(entry) + alpha(entry.left) + beta(entry.right) + delta(entry.type) +end +"#, + 4, + 1, + ); + assert!(out.iter().any(|finding| finding.clone_type == "type3")); + } + + #[test] + fn singleton_method_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +def self.release(ctx_id, lock_index, lock_ref, unlock_method) + [ + MIR::Set.new( + MIR::FieldGet.new(MIR::Ident.new("__ctx_#{ctx_id}"), "__lock_held_#{lock_index}"), + MIR::Lit.new("false"), + false, + ), + MIR::ExprStmt.new( + MIR::MethodCall.new(MIR::Ident.new(lock_ref), unlock_method, [], false), + false, + ), + ] +end +"#, + ); + let function = doc.function_defs.first().expect("function"); + let (_fingerprint, mass) = fingerprint(&function.body, &mut HashSet::new()); + assert_eq!(mass, 128); + } + + #[test] + fn unless_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +def check(attrs, tok) + unless attrs + has_at = T.must(tok).value.start_with?('@') + candidates = has_at ? 
BG_SIGILS.keys : BG_SIGILS.keys.map { |k| k.sub(/^@/, '') } + emit_typo_suggestion!( + tok, T.must(tok).value, candidates, + "Unknown BG prefix #{T.must(tok).value.inspect}", + "closest BG body sigil", + category: :type, cascade: true + ) + end +end +"#, + ); + let mut nodes = Vec::new(); + doc.root.walk(&mut nodes); + let node = nodes + .into_iter() + .find(|node| node.kind == "unless" && node.named) + .expect("unless"); + let (_fingerprint, mass) = fingerprint(node, &mut HashSet::new()); + assert_eq!(mass, 126); + } + + #[test] + fn struct_assignment_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +DeferStmt = Struct.new(:body) do + extend T::Sig + include Stmt + sig { params(body: DeferBodyInput).void } + def initialize(body) + MIR.validate_defer_body!(body, "MIR::DeferStmt") + super(body) + end + + sig { returns(T::Array[BodySlot]) } + def body_slots + body.is_a?(Array) ? [body_slot(:body, body, ->(new_body) { self.body = new_body })] : [] + end + sig { returns(T::Array[Emittable]) } + def child_exprs = body.is_a?(Array) ? 
[] : compact_child_exprs([body]) +end +"#, + ); + let mut nodes = Vec::new(); + doc.root.walk(&mut nodes); + let node = nodes + .into_iter() + .find(|node| node.kind == "assignment" && node.named) + .expect("assignment"); + let (_fingerprint, mass) = fingerprint(node, &mut HashSet::new()); + assert_eq!(mass, 178); + } + + #[test] + fn body_slots_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +SwitchStmt = Struct.new(:subject, :arms, :default_body) do + extend T::Sig + include Stmt + sig { returns(T::Array[Emittable]) } + def child_exprs + compact_child_exprs([subject, *(arms || []).flat_map(&:patterns)]) + end + sig { returns(T::Array[BodySlot]) } + def body_slots + slots = T.let([], T::Array[BodySlot]) + arms&.each_with_index do |arm, index| + slots << body_slot(:"arms_#{index}", arm.body, ->(new_body) { arm.body = new_body }) + end + slots << body_slot(:default_body, default_body, ->(new_body) { self.default_body = new_body }) if default_body + slots + end +end +"#, + ); + let function = doc + .function_defs + .iter() + .find(|function| function.name == "body_slots") + .expect("body_slots"); + let (_fingerprint, mass) = fingerprint(&function.body, &mut HashSet::new()); + assert_eq!(mass, 110); + } + + #[test] + fn if_bind_do_block_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +IfBind = Struct.new(:token, :bindings, :then_branch, :else_branch) do + extend T::Sig + include Locatable + + sig { params(args: T.untyped).void } + def initialize(*args) + super + self[:bindings] = [] if self[:bindings].nil? 
+ end + + sig { params(val: T::Array[AST::Binding]).void } + def bindings=(val) + self[:bindings] = val + end +end +"#, + ); + let mut nodes = Vec::new(); + doc.root.walk(&mut nodes); + let node = nodes + .into_iter() + .find(|node| node.kind == "do_block" && node.named) + .expect("do_block"); + let (_fingerprint, mass) = fingerprint(node, &mut HashSet::new()); + assert_eq!(mass, 110); + } + + #[test] + fn control_flow_argument_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +def self.find_package_source(pkg_name, start_dir:) + dir = File.expand_path(start_dir) + loop do + candidate = File.join(dir, "packages", pkg_name, "src", "lib.cht") + return candidate if File.exist?(candidate) + + parent = File.dirname(dir) + break if parent == dir + + dir = parent + end + nil +end +"#, + ); + let function = doc.function_defs.first().expect("function"); + let (_fingerprint, mass) = fingerprint(&function.body, &mut HashSet::new()); + assert_eq!(mass, 96); + } + + #[test] + fn case_scope_pattern_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +def walk_for_local_decls(node, &block) + return if node.nil? 
+ case node + when AST::BindExpr, AST::VarDecl + yield node if auto?(node.type) + walk_for_local_decls(node.value, &block) + when AST::FunctionDef + when Array + node.each { |c| walk_for_local_decls(c, &block) } + when Hash + node.each_value { |v| walk_for_local_decls(v, &block) } + else + if node.respond_to?(:each_pair) + node.each_pair { |_, v| walk_for_local_decls(v, &block) } + end + end +end +"#, + ); + let mut nodes = Vec::new(); + doc.root.walk(&mut nodes); + let node = nodes + .into_iter() + .find(|node| node.kind == "case" && node.named) + .expect("case"); + let (_fingerprint, mass) = fingerprint(node, &mut HashSet::new()); + assert_eq!(mass, 136); + } + + #[test] + fn case_simple_pattern_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +def references_alias?(expr, alias_name) + found = false + walk = lambda do |n| + return if found + case n + when nil, Symbol, String, Integer, Float, TrueClass, FalseClass + when Array then n.each { |x| walk.call(x) } + when AST::Identifier + found = true if n.name == alias_name + else + n.each_pair { |_, v| walk.call(v) } if n.respond_to?(:each_pair) + end + end + walk.call(expr) + found +end +"#, + ); + let mut nodes = Vec::new(); + doc.root.walk(&mut nodes); + let node = nodes + .into_iter() + .find(|node| node.kind == "case" && node.named) + .expect("case"); + let (_fingerprint, mass) = fingerprint(node, &mut HashSet::new()); + assert_eq!(mass, 96); + } + + #[test] + fn alias_cluster_mass_matches_ruby_oracle_shape() { + let doc = document( + r##" +def alias_clusters + @preds.group_by(&:body).filter_map do |body, ps| + names = ps.map(&:name).uniq + next if names.size < 2 + + { body: body, names: names, + sites: ps.map { |p| "#{p.file}:#{p.name}:#{p.line}" }, + spans: ps.to_h { |p| ["#{p.file}:#{p.name}:#{p.line}", p.span] } } + end.sort_by { |h| -h[:names].size } +end +"##, + ); + let function = doc.function_defs.first().expect("function"); + let (_fingerprint, mass) = fingerprint(&function.body, &mut 
HashSet::new()); + assert_eq!(mass, 175); + } + + #[test] + fn native_module_mass_matches_ruby_oracle_shape() { + let doc = document( + r##" +module Decomplex + module Native + module CoUpdate + module_function + + def scan(files) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("co-update", "--language", "ruby", *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end +"##, + ); + let mut nodes = Vec::new(); + doc.root.walk(&mut nodes); + let node = nodes + .into_iter() + .find(|node| node.kind == "module" && node.named) + .expect("module"); + let (_fingerprint, mass) = fingerprint(node, &mut HashSet::new()); + assert_eq!(mass, 150); + } + + #[test] + fn hidden_method_name_mass_matches_ruby_oracle_shape() { + let doc = document( + r##" +def inline_def_name(node) + return nil unless inline_def_argument_list?(node) + + receiver_index = node.named_children.index { |child| child.kind == "self" || child.kind == "constant" } + search = receiver_index ? node.named_children[(receiver_index + 1)..] : node.named_children + name = search&.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) }&.text + receiver_index ? 
"self.#{name}" : name +end +"##, + ); + let function = doc.function_defs.first().expect("function"); + let (_fingerprint, mass) = fingerprint(&function.body, &mut HashSet::new()); + assert_eq!(mass, 132); + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/mod.rs b/gems/decomplex/rust/src/decomplex/detectors/mod.rs index 0c7589a70..c40ce19bf 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/mod.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/mod.rs @@ -1 +1,3 @@ pub mod co_update; +pub mod flay_similarity; +pub mod predicate_alias; diff --git a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs new file mode 100644 index 000000000..e7fda8767 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs @@ -0,0 +1,108 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, Document, Language, PredicateAlias}; +use anyhow::Result; +use serde::Serialize; +use std::collections::BTreeMap; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct PredicateAliasReport { + pub alias_clusters: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct AliasCluster { + pub body: String, + pub names: Vec, + pub sites: Vec, + pub spans: BTreeMap, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> PredicateAliasReport { + let predicates = documents + .iter() + .flat_map(|document| document.predicate_aliases.clone()) + .collect::>(); + PredicateAliasReport { + alias_clusters: alias_clusters(&predicates), + } +} + +fn alias_clusters(predicates: &[PredicateAlias]) -> Vec { + let mut by_body: Vec<(&str, Vec<&PredicateAlias>)> = Vec::new(); + for predicate in predicates { + if let Some((_, rows)) = by_body.iter_mut().find(|(body, _)| 
*body == predicate.body.as_str()) { + rows.push(predicate); + } else { + by_body.push((predicate.body.as_str(), vec![predicate])); + } + } + let mut out = by_body + .into_iter() + .filter_map(|(body, rows)| { + let mut names = Vec::new(); + for predicate in &rows { + if !names.contains(&predicate.name) { + names.push(predicate.name.clone()); + } + } + if names.len() < 2 { + return None; + } + let sites = rows + .iter() + .map(|predicate| format!("{}:{}:{}", predicate.file, predicate.name, predicate.line)) + .collect::>(); + let spans = rows + .iter() + .map(|predicate| { + ( + format!("{}:{}:{}", predicate.file, predicate.name, predicate.line), + predicate.span, + ) + }) + .collect::>(); + Some(AliasCluster { + body: body.to_string(), + names, + sites, + spans, + }) + }) + .collect::>(); + out.sort_by(|left, right| right.names.len().cmp(&left.names.len())); + out +} + +#[cfg(test)] +mod tests { + use super::*; + + fn pred(name: &str, body: &str, line: usize) -> PredicateAlias { + PredicateAlias { + name: name.to_string(), + body: body.to_string(), + file: "a.rb".to_string(), + defn: name.to_string(), + line, + span: [line, 0, line, 1], + } + } + + #[test] + fn clusters_distinct_names_with_same_body() { + let clusters = alias_clusters(&[ + pred("heap?", "node.storage == :heap", 1), + pred("owned?", "node.storage == :heap", 2), + pred("other?", "node.storage == :frame", 3), + ]); + assert_eq!(clusters.len(), 1); + assert_eq!(clusters[0].body, "node.storage == :heap"); + assert_eq!(clusters[0].names, vec!["heap?".to_string(), "owned?".to_string()]); + } +} diff --git a/gems/decomplex/rust/src/decomplex/mod.rs b/gems/decomplex/rust/src/decomplex/mod.rs index 0b5596ae2..cc08541ec 100644 --- a/gems/decomplex/rust/src/decomplex/mod.rs +++ b/gems/decomplex/rust/src/decomplex/mod.rs @@ -1,2 +1,3 @@ +pub mod ast; pub mod detectors; pub mod syntax; diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs new file mode 100644 index 
000000000..8f3397ad7 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -0,0 +1,88 @@ +pub mod ruby; + +use crate::decomplex::ast::{RawNode, Span}; +use anyhow::{bail, Result}; +use serde::Serialize; +use std::collections::BTreeMap; +use std::path::PathBuf; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Language { + Ruby, +} + +impl Language { + pub fn parse(value: &str) -> Result { + match value { + "ruby" => Ok(Self::Ruby), + _ => bail!("unsupported Decomplex native language: {value}"), + } + } +} + +#[derive(Clone, Debug)] +pub struct Document { + pub file: String, + pub language: Language, + pub source: String, + pub lines: Vec, + pub root: RawNode, + pub function_defs: Vec, + pub state_writes: Vec, + pub predicate_aliases: Vec, +} + +#[derive(Clone, Debug)] +pub struct FunctionDef { + pub file: String, + pub name: String, + pub owner: String, + pub line: usize, + pub span: Span, + pub body: RawNode, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct StateWrite { + pub field: String, + pub receiver: String, + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, + pub owner: String, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct PredicateAlias { + pub name: String, + pub body: String, + pub file: String, + pub defn: String, + pub line: usize, + pub span: Span, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct SimilarityFinding { + pub at: String, + pub sites: Vec, + pub spans: BTreeMap, + pub clone_type: String, + pub node: String, + pub mass: usize, + pub locations: Vec, +} + +pub fn parse_file(file: PathBuf, language: Language) -> Result { + match language { + Language::Ruby => ruby::parse_file(file), + } +} + +pub fn parse_files(files: &[PathBuf], language: Language) -> Result> { + files + .iter() + .map(|file| parse_file(file.clone(), language)) + .collect() +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/mod.rs 
b/gems/decomplex/rust/src/decomplex/syntax/mod.rs deleted file mode 100644 index d12feaf3f..000000000 --- a/gems/decomplex/rust/src/decomplex/syntax/mod.rs +++ /dev/null @@ -1,14 +0,0 @@ -pub mod ruby; - -use serde::Serialize; - -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] -pub struct StateWrite { - pub field: String, - pub receiver: String, - pub file: String, - pub function: String, - pub line: usize, - pub span: [usize; 4], - pub owner: String, -} diff --git a/gems/decomplex/rust/src/decomplex/syntax/ruby.rs b/gems/decomplex/rust/src/decomplex/syntax/ruby.rs index 50a0364c7..b8b400f87 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/ruby.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/ruby.rs @@ -1,32 +1,67 @@ -use super::StateWrite; +use super::{Document, FunctionDef, Language, PredicateAlias, StateWrite}; +use crate::decomplex::ast::{line, node_text, normalize_text, span, RawNode}; use anyhow::{Context, Result}; use std::collections::HashSet; use std::fs; -use std::path::Path; -use tree_sitter::{Language, Node, Parser}; - -pub fn state_writes_for_file(file: &Path) -> Result> { - let source = fs::read_to_string(file) - .with_context(|| format!("failed to read {}", file.display()))?; - let mut parser = Parser::new(); - parser - .set_language(&ruby_language()) - .with_context(|| "failed to initialize tree-sitter ruby parser")?; - let tree = parser - .parse(&source, None) - .with_context(|| format!("tree-sitter produced no tree for {}", file.display()))?; - - let mut out = Vec::new(); - let mut seen = HashSet::new(); - let context = ContextState::new(file_owner(file)); - walk(tree.root_node(), &source, file, &context, &mut out, &mut seen); - Ok(out) -} - -fn ruby_language() -> Language { +use std::path::{Path, PathBuf}; +use tree_sitter::{Language as TreeSitterLanguage, Node, Parser}; + +pub fn parse_file(file: PathBuf) -> Result { + let parsed = ParsedRuby::parse(file)?; + let mut function_defs = Vec::new(); + let mut state_writes = Vec::new(); + let 
mut predicate_aliases = Vec::new(); + let mut seen_writes = HashSet::new(); + let context = ContextState::new(file_owner(&parsed.file)); + + collect_facts( + parsed.tree.root_node(), + &parsed.source, + &parsed.file, + &context, + &mut function_defs, + &mut state_writes, + &mut predicate_aliases, + &mut seen_writes, + ); + + Ok(Document { + file: parsed.file.to_string_lossy().to_string(), + language: Language::Ruby, + source: parsed.source.clone(), + lines: parsed.source.lines().map(ToString::to_string).collect(), + root: RawNode::from_tree_sitter(parsed.tree.root_node(), &parsed.source), + function_defs, + state_writes, + predicate_aliases, + }) +} + +fn ruby_language() -> TreeSitterLanguage { tree_sitter_ruby::LANGUAGE.into() } +struct ParsedRuby { + file: PathBuf, + source: String, + tree: tree_sitter::Tree, +} + +impl ParsedRuby { + fn parse(file: PathBuf) -> Result { + let source = fs::read_to_string(&file) + .with_context(|| format!("failed to read {}", file.display()))?; + let mut parser = Parser::new(); + parser + .set_language(&ruby_language()) + .with_context(|| "failed to initialize tree-sitter ruby parser")?; + let tree = parser + .parse(&source, None) + .with_context(|| format!("tree-sitter produced no tree for {}", file.display()))?; + Ok(Self { file, source, tree }) + } +} + #[derive(Clone, Debug, Eq, PartialEq)] struct ContextState { file_owner: String, @@ -56,20 +91,112 @@ impl ContextState { } } -fn walk( +fn collect_facts( node: Node<'_>, source: &str, file: &Path, context: &ContextState, - out: &mut Vec, - seen: &mut HashSet, + function_defs: &mut Vec, + state_writes: &mut Vec, + predicate_aliases: &mut Vec, + seen_writes: &mut HashSet, ) { let next_context = push_function_context(node, push_owner_context(node, source, context), source); - record_state_write(node, source, file, &next_context, out, seen); + record_function_def(node, source, file, &next_context, function_defs); + record_state_write(node, source, file, &next_context, state_writes, 
seen_writes); + record_predicate_alias(node, source, file, predicate_aliases); let mut cursor = node.walk(); for child in node.children(&mut cursor) { - walk(child, source, file, &next_context, out, seen); + collect_facts( + child, + source, + file, + &next_context, + function_defs, + state_writes, + predicate_aliases, + seen_writes, + ); + } +} + +fn record_function_def( + node: Node<'_>, + source: &str, + file: &Path, + context: &ContextState, + out: &mut Vec, +) { + let Some(name) = function_name(node, source) else { + return; + }; + let function = FunctionDef { + file: file.to_string_lossy().to_string(), + name, + owner: context.current_owner(), + line: line(node), + span: span(node), + body: RawNode::from_tree_sitter(node, source), + }; + let key = (function.file.clone(), function.owner.clone(), function.name.clone(), function.line); + if out + .iter() + .any(|existing| (existing.file.clone(), existing.owner.clone(), existing.name.clone(), existing.line) == key) + { + return; + } + out.push(function); +} + +fn record_predicate_alias( + node: Node<'_>, + source: &str, + file: &Path, + out: &mut Vec, +) { + if node.kind() != "method" { + return; + } + let Some(name) = function_name(node, source) else { + return; + }; + let Some(body) = method_single_expression_body(node) else { + return; + }; + let text = normalize_text(node_text(body, source)); + if text.is_empty() || text == "nil" || text.len() > 200 { + return; + } + let file_name = file.to_string_lossy().to_string(); + out.push(PredicateAlias { + name: name.clone(), + body: text, + file: file_name, + defn: name, + line: line(node), + span: span(node), + }); +} + +fn method_single_expression_body(node: Node<'_>) -> Option> { + let mut cursor = node.walk(); + if node.children(&mut cursor).any(|child| child.kind() == "=") { + let named = named_children(node); + return named.last().copied(); + } + + let body = node + .child_by_field_name("body") + .or_else(|| named_children(node).into_iter().find(|child| 
child.kind() == "body_statement"))?; + let statements: Vec> = named_children(body) + .into_iter() + .filter(|child| !matches!(child.kind(), "comment" | "heredoc_body")) + .collect(); + if statements.len() == 1 { + statements.first().copied() + } else { + None } } @@ -392,39 +519,21 @@ fn strip_assignment_suffix(text: &str) -> String { text.strip_suffix('=').unwrap_or(text).to_string() } -fn node_text<'a>(node: Node<'_>, source: &'a str) -> &'a str { - node.utf8_text(source.as_bytes()).unwrap_or("") -} - -fn normalize_text(text: &str) -> String { - text.split_whitespace().collect::>().join(" ") -} - -fn span(node: Node<'_>) -> [usize; 4] { - let start = node.start_position(); - let end = node.end_position(); - [start.row + 1, start.column, end.row + 1, end.column] -} - -fn line(node: Node<'_>) -> usize { - node.start_position().row + 1 -} - #[cfg(test)] mod tests { use super::*; use std::io::Write; use tempfile::NamedTempFile; - fn extract(source: &str) -> Vec { + fn document(source: &str) -> Document { let mut file = NamedTempFile::new().expect("tempfile"); file.write_all(source.as_bytes()).expect("write source"); - state_writes_for_file(file.path()).expect("state writes") + parse_file(file.path().to_path_buf()).expect("document") } #[test] fn extracts_ruby_attribute_and_instance_writes() { - let writes = extract( + let doc = document( r#" class Box def a(n) @@ -440,7 +549,8 @@ end "#, ); - let summary: Vec<(&str, &str, &str, &str)> = writes + let summary: Vec<(&str, &str, &str, &str)> = doc + .state_writes .iter() .map(|write| { ( @@ -466,7 +576,7 @@ end #[test] fn extracts_nested_owner_names() { - let writes = extract( + let doc = document( r#" module Outer class Inner @@ -478,9 +588,9 @@ end "#, ); - assert_eq!(writes.len(), 1); - assert_eq!(writes[0].owner, "Outer::Inner"); - assert_eq!(writes[0].function, "set"); - assert_eq!(writes[0].field, "state"); + assert_eq!(doc.state_writes.len(), 1); + assert_eq!(doc.state_writes[0].owner, "Outer::Inner"); + 
assert_eq!(doc.state_writes[0].function, "set"); + assert_eq!(doc.state_writes[0].field, "state"); } } diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs index a0db40476..f32c86fcf 100644 --- a/gems/decomplex/rust/src/main.rs +++ b/gems/decomplex/rust/src/main.rs @@ -1,26 +1,56 @@ mod decomplex; use anyhow::{bail, Context, Result}; -use decomplex::detectors::co_update; +use decomplex::detectors::{co_update, flay_similarity, predicate_alias}; +use decomplex::syntax::Language; use std::path::PathBuf; fn main() -> Result<()> { let command = parse_args(std::env::args().skip(1).collect())?; match command { Command::StateWrites { language, files } => { - if language != "ruby" { - bail!("state-writes currently supports --language ruby only"); - } - let facts = co_update::state_writes_for_files(&files) + let language = Language::parse(&language)?; + let facts = co_update::state_writes_for_files(&files, language) .with_context(|| "failed to extract state-write facts")?; println!("{}", serde_json::to_string(&facts)?); } + Command::CoUpdate { language, files } => { + let language = Language::parse(&language)?; + let report = co_update::scan_files(&files, language) + .with_context(|| "failed to scan co-update facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::PredicateAliases { language, files } => { + let language = Language::parse(&language)?; + let report = predicate_alias::scan_files(&files, language) + .with_context(|| "failed to scan predicate-alias facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::FlaySimilarity { + language, + mass, + fuzzy, + files, + } => { + let language = Language::parse(&language)?; + let findings = flay_similarity::scan_files(&files, language, mass, fuzzy) + .with_context(|| "failed to scan structural similarity")?; + println!("{}", serde_json::to_string(&findings)?); + } } Ok(()) } enum Command { StateWrites { language: String, files: Vec }, + CoUpdate { language: 
String, files: Vec }, + PredicateAliases { language: String, files: Vec }, + FlaySimilarity { + language: String, + mass: usize, + fuzzy: usize, + files: Vec, + }, } fn parse_args(args: Vec) -> Result { @@ -30,24 +60,87 @@ fn parse_args(args: Vec) -> Result { }; match command.as_str() { "state-writes" => { + let (language, files) = parse_language_and_files(cursor.collect())?; + if files.is_empty() { + bail!("state-writes requires at least one file"); + } + Ok(Command::StateWrites { language, files }) + } + "co-update" => { + let (language, files) = parse_language_and_files(cursor.collect())?; + if files.is_empty() { + bail!("co-update requires at least one file"); + } + Ok(Command::CoUpdate { language, files }) + } + "predicate-aliases" => { + let (language, files) = parse_language_and_files(cursor.collect())?; + if files.is_empty() { + bail!("predicate-aliases requires at least one file"); + } + Ok(Command::PredicateAliases { language, files }) + } + "flay-similarity" => { let mut language = String::from("ruby"); + let mut mass = 32usize; + let mut fuzzy = 1usize; let mut files = Vec::new(); - while let Some(arg) = cursor.next() { + let mut rest = cursor.collect::>().into_iter(); + while let Some(arg) = rest.next() { if arg == "--language" { - language = cursor + language = rest .next() .with_context(|| "--language requires a value")?; } else if let Some(value) = arg.strip_prefix("--language=") { language = value.to_string(); + } else if arg == "--mass" { + mass = rest + .next() + .with_context(|| "--mass requires a value")? + .parse() + .with_context(|| "--mass must be an integer")?; + } else if let Some(value) = arg.strip_prefix("--mass=") { + mass = value.parse().with_context(|| "--mass must be an integer")?; + } else if arg == "--fuzzy" { + fuzzy = rest + .next() + .with_context(|| "--fuzzy requires a value")? 
+ .parse() + .with_context(|| "--fuzzy must be an integer")?; + } else if let Some(value) = arg.strip_prefix("--fuzzy=") { + fuzzy = value.parse().with_context(|| "--fuzzy must be an integer")?; } else { files.push(PathBuf::from(arg)); } } if files.is_empty() { - bail!("state-writes requires at least one file"); + bail!("flay-similarity requires at least one file"); } - Ok(Command::StateWrites { language, files }) + Ok(Command::FlaySimilarity { + language, + mass, + fuzzy, + files, + }) } _ => bail!("unknown decomplex-rust command: {command}"), } } + +fn parse_language_and_files(args: Vec) -> Result<(String, Vec)> { + let mut language = String::from("ruby"); + let mut files = Vec::new(); + let mut cursor = args.into_iter(); + while let Some(arg) = cursor.next() { + if arg == "--language" { + language = cursor + .next() + .with_context(|| "--language requires a value")?; + } else if let Some(value) = arg.strip_prefix("--language=") { + language = value.to_string(); + } else { + files.push(PathBuf::from(arg)); + } + } + Ok((language, files)) +} From f4f1f99488e5b834f1c4db0fa46a37ec53140cf1 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Tue, 16 Jun 2026 19:17:05 +0000 Subject: [PATCH 09/52] Add ordered parallel parsing for decomplex rust Co-authored-by: Codex --- gems/decomplex/docs/agents/scaling.md | 63 +++++++++ gems/decomplex/exe/decomplex | 9 +- .../lib/decomplex/detector_runner.rb | 20 +-- .../lib/decomplex/native/co_update.rb | 4 +- .../decomplex/lib/decomplex/native/command.rb | 9 ++ .../lib/decomplex/native/flay_similarity.rb | 3 +- .../lib/decomplex/native/predicate_aliases.rb | 4 +- gems/decomplex/rust/src/decomplex/mod.rs | 1 + gems/decomplex/rust/src/decomplex/parallel.rs | 105 +++++++++++++++ gems/decomplex/rust/src/decomplex/syntax.rs | 36 +++++- gems/decomplex/rust/src/main.rs | 121 +++++++++++++++--- gems/decomplex/test/detector_runner_test.rb | 17 +++ 12 files changed, 355 insertions(+), 37 deletions(-) create mode 100644 
gems/decomplex/docs/agents/scaling.md create mode 100644 gems/decomplex/rust/src/decomplex/parallel.rs diff --git a/gems/decomplex/docs/agents/scaling.md b/gems/decomplex/docs/agents/scaling.md new file mode 100644 index 000000000..c6cdc38f5 --- /dev/null +++ b/gems/decomplex/docs/agents/scaling.md @@ -0,0 +1,63 @@ +# Decomplex Native Scaling Notes + +## Current Strategy + +The Rust port parallelizes at the `Document` boundary: + +```text +scan_files -> syntax::parse_files -> scan_documents +``` + +`syntax::parse_files` parses and normalizes files in parallel, while detectors still consume a deterministic `Vec` in input order. This is intentional. It keeps the Rust code close to the Ruby architecture so detectors and language normalizers can be ported file-for-file instead of redesigned around detector-specific map/reduce pipelines. + +Parallelism is controlled with: + +- `--jobs=N` on `decomplex detector ... --engine=rust` +- `--jobs=N` on the native `decomplex-rust` command +- `DECOMPLEX_RUST_JOBS` +- `DECOMPLEX_JOBS` + +## Measured Scaling + +Measured on `src/` with 162 Ruby files, using the release native binary. 
+ +| Detector | Jobs | Elapsed | Speedup | Efficiency | +|---|---:|---:|---:|---:| +| `co-update` | 1 | 2.125s | 1.00x | 100.0% | +| `co-update` | 2 | 1.217s | 1.75x | 87.3% | +| `co-update` | 4 | 0.732s | 2.90x | 72.6% | +| `co-update` | 8 | 0.491s | 4.33x | 54.1% | +| `co-update` | 16 | 0.424s | 5.01x | 31.3% | +| `co-update` | 32 | 0.446s | 4.77x | 14.9% | +| `predicate-alias` | 1 | 2.097s | 1.00x | 100.0% | +| `predicate-alias` | 2 | 1.220s | 1.72x | 86.0% | +| `predicate-alias` | 4 | 0.716s | 2.93x | 73.2% | +| `predicate-alias` | 8 | 0.486s | 4.32x | 53.9% | +| `predicate-alias` | 16 | 0.383s | 5.47x | 34.2% | +| `predicate-alias` | 32 | 0.462s | 4.54x | 14.2% | +| `structural-similarity` | 1 | 4.265s | 1.00x | 100.0% | +| `structural-similarity` | 2 | 3.480s | 1.23x | 61.3% | +| `structural-similarity` | 4 | 3.010s | 1.42x | 35.4% | +| `structural-similarity` | 8 | 2.756s | 1.55x | 19.3% | +| `structural-similarity` | 16 | 2.740s | 1.56x | 9.7% | +| `structural-similarity` | 32 | 2.761s | 1.54x | 4.8% | + +## Interpretation + +The current implementation does not scale well to 32 jobs on this workload. + +`co-update` and `predicate-alias` are parse-heavy enough to benefit substantially from parallel document construction, peaking around 16 jobs. `structural-similarity` has more serial detector aggregation after parsing, so it barely improves beyond 4-8 jobs. + +For now, the best practical default is `--jobs=8` or `--jobs=16`, not `--jobs=32`. + +## Why Not Deeper Parallelism Yet? + +The immediate goal is a sustainable Ruby-to-Rust migration: + +1. Port Ruby `Syntax`/`Document` shape to Rust. +2. Port each detector as a direct `scan_documents` translation. +3. Port each language normalizer into the shared `Document` abstraction. + +Detector-specific map/reduce aggregation could improve some metrics later, but it would also force architectural drift while the port is still incomplete. 
The current boundary gives useful speedups without making future detector and language migrations harder. + +Once all detectors and language normalizers are ported, deeper parallel aggregation can be added selectively where profiling shows a decisive win. diff --git a/gems/decomplex/exe/decomplex b/gems/decomplex/exe/decomplex index d33e5325f..551493426 100755 --- a/gems/decomplex/exe/decomplex +++ b/gems/decomplex/exe/decomplex @@ -63,7 +63,7 @@ end if ARGV[0] == "detector" args = ARGV[1..] detector = args&.shift - abort "usage: decomplex detector DETECTOR --engine=ruby|rust --json FILE..." unless detector + abort "usage: decomplex detector DETECTOR --engine=ruby|rust --json [--jobs=N] FILE..." unless detector engine = "ruby" json = false @@ -90,6 +90,9 @@ if ARGV[0] == "detector" when /\A--fuzzy=(\d+)\z/ detector_options[:fuzzy] = Integer(Regexp.last_match(1)) true + when /\A--jobs=(\d+)\z/ + detector_options[:jobs] = Integer(Regexp.last_match(1)) + true else false end @@ -297,8 +300,8 @@ if ARGV.empty? || ARGV[0] == "-h" || ARGV[0] == "--help" decomplex FILE_OR_DIR [FILE_OR_DIR ...] decomplex report [--output=FILE] [--emit-json=FILE] [--sarif=FILE] [--exclude=GLOB] FILE_OR_DIR ... - decomplex detector DETECTOR --engine=ruby|rust --json [--benchmark] [--mass=N] [--fuzzy=N] FILE_OR_DIR ... - decomplex detector DETECTOR --compare-engines [--mass=N] [--fuzzy=N] FILE_OR_DIR ... + decomplex detector DETECTOR --engine=ruby|rust --json [--benchmark] [--mass=N] [--fuzzy=N] [--jobs=N] FILE_OR_DIR ... + decomplex detector DETECTOR --compare-engines [--mass=N] [--fuzzy=N] [--jobs=N] FILE_OR_DIR ... decomplex state-mesh [--output=FILE] [--exclude=GLOB] FILE_OR_DIR ... decomplex state-branches [--output=FILE] [--exclude=GLOB] FILE_OR_DIR ... decomplex temporal-ordering [--output=FILE] [--exclude=GLOB] FILE_OR_DIR ... 
diff --git a/gems/decomplex/lib/decomplex/detector_runner.rb b/gems/decomplex/lib/decomplex/detector_runner.rb index a7b384e56..6621901b3 100644 --- a/gems/decomplex/lib/decomplex/detector_runner.rb +++ b/gems/decomplex/lib/decomplex/detector_runner.rb @@ -26,17 +26,17 @@ module DetectorRunner module_function - def run(detector, files, engine: "ruby", mass: FlaySimilarity::DEFAULT_MASS, fuzzy: FlaySimilarity::DEFAULT_FUZZY) + def run(detector, files, engine: "ruby", mass: FlaySimilarity::DEFAULT_MASS, fuzzy: FlaySimilarity::DEFAULT_FUZZY, jobs: nil) canonical = canonical_detector(detector) validate_engine!(engine) case canonical when :co_update - co_update(files, engine: engine) + co_update(files, engine: engine, jobs: jobs) when :predicate_alias - predicate_alias(files, engine: engine) + predicate_alias(files, engine: engine, jobs: jobs) when :flay_similarity - flay_similarity(files, engine: engine, mass: mass, fuzzy: fuzzy) + flay_similarity(files, engine: engine, mass: mass, fuzzy: fuzzy, jobs: jobs) else raise ArgumentError, "unsupported decomplex detector: #{detector}" end @@ -68,8 +68,8 @@ def detector_names raise ArgumentError, "unsupported decomplex detector engine: #{engine}" end - private_class_method def self.co_update(files, engine:) - return Native::CoUpdate.scan(files) if engine.to_s == "rust" + private_class_method def self.co_update(files, engine:, jobs:) + return Native::CoUpdate.scan(files, jobs: jobs) if engine.to_s == "rust" report = CoUpdate.scan(files) @@ -79,18 +79,18 @@ def detector_names } end - private_class_method def self.predicate_alias(files, engine:) - return Native::PredicateAliases.scan(files) if engine.to_s == "rust" + private_class_method def self.predicate_alias(files, engine:, jobs:) + return Native::PredicateAliases.scan(files, jobs: jobs) if engine.to_s == "rust" report = PredicateAlias.scan(files) { "alias_clusters" => report.alias_clusters } end - private_class_method def self.flay_similarity(files, engine:, mass:, fuzzy:) + 
private_class_method def self.flay_similarity(files, engine:, mass:, fuzzy:, jobs:) findings = if engine.to_s == "rust" - Native::FlaySimilarity.scan(files, mass: mass, fuzzy: fuzzy) + Native::FlaySimilarity.scan(files, mass: mass, fuzzy: fuzzy, jobs: jobs) else FlaySimilarity.scan(files, mass: mass, fuzzy: fuzzy) end diff --git a/gems/decomplex/lib/decomplex/native/co_update.rb b/gems/decomplex/lib/decomplex/native/co_update.rb index 98ae8bc4a..c63b54f25 100644 --- a/gems/decomplex/lib/decomplex/native/co_update.rb +++ b/gems/decomplex/lib/decomplex/native/co_update.rb @@ -8,10 +8,10 @@ module Native module CoUpdate module_function - def scan(files) + def scan(files, jobs: nil) paths = Array(files).map(&:to_s) validate_ruby_files!(paths) - JSON.parse(Command.run("co-update", "--language", "ruby", *paths)) + JSON.parse(Command.run("co-update", "--language", "ruby", *Command.jobs_args(jobs), *paths)) end private_class_method def self.validate_ruby_files!(paths) diff --git a/gems/decomplex/lib/decomplex/native/command.rb b/gems/decomplex/lib/decomplex/native/command.rb index c2eb1151a..87646efc3 100644 --- a/gems/decomplex/lib/decomplex/native/command.rb +++ b/gems/decomplex/lib/decomplex/native/command.rb @@ -29,6 +29,15 @@ def crate_root File.expand_path("../../../rust", __dir__) end + def jobs_args(jobs) + return [] if jobs.nil? 
+ + count = Integer(jobs) + raise ArgumentError, "jobs must be greater than zero" if count <= 0 + + ["--jobs", count.to_s] + end + private_class_method def self.native_command(args) if fresh_binary?(binary_path) [binary_path, *args] diff --git a/gems/decomplex/lib/decomplex/native/flay_similarity.rb b/gems/decomplex/lib/decomplex/native/flay_similarity.rb index 33298b360..88fd1b6d0 100644 --- a/gems/decomplex/lib/decomplex/native/flay_similarity.rb +++ b/gems/decomplex/lib/decomplex/native/flay_similarity.rb @@ -8,13 +8,14 @@ module Native module FlaySimilarity module_function - def scan(files, mass:, fuzzy:) + def scan(files, mass:, fuzzy:, jobs: nil) paths = Array(files).map(&:to_s) validate_ruby_files!(paths) JSON.parse( Command.run( "flay-similarity", "--language", "ruby", + *Command.jobs_args(jobs), "--mass", mass.to_i.to_s, "--fuzzy", fuzzy.to_i.to_s, *paths diff --git a/gems/decomplex/lib/decomplex/native/predicate_aliases.rb b/gems/decomplex/lib/decomplex/native/predicate_aliases.rb index b69ab8c99..78b93c2b6 100644 --- a/gems/decomplex/lib/decomplex/native/predicate_aliases.rb +++ b/gems/decomplex/lib/decomplex/native/predicate_aliases.rb @@ -8,10 +8,10 @@ module Native module PredicateAliases module_function - def scan(files) + def scan(files, jobs: nil) paths = Array(files).map(&:to_s) validate_ruby_files!(paths) - JSON.parse(Command.run("predicate-aliases", "--language", "ruby", *paths)) + JSON.parse(Command.run("predicate-aliases", "--language", "ruby", *Command.jobs_args(jobs), *paths)) end private_class_method def self.validate_ruby_files!(paths) diff --git a/gems/decomplex/rust/src/decomplex/mod.rs b/gems/decomplex/rust/src/decomplex/mod.rs index cc08541ec..bbf05f28f 100644 --- a/gems/decomplex/rust/src/decomplex/mod.rs +++ b/gems/decomplex/rust/src/decomplex/mod.rs @@ -1,3 +1,4 @@ pub mod ast; pub mod detectors; +pub mod parallel; pub mod syntax; diff --git a/gems/decomplex/rust/src/decomplex/parallel.rs 
b/gems/decomplex/rust/src/decomplex/parallel.rs new file mode 100644 index 000000000..ab1f0b6c9 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/parallel.rs @@ -0,0 +1,105 @@ +use anyhow::{bail, Result}; +use std::env; +use std::sync::mpsc; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::thread; + +static JOBS_OVERRIDE: AtomicUsize = AtomicUsize::new(0); + +pub fn set_jobs_for_process(jobs: Option) -> Result<()> { + let Some(jobs) = jobs else { + return Ok(()); + }; + if jobs == 0 { + bail!("--jobs must be greater than zero"); + } + JOBS_OVERRIDE.store(jobs, Ordering::Relaxed); + Ok(()) +} + +pub fn job_count() -> usize { + let configured = JOBS_OVERRIDE.load(Ordering::Relaxed); + if configured > 0 { + return configured; + } + + env_jobs() + .unwrap_or_else(|| thread::available_parallelism().map(usize::from).unwrap_or(1)) + .max(1) +} + +pub fn map_ordered(items: &[T], func: F) -> Result> +where + T: Sync, + U: Send, + F: Fn(&T) -> Result + Sync, +{ + let jobs = job_count(); + if jobs <= 1 || items.len() <= 1 { + return items.iter().map(func).collect(); + } + + let worker_count = jobs.min(items.len()); + let next_index = AtomicUsize::new(0); + let (tx, rx) = mpsc::channel(); + + thread::scope(|scope| { + for _ in 0..worker_count { + let tx = tx.clone(); + let func = &func; + let next_index = &next_index; + scope.spawn(move || loop { + let index = next_index.fetch_add(1, Ordering::Relaxed); + if index >= items.len() { + break; + } + if tx.send((index, func(&items[index]))).is_err() { + break; + } + }); + } + drop(tx); + }); + + let mut results = (0..items.len()).map(|_| None).collect::>(); + for (index, result) in rx { + results[index] = Some(result); + } + + results + .into_iter() + .map(|slot| slot.expect("parallel worker did not return a result")) + .collect() +} + +fn env_jobs() -> Option { + ["DECOMPLEX_RUST_JOBS", "DECOMPLEX_JOBS"] + .into_iter() + .find_map(|name| env::var(name).ok().and_then(|value| parse_jobs(&value))) +} + +fn 
parse_jobs(value: &str) -> Option { + let trimmed = value.trim(); + if trimmed.is_empty() { + return None; + } + trimmed.parse::().ok().filter(|jobs| *jobs > 0) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parallel_map_preserves_input_order() { + set_jobs_for_process(Some(4)).expect("jobs"); + let input = vec![3, 2, 1, 0]; + let output = map_ordered(&input, |item| Ok(item * 10)).expect("map"); + assert_eq!(output, vec![30, 20, 10, 0]); + } + + #[test] + fn rejects_zero_jobs_override() { + assert!(set_jobs_for_process(Some(0)).is_err()); + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index 8f3397ad7..aa760d0de 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -1,6 +1,7 @@ pub mod ruby; use crate::decomplex::ast::{RawNode, Span}; +use crate::decomplex::parallel; use anyhow::{bail, Result}; use serde::Serialize; use std::collections::BTreeMap; @@ -81,8 +82,35 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { } pub fn parse_files(files: &[PathBuf], language: Language) -> Result> { - files - .iter() - .map(|file| parse_file(file.clone(), language)) - .collect() + parallel::map_ordered(files, |file| parse_file(file.clone(), language)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::decomplex::parallel; + use std::io::Write; + use tempfile::NamedTempFile; + + #[test] + fn parallel_parse_files_preserves_input_order() { + parallel::set_jobs_for_process(Some(4)).expect("jobs"); + let mut first = NamedTempFile::new().expect("first"); + let mut second = NamedTempFile::new().expect("second"); + first + .write_all(b"def first\n 1\nend\n") + .expect("write first"); + second + .write_all(b"def second\n 2\nend\n") + .expect("write second"); + + let files = vec![first.path().to_path_buf(), second.path().to_path_buf()]; + let docs = parse_files(&files, Language::Ruby).expect("parse files"); + + 
assert_eq!(docs.len(), 2); + assert_eq!(docs[0].file, first.path().to_string_lossy()); + assert_eq!(docs[1].file, second.path().to_string_lossy()); + assert_eq!(docs[0].function_defs[0].name, "first"); + assert_eq!(docs[1].function_defs[0].name, "second"); + } } diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs index f32c86fcf..78891f147 100644 --- a/gems/decomplex/rust/src/main.rs +++ b/gems/decomplex/rust/src/main.rs @@ -2,25 +2,27 @@ mod decomplex; use anyhow::{bail, Context, Result}; use decomplex::detectors::{co_update, flay_similarity, predicate_alias}; +use decomplex::parallel; use decomplex::syntax::Language; use std::path::PathBuf; fn main() -> Result<()> { let command = parse_args(std::env::args().skip(1).collect())?; + parallel::set_jobs_for_process(command.jobs())?; match command { - Command::StateWrites { language, files } => { + Command::StateWrites { language, files, .. } => { let language = Language::parse(&language)?; let facts = co_update::state_writes_for_files(&files, language) .with_context(|| "failed to extract state-write facts")?; println!("{}", serde_json::to_string(&facts)?); } - Command::CoUpdate { language, files } => { + Command::CoUpdate { language, files, .. } => { let language = Language::parse(&language)?; let report = co_update::scan_files(&files, language) .with_context(|| "failed to scan co-update facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::PredicateAliases { language, files } => { + Command::PredicateAliases { language, files, .. } => { let language = Language::parse(&language)?; let report = predicate_alias::scan_files(&files, language) .with_context(|| "failed to scan predicate-alias facts")?; @@ -31,6 +33,7 @@ fn main() -> Result<()> { mass, fuzzy, files, + .. 
} => { let language = Language::parse(&language)?; let findings = flay_similarity::scan_files(&files, language, mass, fuzzy) @@ -42,48 +45,85 @@ fn main() -> Result<()> { } enum Command { - StateWrites { language: String, files: Vec }, - CoUpdate { language: String, files: Vec }, - PredicateAliases { language: String, files: Vec }, + StateWrites { + language: String, + files: Vec, + jobs: Option, + }, + CoUpdate { + language: String, + files: Vec, + jobs: Option, + }, + PredicateAliases { + language: String, + files: Vec, + jobs: Option, + }, FlaySimilarity { language: String, mass: usize, fuzzy: usize, files: Vec, + jobs: Option, }, } +impl Command { + fn jobs(&self) -> Option { + match self { + Self::StateWrites { jobs, .. } + | Self::CoUpdate { jobs, .. } + | Self::PredicateAliases { jobs, .. } + | Self::FlaySimilarity { jobs, .. } => *jobs, + } + } +} + fn parse_args(args: Vec) -> Result { let mut cursor = args.into_iter(); let Some(command) = cursor.next() else { - bail!("usage: decomplex-rust state-writes --language ruby FILE..."); + bail!("usage: decomplex-rust COMMAND [--language ruby] [--jobs N] FILE..."); }; match command.as_str() { "state-writes" => { - let (language, files) = parse_language_and_files(cursor.collect())?; + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; if files.is_empty() { bail!("state-writes requires at least one file"); } - Ok(Command::StateWrites { language, files }) + Ok(Command::StateWrites { + language, + files, + jobs, + }) } "co-update" => { - let (language, files) = parse_language_and_files(cursor.collect())?; + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; if files.is_empty() { bail!("co-update requires at least one file"); } - Ok(Command::CoUpdate { language, files }) + Ok(Command::CoUpdate { + language, + files, + jobs, + }) } "predicate-aliases" => { - let (language, files) = parse_language_and_files(cursor.collect())?; + let (language, files, jobs) = 
parse_language_files_and_jobs(cursor.collect())?; if files.is_empty() { bail!("predicate-aliases requires at least one file"); } - Ok(Command::PredicateAliases { language, files }) + Ok(Command::PredicateAliases { + language, + files, + jobs, + }) } "flay-similarity" => { let mut language = String::from("ruby"); let mut mass = 32usize; let mut fuzzy = 1usize; + let mut jobs = None; let mut files = Vec::new(); let mut rest = cursor.collect::>().into_iter(); while let Some(arg) = rest.next() { @@ -109,6 +149,12 @@ fn parse_args(args: Vec) -> Result { .with_context(|| "--fuzzy must be an integer")?; } else if let Some(value) = arg.strip_prefix("--fuzzy=") { fuzzy = value.parse().with_context(|| "--fuzzy must be an integer")?; + } else if arg == "--jobs" { + jobs = Some(parse_jobs( + rest.next().with_context(|| "--jobs requires a value")?, + )?); + } else if let Some(value) = arg.strip_prefix("--jobs=") { + jobs = Some(parse_jobs(value.to_string())?); } else { files.push(PathBuf::from(arg)); } @@ -121,14 +167,16 @@ fn parse_args(args: Vec) -> Result { mass, fuzzy, files, + jobs, }) } _ => bail!("unknown decomplex-rust command: {command}"), } } -fn parse_language_and_files(args: Vec) -> Result<(String, Vec)> { +fn parse_language_files_and_jobs(args: Vec) -> Result<(String, Vec, Option)> { let mut language = String::from("ruby"); + let mut jobs = None; let mut files = Vec::new(); let mut cursor = args.into_iter(); while let Some(arg) = cursor.next() { @@ -138,9 +186,52 @@ fn parse_language_and_files(args: Vec) -> Result<(String, Vec)> .with_context(|| "--language requires a value")?; } else if let Some(value) = arg.strip_prefix("--language=") { language = value.to_string(); + } else if arg == "--jobs" { + jobs = Some(parse_jobs( + cursor.next().with_context(|| "--jobs requires a value")?, + )?); + } else if let Some(value) = arg.strip_prefix("--jobs=") { + jobs = Some(parse_jobs(value.to_string())?); } else { files.push(PathBuf::from(arg)); } } - Ok((language, files)) + 
Ok((language, files, jobs)) +} + +fn parse_jobs(value: String) -> Result { + let jobs = value + .parse::() + .with_context(|| "--jobs must be an integer")?; + if jobs == 0 { + bail!("--jobs must be greater than zero"); + } + Ok(jobs) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_common_jobs_option() { + let command = parse_args(vec![ + "co-update".to_string(), + "--jobs=4".to_string(), + "a.rb".to_string(), + ]) + .expect("command"); + + assert_eq!(command.jobs(), Some(4)); + } + + #[test] + fn rejects_zero_jobs_option() { + assert!(parse_args(vec![ + "co-update".to_string(), + "--jobs=0".to_string(), + "a.rb".to_string(), + ]) + .is_err()); + } } diff --git a/gems/decomplex/test/detector_runner_test.rb b/gems/decomplex/test/detector_runner_test.rb index 21a1f2032..64c3461ba 100644 --- a/gems/decomplex/test/detector_runner_test.rb +++ b/gems/decomplex/test/detector_runner_test.rb @@ -40,6 +40,23 @@ def test_detector_cli_compare_engines_outputs_canonical_json assert_equal Decomplex::DetectorRunner.canonical_json("co-update", [FIXTURE], engine: "ruby"), stdout end + def test_detector_cli_compare_engines_accepts_jobs + skip "cargo is not available" unless cargo_available? 
+ + stdout, stderr, status = Open3.capture3( + "ruby", + "gems/decomplex/exe/decomplex", + "detector", + "co-update", + "--compare-engines", + "--jobs=2", + FIXTURE + ) + + assert status.success?, stderr + assert_equal Decomplex::DetectorRunner.canonical_json("co-update", [FIXTURE], engine: "ruby"), stdout + end + def test_detector_cli_benchmark_keeps_json_stdout_canonical stdout, stderr, status = Open3.capture3( "ruby", From a4a265f1782085dac8d47e4b6c9007a7d7f93928 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Tue, 16 Jun 2026 22:29:44 +0000 Subject: [PATCH 10/52] WIP: Decomplex: Migrate decision pressure to Rust --- .../docs/agents/tree-sitter-migration.md | 84 + .../lib/decomplex/detector_runner.rb | 49 + .../lib/decomplex/native/decision_pressure.rb | 25 + gems/decomplex/lib/decomplex/native/miner.rb | 25 + .../lib/decomplex/native/semantic_aliases.rb | 25 + gems/decomplex/rust/src/decomplex/ast.rs | 2777 ++++++++++++++++- .../decomplex/detectors/decision_pressure.rs | 502 +++ .../rust/src/decomplex/detectors/miner.rs | 213 ++ .../rust/src/decomplex/detectors/mod.rs | 3 + .../decomplex/detectors/predicate_alias.rs | 108 +- .../src/decomplex/detectors/semantic_alias.rs | 339 ++ gems/decomplex/rust/src/decomplex/syntax.rs | 23 + .../rust/src/decomplex/syntax/ruby.rs | 461 ++- gems/decomplex/rust/src/main.rs | 73 +- gems/decomplex/test/detector_runner_test.rb | 114 + 15 files changed, 4799 insertions(+), 22 deletions(-) create mode 100644 gems/decomplex/lib/decomplex/native/decision_pressure.rb create mode 100644 gems/decomplex/lib/decomplex/native/miner.rb create mode 100644 gems/decomplex/lib/decomplex/native/semantic_aliases.rb create mode 100644 gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/miner.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs diff --git a/gems/decomplex/docs/agents/tree-sitter-migration.md 
b/gems/decomplex/docs/agents/tree-sitter-migration.md index 70c0f37c9..a3a31e8a1 100644 --- a/gems/decomplex/docs/agents/tree-sitter-migration.md +++ b/gems/decomplex/docs/agents/tree-sitter-migration.md @@ -200,6 +200,36 @@ Example capability flags: Detectors should skip unsupported sub-signals rather than infer them from unrelated syntax. +## Native Rust Port Contract + +The Rust implementation is a performance port, not a new Decomplex. +It must stay structurally symmetric with the Ruby implementation so the +remaining detectors and languages can be migrated mechanically. + +Rules: + +- Port Ruby files file-for-file and function-for-function unless a + later optimization is proven after parity. +- Keep the normalized AST API aligned with `lib/decomplex/ast.rb`: + `parse`, `node`, `slice`, `body_stmts`, `def_push`, + `canon_polarity`, `flatten_and`, and the `Node` vocabulary. +- Keep language adapters responsible for syntax normalization, not + detector decisions. Detectors should consume the same normalized AST + or the same syntax facts their Ruby counterpart consumes. +- Do not hide AST drift by sorting, filtering, or reshaping detector + results. Fix the normalizer or the detector port so the canonical + JSON matches Ruby output. +- Every native detector needs an engine-parity test and a real `src/` + parity smoke before it is treated as migrated. + +Current split: + +- `DecisionPressure`, `PredicateAlias`, and `SemanticAlias` are + AST-backed ports and compare byte-for-byte with Ruby on `src/`. +- `CoUpdate` and `Miner` consume syntax facts because their Ruby + counterparts consume `Syntax.parse` / `SiteExtractor` facts. +- `FlaySimilarity` consumes `Syntax.parse` in both Ruby and Rust. 
+ ## Preserving Output Ruby migration must be gated by exact-output tests before Tree-sitter @@ -490,3 +520,57 @@ That slice protects the current consumers, proves the output discipline on the highest-value detectors, and creates the extension point needed for Python/JavaScript/TypeScript/Go/Rust/Zig profiles without forcing a full rewrite. + +## Native Rust Detector Migration + +Status: in progress. The native Rust port must stay a structural mirror +of the Ruby implementation: shared syntax/AST facts first, detector +reducers second. Do not add detector-specific Tree-sitter walkers. + +Migration order follows the Decomplex Metrics Expo tiers. Tier 1 +detectors move first because they carry the highest signal and should +benefit earliest from native speed. + +Benchmarks below use `src/` on this repository through: + +``` +ruby gems/decomplex/exe/decomplex detector DETECTOR --engine=ruby --json src/ +ruby gems/decomplex/exe/decomplex detector DETECTOR --engine=rust --json --jobs=8 src/ +``` + +The JSON outputs are canonical detector-only payloads and are byte-for- +byte compared before recording a detector as migrated. + +| Tier | Detector / section | Native status | Ruby | Rust | Speedup | Notes | +|---|---|---:|---:|---:|---:|---| +| 1 | Missing Abstractions | migrated | 13.02s | 0.64s | 20.3x | Implemented by `miner`; consumes shared `DecisionSite` facts, matching Ruby `SiteExtractor`. | +| 1 | Semantic Predicate Aliases | migrated | 86.41s | 2.60s | 33.2x | AST-backed file/function port of `SemanticAlias`. | +| 1 | Reification Misses | migrated | 86.41s | 2.60s | 33.2x | Same AST-backed native pass as semantic aliases. | +| 1 | Exact Predicate Aliases | migrated | 85.50s | 2.58s | 33.1x | AST-backed file/function port of `PredicateAlias`. | +| 1 | Decision Pressure | migrated | 84.45s | 2.77s | 30.5x | AST-backed file/function port of `DecisionPressure`. | +| 1 | Redundant Nil Guards | pending | - | - | - | Needs local dominance/null-check normalized AST facts. 
| +| 1 | State Heatmap | pending | - | - | - | Needs shared `StateRead`, `StateWrite`, and semantic re-derivation facts. | +| 1 | State-Based Branch Density | pending | - | - | - | Needs branch decision facts with state refs. | +| 1 | Temporal Ordering Pressure | pending | - | - | - | Needs owner/method visibility plus state read/write facts. | +| 2 | Structural Similarity (Type-2/3) | migrated | 85.34s | 2.88s | 29.6x | File/function port of structural fingerprinting over shared `RawNode`. | +| 2 | Neglected Updates | migrated | 43.90s | 0.62s | 70.8x | Same native pass as co-update. | +| 2 | Neglected Conditions | migrated | 13.02s | 0.64s | 20.3x | Implemented by `miner`; consumes shared `DecisionSite` facts, matching Ruby `SiteExtractor`. | +| 2 | Derived-State Staleness | pending | - | - | - | Needs local write/read/dependency facts or Rust normalized AST. | +| 2 | Inconsistent Rename Clones | pending | - | - | - | Can likely share structural clone tokenization with Rust AST facade. | +| 2 | Implicit Control Flow | pending | - | - | - | Needs topology/path protocol and state effect facts. | +| 2 | Weighted Inlined Cognitive Complexity | pending | - | - | - | Needs topology plus local cognitive scorer. | +| 2 | Locality Drag | pending | - | - | - | Needs local flow summaries and boundaries. | +| 2/3 | Operational Discontinuity | pending | - | - | - | Needs local flow summaries and boundaries. | +| 3 | Neglected Path Conditions | pending | - | - | - | Needs path-condition facts over normalized branch syntax. | +| 3 | Oversized Predicates | pending | - | - | - | Needs normalized boolean atom counting. | +| 3 | Broken Protocols | pending | - | - | - | Needs call-sequence mining facts. | +| 3 | Function LCOM | pending | - | - | - | Needs local flow summaries. | +| 3 | False Simplicity | pending | - | - | - | Needs language lexicons plus call/mutation/reopen facts. | +| 3 | Fat Unions | pending | - | - | - | Needs class/variant dispatch and member-use facts. 
| + +Earlier single-thread / pre-architecture-correction timings recorded before the +AST-backed alias and decision-pressure ports: + +- co-update: Ruby 43.205838s, Rust 2.144622s, 20.1x. +- predicate-alias: Ruby 81.583126s, Rust 2.136387s, 38.2x. +- structural-similarity: Ruby 85.163481s, Rust 4.331976s, 19.7x. diff --git a/gems/decomplex/lib/decomplex/detector_runner.rb b/gems/decomplex/lib/decomplex/detector_runner.rb index 6621901b3..560659d98 100644 --- a/gems/decomplex/lib/decomplex/detector_runner.rb +++ b/gems/decomplex/lib/decomplex/detector_runner.rb @@ -4,9 +4,15 @@ require_relative "co_update" require_relative "flay_similarity" require_relative "native/co_update" +require_relative "native/decision_pressure" require_relative "native/predicate_aliases" require_relative "native/flay_similarity" +require_relative "native/miner" +require_relative "native/semantic_aliases" +require_relative "miner" +require_relative "decision_pressure" require_relative "predicate_alias" +require_relative "semantic_alias" module Decomplex # Runs one detector in isolation and emits deterministic machine output. 
@@ -17,8 +23,17 @@ module Decomplex module DetectorRunner DETECTORS = { "co-update" => :co_update, + "decision-pressure" => :decision_pressure, "predicate-alias" => :predicate_alias, "predicate-aliases" => :predicate_alias, + "miner" => :miner, + "decision-miner" => :miner, + "missing-abstractions" => :miner, + "neglected-conditions" => :miner, + "semantic-alias" => :semantic_alias, + "semantic-aliases" => :semantic_alias, + "semantic-predicate-aliases" => :semantic_alias, + "reification-misses" => :semantic_alias, "flay-similarity" => :flay_similarity, "structural-similarity" => :flay_similarity }.freeze @@ -33,8 +48,14 @@ def run(detector, files, engine: "ruby", mass: FlaySimilarity::DEFAULT_MASS, fuz case canonical when :co_update co_update(files, engine: engine, jobs: jobs) + when :decision_pressure + decision_pressure(files, engine: engine, jobs: jobs) when :predicate_alias predicate_alias(files, engine: engine, jobs: jobs) + when :miner + miner(files, engine: engine, jobs: jobs) + when :semantic_alias + semantic_alias(files, engine: engine, jobs: jobs) when :flay_similarity flay_similarity(files, engine: engine, mass: mass, fuzzy: fuzzy, jobs: jobs) else @@ -79,6 +100,12 @@ def detector_names } end + private_class_method def self.decision_pressure(files, engine:, jobs:) + return Native::DecisionPressure.scan(files, jobs: jobs) if engine.to_s == "rust" + + DecisionPressure.scan(files).ranked + end + private_class_method def self.predicate_alias(files, engine:, jobs:) return Native::PredicateAliases.scan(files, jobs: jobs) if engine.to_s == "rust" @@ -87,6 +114,28 @@ def detector_names { "alias_clusters" => report.alias_clusters } end + private_class_method def self.miner(files, engine:, jobs:) + return Native::Miner.scan(files, jobs: jobs) if engine.to_s == "rust" + + report = Miner.scan(files) + + { + "missing_abstractions" => report.missing_abstractions, + "neglected_conditions" => report.neglected_conditions + } + end + + private_class_method def 
self.semantic_alias(files, engine:, jobs:) + return Native::SemanticAliases.scan(files, jobs: jobs) if engine.to_s == "rust" + + report = SemanticAlias.scan(files) + + { + "alias_clusters" => report.alias_clusters, + "reification_misses" => report.reification_misses + } + end + private_class_method def self.flay_similarity(files, engine:, mass:, fuzzy:, jobs:) findings = if engine.to_s == "rust" diff --git a/gems/decomplex/lib/decomplex/native/decision_pressure.rb b/gems/decomplex/lib/decomplex/native/decision_pressure.rb new file mode 100644 index 000000000..5bb1a4854 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/decision_pressure.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module DecisionPressure + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("decision-pressure", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? 
+ + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/miner.rb b/gems/decomplex/lib/decomplex/native/miner.rb new file mode 100644 index 000000000..e1fec456c --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/miner.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module Miner + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("miner", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/semantic_aliases.rb b/gems/decomplex/lib/decomplex/native/semantic_aliases.rb new file mode 100644 index 000000000..541be0cd2 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/semantic_aliases.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module SemanticAliases + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("semantic-aliases", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? 
+ + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs index 8507d7c15..f567c32d2 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -1,7 +1,13 @@ use serde::Serialize; -use tree_sitter::Node; +use anyhow::{Context, Result}; +use std::collections::BTreeSet; +use std::fs; +use std::path::Path; +use tree_sitter::{Node as TreeSitterNode, Parser}; pub type Span = [usize; 4]; +const COMPARISON_OPERATORS: &[&str] = &["==", "!=", "===", "!==", "<", "<=", ">", ">="]; +const OPERATOR_CALL_OPERATORS: &[&str] = &["+", "-", "*", "/", "%", "**", "|", "&", "^", "<<", ">>", "=~", "!~"]; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct RawNode { @@ -13,7 +19,7 @@ pub struct RawNode { } impl RawNode { - pub fn from_tree_sitter(node: Node<'_>, source: &str) -> Self { + pub fn from_tree_sitter(node: TreeSitterNode<'_>, source: &str) -> Self { let mut cursor = node.walk(); let mut children: Vec = node .children(&mut cursor) @@ -150,16 +156,2777 @@ pub fn normalize_text(text: &str) -> String { text.split_whitespace().collect::>().join(" ") } -pub fn span(node: Node<'_>) -> Span { +pub fn span(node: TreeSitterNode<'_>) -> Span { let start = node.start_position(); let end = node.end_position(); [start.row + 1, start.column, end.row + 1, end.column] } -pub fn line(node: Node<'_>) -> usize { +pub fn line(node: TreeSitterNode<'_>) -> usize { node.start_position().row + 1 } -pub fn node_text<'a>(node: Node<'_>, source: &'a str) -> &'a str { +pub fn node_text<'a>(node: TreeSitterNode<'_>, source: &'a str) -> &'a str { node.utf8_text(source.as_bytes()).unwrap_or("") } + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum Child { + Node(Box), + Symbol(String), + String(String), + Nil, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Node { + pub r#type: String, + pub 
children: Vec, + pub first_lineno: usize, + pub first_column: usize, + pub last_lineno: usize, + pub last_column: usize, + pub text: String, +} + +pub fn parse(file: &Path) -> Result<(Node, Vec)> { + let source = fs::read_to_string(file) + .with_context(|| format!("failed to read {}", file.display()))?; + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_ruby::LANGUAGE.into()) + .with_context(|| "failed to initialize tree-sitter ruby parser")?; + let tree = parser + .parse(&source, None) + .with_context(|| format!("tree-sitter produced no tree for {}", file.display()))?; + let root = TreeSitterNormalizer::new(&source).normalize(tree.root_node()); + let lines = source.lines().map(ToString::to_string).collect(); + Ok((root, lines)) +} + +pub fn node(child: &Child) -> Option<&Node> { + match child { + Child::Node(node) => Some(node), + _ => None, + } +} + +pub fn slice(node: &Node, _lines: &[String]) -> String { + normalize_text(&node.text) +} + +pub fn body_stmts(defn_node: &Node) -> Vec<&Node> { + let scope_index = if defn_node.r#type == "DEFS" { 2 } else { 1 }; + let Some(scope) = defn_node.children.get(scope_index).and_then(node) else { + return Vec::new(); + }; + if scope.r#type != "SCOPE" { + return Vec::new(); + } + let Some(body) = scope.children.get(2).and_then(node) else { + return Vec::new(); + }; + if body.r#type == "BLOCK" { + body.children.iter().filter_map(node).collect() + } else { + vec![body] + } +} + +pub fn def_push(node: &Node, stack: &[String]) -> Vec { + let mut next = stack.to_vec(); + match node.r#type.as_str() { + "DEFN" => { + if let Some(name) = child_to_string(node.children.first()) { + next.push(name); + } + } + "DEFS" => { + if let Some(name) = child_to_string(node.children.get(1)) { + next.push(name); + } + } + _ => {} + } + next +} + +pub fn canon_polarity(text: &str) -> (String, bool) { + let trimmed = text.trim(); + if let Some(rest) = trimmed.strip_prefix('!') { + ( + rest.trim_start_matches('(') + 
.trim_end_matches(')') + .trim() + .to_string(), + true, + ) + } else { + (trimmed.to_string(), false) + } +} + +pub fn flatten_and(node: &Node) -> Vec<&Node> { + if node.r#type != "AND" { + return vec![node]; + } + node.children + .iter() + .filter_map(self::node) + .flat_map(flatten_and) + .collect() +} + +struct TreeSitterNormalizer<'source> { + source: &'source str, + local_stack: Vec>, +} + +impl<'source> TreeSitterNormalizer<'source> { + fn new(source: &'source str) -> Self { + Self { + source, + local_stack: Vec::new(), + } + } + + fn normalize(mut self, root: TreeSitterNode<'_>) -> Node { + let children = self.with_ruby_scope(root, true, |normalizer| { + normalizer.normalize_children(root) + }); + self.wrap("ROOT", children, root) + } + + fn normalize_node(&mut self, node: TreeSitterNode<'_>) -> Option { + if node.kind() == "comment" { + return None; + } + if self.assignment_lhs(node) { + return self.normalize_assignment_lhs(node); + } + if self.infix_statement(node) { + return self.normalize_infix_statement(node); + } + if self.ternary_statement(node) { + return self.normalize_ternary_statement(node); + } + if if_kind(node.kind()) { + return self.normalize_if(node); + } + if self.modifier_statement(node) { + return self.normalize_modifier_statement(node); + } + if self.statement_call_with_block(node) { + return self.normalize_statement_call_with_block(node); + } + if self.command_call_statement(node) { + return self.normalize_command_call_statement(node); + } + if self.interpolated_statement(node) { + return Some(self.normalize_interpolated_statement(node)); + } + if self.dotted_expression(node) { + return self.normalize_dotted_expression(node); + } + if self.boolean_expression(node) { + return self.normalize_boolean(node); + } + if self.operator_call_expression(node) { + return self.normalize_operator_call(node); + } + if self.comparison_expression(node) { + return self.normalize_comparison(node); + } + + match node.kind() { + "program" => { + let 
children = self.normalize_children(node); + Some(self.wrap("ROOT", children, node)) + } + "method" => self.normalize_function(node), + "singleton_method" => self.normalize_singleton_function(node), + "class" | "class_definition" | "class_declaration" | "class_specifier" => { + self.normalize_class(node) + } + "module" => self.normalize_module(node), + "lambda" => self.normalize_lambda(node), + "body_statement" | "block_body" | "block" => self.normalize_body(node), + "ensure" => self.normalize_ensure_clause(node), + "begin" => self.normalize_begin(node), + "assignment" | "assignment_expression" | "assignment_statement" => { + self.normalize_assignment(node) + } + "call" | "call_expression" | "method_call" | "method_call_expression" => { + self.normalize_call(node) + } + "element_reference" => self.normalize_element_reference(node), + "rescue_modifier" => self.normalize_rescue_modifier(node), + "super" => Some(self.normalize_super(node)), + "return" | "return_statement" | "return_expression" | "break" | "break_statement" + | "break_expression" | "next" | "continue_statement" => self.normalize_return(node), + "nil" => Some(self.wrap("NIL", Vec::new(), node)), + "true" => Some(self.wrap("TRUE", Vec::new(), node)), + "false" => Some(self.wrap("FALSE", Vec::new(), node)), + "instance_variable" => Some(self.wrap( + "IVAR", + vec![Child::String(node_text(node, self.source).to_string())], + node, + )), + "identifier" | "simple_identifier" | "property_identifier" | "field_identifier" => { + Some(self.normalize_identifier(node)) + } + "constant" | "scope_resolution" | "type_identifier" | "scoped_type_identifier" => { + Some(self.normalize_const(node)) + } + "self" | "this" => Some(self.wrap("SELF", Vec::new(), node)), + "global_variable" => Some(self.normalize_global_variable(node)), + "array" => Some(self.normalize_array_literal(node)), + "interpolation" => self.normalize_interpolation(node), + "heredoc_beginning" => Some(self.normalize_heredoc_beginning(node)), + "string" | 
"string_content" | "string_literal" | "interpreted_string_literal" + | "raw_string_literal" => { + if self.interpolated_string(node) { + Some(self.normalize_interpolated_string(node)) + } else { + Some(self.wrap( + "STR", + vec![Child::String(node_text(node, self.source).to_string())], + node, + )) + } + } + "integer" => Some(self.wrap( + "INTEGER", + vec![Child::String(node_text(node, self.source).to_string())], + node, + )), + "float" | "float_literal" => Some(self.wrap( + "FLOAT", + vec![Child::String(node_text(node, self.source).to_string())], + node, + )), + "pair" | "keyword_argument" => self.normalize_pair(node), + "simple_symbol" | "symbol" => Some(self.wrap( + "LIT", + vec![Child::Symbol( + node_text(node, self.source).trim_start_matches(':').to_string(), + )], + node, + )), + _ => { + let children = self.normalize_children(node); + if children.is_empty() { + None + } else { + Some(self.wrap(kind_type(node.kind()), children, node)) + } + } + } + } + + fn normalize_function(&mut self, node: TreeSitterNode<'_>) -> Option { + let name = self.function_name(node)?; + let args = self.normalize_parameters(self.named_field(node, "parameters")); + let body = self.with_ruby_scope(node, true, |normalizer| { + let body_node = normalizer + .named_field(node, "body") + .or_else(|| normalizer.block_child(node))?; + let body = normalizer.normalize_body(body_node); + let body = normalizer.elide_tail_returns(body); + normalizer.elide_implicit_nil_body(body) + }); + let scope = self.scope(body, args, node); + Some(self.wrap( + "DEFN", + vec![Child::Symbol(name), Child::Node(Box::new(scope))], + node, + )) + } + + fn normalize_singleton_function(&mut self, node: TreeSitterNode<'_>) -> Option { + let name = self.function_name(node)?; + let receiver = self + .named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "self" | "constant" | "identifier")) + .and_then(|child| self.normalize_node(child)) + .unwrap_or_else(|| self.wrap("SELF", Vec::new(), node)); + 
let args = self.normalize_parameters(self.named_field(node, "parameters")); + let body = self.with_ruby_scope(node, true, |normalizer| { + let body_node = normalizer + .named_field(node, "body") + .or_else(|| normalizer.block_child(node))?; + let body = normalizer.normalize_body(body_node); + let body = normalizer.elide_tail_returns(body); + normalizer.elide_implicit_nil_body(body) + }); + let scope = self.scope(body, args, node); + Some(self.wrap( + "DEFS", + vec![ + Child::Node(Box::new(receiver)), + Child::Symbol(name), + Child::Node(Box::new(scope)), + ], + node, + )) + } + + fn normalize_class(&mut self, node: TreeSitterNode<'_>) -> Option { + let name = self.const_for( + self.named_field(node, "name") + .or_else(|| self.first_named(node)), + node, + ); + let body = self + .named_field(node, "body") + .or_else(|| self.block_child(node)) + .and_then(|body| self.normalize_body(body)); + Some(self.wrap( + "CLASS", + vec![ + Child::Node(Box::new(name)), + Child::Nil, + Child::Node(Box::new(self.scope(body, None, node))), + ], + node, + )) + } + + fn normalize_module(&mut self, node: TreeSitterNode<'_>) -> Option { + let name = self.const_for( + self.named_field(node, "name") + .or_else(|| self.first_named(node)), + node, + ); + let body = self + .named_field(node, "body") + .or_else(|| self.block_child(node)) + .and_then(|body| self.normalize_body(body)); + Some(self.wrap( + "MODULE", + vec![ + Child::Node(Box::new(name)), + Child::Node(Box::new(self.scope(body, None, node))), + ], + node, + )) + } + + fn normalize_lambda(&mut self, node: TreeSitterNode<'_>) -> Option { + let body_node = self + .named_field(node, "body") + .or_else(|| self.block_child(node)) + .or_else(|| self.named_children(node).into_iter().last())?; + let body = self.with_ruby_scope(node, false, |normalizer| { + normalizer.normalize_body(body_node).map(dynamic_scope) + }); + let scope = self.scope(body, None, node); + Some(self.wrap("LAMBDA", vec![Child::Node(Box::new(scope))], node)) + } + + 
fn normalize_body(&mut self, node: TreeSitterNode<'_>) -> Option { + if self.leading_if_statement(node) { + return self.normalize_leading_if_statement(node); + } + if self.ternary_statement(node) { + return self.normalize_ternary_statement(node); + } + if if_kind(node.kind()) { + return self.normalize_if(node); + } + if self.modifier_statement(node) { + return self.normalize_modifier_statement(node); + } + if self.statement_call_with_block(node) { + return self.normalize_statement_call_with_block(node); + } + if self.command_call_statement(node) { + return self.normalize_command_call_statement(node); + } + if self.dotted_expression(node) { + return self.normalize_dotted_expression(node); + } + if self.infix_statement(node) { + return self.normalize_infix_statement(node); + } + if self.boolean_expression(node) { + return self.normalize_boolean(node); + } + + if self.block_kind(node.kind()) { + let children = self.normalize_children(node); + if children.is_empty() { + return None; + } + if children.len() == 1 { + return child_node(children.into_iter().next().unwrap()); + } + + return Some(self.wrap("BLOCK", children, node)); + } + + self.normalize_node(node) + } + + fn normalize_if(&mut self, node: TreeSitterNode<'_>) -> Option { + if matches!(node.kind(), "if_modifier" | "unless_modifier") { + let named = self.named_children(node); + let action = *named.first()?; + let condition = *named.get(1)?; + let node_type = if node.kind().starts_with("unless") { + "UNLESS" + } else { + "IF" + }; + let condition = optional_node(self.normalize_node(condition)); + let action = optional_node(self.normalize_modifier_action(action)); + return Some(self.wrap(node_type, vec![condition, action, Child::Nil], node)); + } + + let condition_raw = self + .named_field(node, "condition") + .or_else(|| self.named_field(node, "predicate")) + .or_else(|| self.first_named(node))?; + let condition = optional_node(self.normalize_node(condition_raw)); + let positive_raw = self + .named_field(node, 
"consequence") + .or_else(|| self.named_field(node, "body")) + .or_else(|| self.named_children(node).into_iter().find(|child| child.kind() == "then")) + .or_else(|| self.branch_child(node, condition_raw, 0)); + let negative_raw = self + .named_field(node, "alternative") + .or_else(|| self.explicit_alternative(node)); + let positive = optional_node(positive_raw.and_then(|child| self.normalize_body(child))); + let negative = optional_node(negative_raw.and_then(|child| self.normalize_else_or_branch(child))); + let node_type = if node.kind().starts_with("unless") { + "UNLESS" + } else { + "IF" + }; + Some(self.wrap(node_type, vec![condition, positive, negative], node)) + } + + fn normalize_else_or_branch(&mut self, node: TreeSitterNode<'_>) -> Option { + if node.kind() != "else" { + return self.normalize_body(node); + } + if let Some(call) = self.first_dotted_call_descendant(node) { + let trailing = self + .source + .get(call.end_byte()..node.end_byte()) + .unwrap_or("") + .trim(); + if trailing.is_empty() { + return self.normalize_node(call); + } + } + self.normalize_body_nodes(self.named_children(node), node) + } + + fn normalize_body_nodes( + &mut self, + nodes: Vec>, + source: TreeSitterNode<'_>, + ) -> Option { + let mut children = Vec::new(); + let mut index = 0; + while index < nodes.len() { + if index + 1 < nodes.len() { + if let Some(call) = self.normalize_flat_dotted_nodes(&nodes[index..=index + 1]) { + children.push(Child::Node(Box::new(call))); + index += 2; + continue; + } + } + if let Some(child) = self.normalize_body(nodes[index]) { + children.push(Child::Node(Box::new(child))); + } + index += 1; + } + if children.is_empty() { + None + } else if children.len() == 1 { + child_node(children.into_iter().next().unwrap()) + } else { + Some(self.wrap("BLOCK", children, source)) + } + } + + fn normalize_return(&mut self, node: TreeSitterNode<'_>) -> Option { + self.normalize_return_node(node) + } + + fn normalize_super(&mut self, node: TreeSitterNode<'_>) -> 
Node { + let args_node = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list"); + let args = args_node + .map(|args| { + self.named_children(args) + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>() + }) + .unwrap_or_default(); + self.wrap("SUPER", vec![list_or_nil(args, args_node.unwrap_or(node), self)], node) + } + + fn normalize_return_node(&mut self, node: TreeSitterNode<'_>) -> Option { + let children = self + .named_children(node) + .into_iter() + .filter_map(|child| self.normalize_return_value(child)) + .map(|child| Child::Node(Box::new(child))) + .collect::>(); + Some(self.wrap(return_kind(node.kind()), children, node)) + } + + fn normalize_return_value(&mut self, node: TreeSitterNode<'_>) -> Option { + if node.kind() != "argument_list" { + return self.normalize_node(node); + } + if self.boolean_expression(node) { + return self.normalize_boolean(node); + } + if self.ternary_statement(node) { + return self.normalize_ternary_statement(node); + } + if self.dotted_expression(node) { + return self.normalize_dotted_expression(node); + } + if self.infix_statement(node) { + return self.normalize_infix_statement(node); + } + let values = self + .named_children(node) + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>(); + if values.len() == 1 { + values.into_iter().next() + } else if values.is_empty() { + None + } else { + Some(self.list(values, node)) + } + } + + fn normalize_ternary_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let (question_byte, colon_byte) = self.ternary_separator_bytes(node)?; + let named = self.named_children(node); + let condition = *named.first()?; + let positive_nodes = named + .iter() + .copied() + .filter(|child| child.start_byte() > question_byte && child.end_byte() <= colon_byte) + .collect::>(); + let negative_nodes = named + .iter() + .copied() + .filter(|child| child.start_byte() > colon_byte) + .collect::>(); + let 
condition = optional_node(self.normalize_node(condition)); + let positive = optional_node(self.normalize_ternary_branch(&positive_nodes)); + let negative = optional_node(self.normalize_ternary_branch(&negative_nodes)); + Some(self.wrap("IF", vec![condition, positive, negative], node)) + } + + fn normalize_boolean(&mut self, node: TreeSitterNode<'_>) -> Option { + let operator = self.boolean_operator(node)?; + let node_type = if operator == "or" { "OR" } else { "AND" }; + let mut operands = Vec::new(); + for child in self.named_children(node) { + if let Some(normalized) = self.normalize_node(child) { + if normalized.r#type == node_type { + operands.extend(normalized.children); + } else { + operands.push(Child::Node(Box::new(normalized))); + } + } + } + Some(self.wrap(node_type, operands, node)) + } + + fn normalize_comparison(&mut self, node: TreeSitterNode<'_>) -> Option { + let operands = self.named_children(node); + let left = operands.first().and_then(|left| self.normalize_node(*left)); + let right_raw = operands.get(1).copied().unwrap_or(node); + let right = self.normalize_node(right_raw); + Some(self.wrap( + "OPCALL", + vec![ + optional_node(left), + Child::Symbol(self.comparison_operator(node)?), + list_or_nil(right.into_iter().collect(), right_raw, self), + ], + node, + )) + } + + fn normalize_operator_call(&mut self, node: TreeSitterNode<'_>) -> Option { + let operands = self.named_children(node); + let left = operands.first().and_then(|left| self.normalize_node(*left)); + let right_raw = operands.get(1).copied().unwrap_or(node); + let right = self.normalize_node(right_raw); + Some(self.wrap( + "OPCALL", + vec![ + optional_node(left), + Child::Symbol(self.binary_operator(node)?), + list_or_nil(right.into_iter().collect(), right_raw, self), + ], + node, + )) + } + + fn normalize_infix_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let (left_raw, operator, right_raw) = self.infix_statement_parts(node)?; + let left = 
self.normalize_node(left_raw); + let right = self.normalize_node(right_raw); + Some(self.wrap( + "OPCALL", + vec![ + optional_node(left), + Child::Symbol(operator), + list_or_nil(right.into_iter().collect(), right_raw, self), + ], + node, + )) + } + + fn normalize_ternary_branch(&mut self, nodes: &[TreeSitterNode<'_>]) -> Option { + if nodes.is_empty() { + return None; + } + if nodes.len() == 1 { + return self.normalize_node(nodes[0]); + } + if let Some(call) = self.normalize_flat_dotted_nodes(nodes) { + return Some(call); + } + self.normalize_body_nodes(nodes.to_vec(), nodes[0]) + } + + fn normalize_flat_dotted_nodes(&mut self, nodes: &[TreeSitterNode<'_>]) -> Option { + let receiver = *nodes.first()?; + let method = *nodes.get(1)?; + let connector = self + .source + .get(receiver.end_byte()..method.start_byte()) + .unwrap_or("") + .trim(); + if !matches!(connector, "." | "&.") { + return None; + } + let node_type = if connector == "&." { "QCALL" } else { "CALL" }; + let receiver_node = optional_node(self.normalize_node(receiver)); + Some(self.wrap_from_nodes( + node_type, + vec![ + receiver_node, + Child::Symbol(node_text(method, self.source).trim_end_matches('=').to_string()), + Child::Nil, + ], + receiver, + method, + )) + } + + fn ternary_separator_bytes(&self, node: TreeSitterNode<'_>) -> Option<(usize, usize)> { + let mut question = None; + let mut colon = None; + for child in node.children(&mut node.walk()) { + if child.is_named() { + continue; + } + let text = node_text(child, self.source); + if text == "?" 
&& question.is_none() { + question = Some(child.start_byte()); + } else if text == ":" && question.is_some() { + colon = Some(child.start_byte()); + break; + } + } + Some((question?, colon?)) + } + + fn normalize_assignment(&mut self, node: TreeSitterNode<'_>) -> Option { + let left = self.assignment_left(node)?; + let right = self.assignment_right(node).and_then(|right| self.normalize_node(right)); + if let Some(target) = self.assignment_target(left, right.clone(), node) { + return Some(target); + } + Some(self.wrap( + "LASGN", + vec![Child::String(self.target_name(left)), optional_node(right)], + node, + )) + } + + fn normalize_call(&mut self, node: TreeSitterNode<'_>) -> Option { + if self.call_block(node).is_some() { + return self.normalize_call_with_block(node); + } + if self.visibility_inline_def_call(node) { + return self.normalize_visibility_inline_def(node); + } + self.normalize_call_without_block(node, None) + } + + fn normalize_call_with_block(&mut self, node: TreeSitterNode<'_>) -> Option { + let block = self.call_block(node); + let call = self.normalize_call_without_block(node, block)?; + let args = self.normalize_block_parameters(block); + let body = block.and_then(|block| { + self.with_ruby_scope(block, false, |normalizer| { + let body_node = normalizer + .named_field(block, "body") + .or_else(|| normalizer.block_child(block)) + .unwrap_or(block); + normalizer.normalize_body(body_node).map(dynamic_scope) + }) + }); + let scope = self.scope(body, args, node); + Some(self.wrap( + "ITER", + vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], + node, + )) + } + + fn normalize_statement_call_with_block(&mut self, node: TreeSitterNode<'_>) -> Option { + let block = self.call_block(node); + let call_source = if self.dotted_call(node) { + node + } else { + self.named_children(node) + .into_iter() + .find(|child| { + Some(*child) != block + && (self.call_kind(child.kind()) || self.member_read_node(*child)) + })? 
+ }; + let call = self.normalize_call_without_block(call_source, block)?; + let args = self.normalize_block_parameters(block); + let body = block.and_then(|block| { + self.with_ruby_scope(block, false, |normalizer| { + let body_node = normalizer + .named_field(block, "body") + .or_else(|| normalizer.block_child(block)) + .unwrap_or(block); + normalizer.normalize_body(body_node).map(dynamic_scope) + }) + }); + let scope = self.scope(body, args, node); + Some(self.wrap( + "ITER", + vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], + node, + )) + } + + fn normalize_dotted_expression(&mut self, node: TreeSitterNode<'_>) -> Option { + let block = self.call_block(node); + let call = self.normalize_dotted_call_expression(node)?; + let Some(block) = block else { + return Some(call); + }; + let args = self.normalize_block_parameters(Some(block)); + let body = self.with_ruby_scope(block, false, |normalizer| { + let body_node = normalizer + .named_field(block, "body") + .or_else(|| normalizer.block_child(block)) + .unwrap_or(block); + normalizer.normalize_body(body_node).map(dynamic_scope) + }); + let scope = self.scope(body, args, node); + Some(self.wrap( + "ITER", + vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], + node, + )) + } + + fn normalize_call_without_block( + &mut self, + node: TreeSitterNode<'_>, + block: Option>, + ) -> Option { + let call_source = block.map(|block| self.source_before_child(node, block)); + if self.dotted_call(node) { + let (receiver, method) = self.dotted_call_parts(node, block)?; + let args = self.call_arguments(node, None); + let node_type = if self.safe_navigation_call(node) { + "QCALL" + } else { + "CALL" + }; + let receiver = optional_node(self.normalize_node(receiver)); + let args = list_or_nil(args, node, self); + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node( + node_type, + vec![receiver, Child::Symbol(method), args], + source, + )); + } + return Some(self.wrap( + 
node_type, + vec![receiver, Child::Symbol(method), args], + node, + )); + } + + let function = self + .named_field(node, "function") + .or_else(|| self.named_field(node, "call")) + .or_else(|| { + self.named_children(node) + .into_iter() + .find(|child| Some(*child) != block) + })?; + let args = self.call_arguments(node, Some(function)); + if self.identifier_kind(function.kind()) { + let node_type = if args.is_empty() { "VCALL" } else { "FCALL" }; + return Some(self.wrap( + node_type, + vec![ + Child::Symbol(node_text(function, self.source).to_string()), + list_or_nil(args, node, self), + ], + node, + )); + } + if self.member_read_node(function) { + let (receiver, method) = self.member_parts(function)?; + let receiver = optional_node(self.normalize_node(receiver)); + let args = list_or_nil(args, node, self); + return Some(self.wrap( + "CALL", + vec![receiver, Child::Symbol(method), args], + node, + )); + } + let function = optional_node(self.normalize_node(function)); + let args = list_or_nil(args, node, self); + Some(self.wrap( + "CALL", + vec![function, Child::Symbol("call".to_string()), args], + node, + )) + } + + fn normalize_element_reference(&mut self, node: TreeSitterNode<'_>) -> Option { + let named = self.named_children(node); + let receiver = *named.first()?; + let args = named + .iter() + .skip(1) + .filter_map(|arg| self.normalize_node(*arg)) + .collect::>(); + if receiver.kind() == "self" { + return Some(self.wrap( + "FCALL", + vec![Child::Symbol("[]".to_string()), list_or_nil(args, node, self)], + node, + )); + } + let receiver = optional_node(self.normalize_node(receiver)); + let args = list_or_nil(args, node, self); + Some(self.wrap( + "CALL", + vec![receiver, Child::Symbol("[]".to_string()), args], + node, + )) + } + + fn normalize_rescue_modifier(&mut self, node: TreeSitterNode<'_>) -> Option { + let named = self.named_children(node); + let body = named.first().and_then(|body| self.normalize_node(*body)); + let handler = 
named.get(1).and_then(|handler| self.normalize_node(*handler)); + let resbody = self.wrap( + "RESBODY", + vec![Child::Nil, optional_node(handler), Child::Nil], + node, + ); + Some(self.wrap( + "RESCUE", + vec![ + optional_node(body), + Child::Node(Box::new(resbody)), + Child::Nil, + ], + node, + )) + } + + fn normalize_ensure_clause(&mut self, node: TreeSitterNode<'_>) -> Option { + if self.dotted_call(node) { + return self.normalize_dotted_call_expression(node); + } + if let Some(call) = self.first_dotted_call_descendant(node) { + return self.normalize_node(call); + } + self.normalize_body_nodes(self.named_children(node), node) + } + + fn normalize_dotted_call_expression(&mut self, node: TreeSitterNode<'_>) -> Option { + let (receiver_raw, method) = self.dotted_call_parts(node, None)?; + let args = self.call_arguments(node, None); + let args = list_or_nil(args, node, self); + let receiver = optional_node(self.normalize_node(receiver_raw)); + let node_type = if self.safe_navigation_call(node) { + "QCALL" + } else { + "CALL" + }; + let source_end = self + .named_children(node) + .into_iter() + .filter(|child| !matches!(child.kind(), "block" | "do_block")) + .last() + .unwrap_or(receiver_raw); + Some(self.wrap_from_nodes( + node_type, + vec![receiver, Child::Symbol(method), args], + receiver_raw, + source_end, + )) + } + + fn normalize_begin(&mut self, node: TreeSitterNode<'_>) -> Option { + let named = self.named_children(node); + let rescue_nodes = named + .iter() + .copied() + .filter(|child| child.kind() == "rescue") + .collect::>(); + let ensure_node = named.iter().copied().find(|child| child.kind() == "ensure"); + if rescue_nodes.is_empty() { + let Some(ensure_node) = ensure_node else { + let children = self.normalize_children(node); + return Some(self.wrap("BEGIN", children, node)); + }; + let body_nodes = named + .iter() + .copied() + .take_while(|child| child.kind() != "ensure") + .collect::>(); + let body = self.normalize_body_nodes(body_nodes.clone(), 
*body_nodes.first().unwrap_or(&node)); + let ensure_body = self.normalize_body(ensure_node); + return Some(self.wrap( + "ENSURE", + vec![optional_node(body), optional_node(ensure_body)], + node, + )); + } + + let body_nodes = named + .iter() + .copied() + .take_while(|child| child.kind() != "rescue") + .collect::>(); + let body = self.normalize_body_nodes(body_nodes.clone(), *body_nodes.first().unwrap_or(&node)); + let resbodies = rescue_nodes + .iter() + .filter_map(|child| self.normalize_rescue_clause(*child)) + .collect::>(); + let rescued = self.wrap( + "RESCUE", + vec![ + optional_node(body), + optional_node(self.link_rescue_chain(resbodies)), + Child::Nil, + ], + node, + ); + let Some(ensure_node) = ensure_node else { + return Some(rescued); + }; + let ensure_body = self.normalize_body(ensure_node); + Some(self.wrap( + "ENSURE", + vec![Child::Node(Box::new(rescued)), optional_node(ensure_body)], + node, + )) + } + + fn normalize_rescue_clause(&mut self, node: TreeSitterNode<'_>) -> Option { + let exceptions = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "exceptions"); + let exception_nodes = exceptions + .map(|exceptions| { + self.named_children(exceptions) + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>() + }) + .unwrap_or_default(); + let exception_variable = self.rescue_exception_variable(node); + let handler = self.named_children(node).into_iter().rev().find(|child| { + !matches!(child.kind(), "exceptions" | "exception_variable" | "comment") + }); + let normalized_handler = handler.and_then(|handler| self.normalize_body(handler)); + let body = self.prepend_rescue_exception_assignment(normalized_handler, exception_variable); + Some(self.wrap( + "RESBODY", + vec![list_or_nil(exception_nodes, exceptions.unwrap_or(node), self), optional_node(body), Child::Nil], + node, + )) + } + + fn link_rescue_chain(&self, mut resbodies: Vec) -> Option { + let mut next = None; + while let Some(mut current) = 
resbodies.pop() { + if current.children.len() > 2 { + current.children[2] = optional_node(next); + } + next = Some(current); + } + next + } + + fn rescue_exception_variable(&self, node: TreeSitterNode<'_>) -> Option { + let variable = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "exception_variable")?; + let name = self + .named_children(variable) + .into_iter() + .find(|child| self.identifier_kind(child.kind()))?; + let errinfo = self.wrap("ERRINFO", Vec::new(), variable); + Some(self.wrap( + "LASGN", + vec![ + Child::String(node_text(name, self.source).to_string()), + Child::Node(Box::new(errinfo)), + ], + variable, + )) + } + + fn prepend_rescue_exception_assignment( + &self, + body: Option, + assignment: Option, + ) -> Option { + let Some(assignment) = assignment else { + return body; + }; + let Some(mut body) = body else { + return Some(assignment); + }; + if body.r#type == "BLOCK" { + let mut children = vec![Child::Node(Box::new(assignment))]; + children.extend(body.children); + body.children = children; + Some(body) + } else { + let first_lineno = assignment.first_lineno; + let first_column = assignment.first_column; + let last_lineno = body.last_lineno; + let last_column = body.last_column; + let text = if assignment.text.is_empty() { + body.text.clone() + } else if body.text.is_empty() { + assignment.text.clone() + } else { + format!("{} {}", assignment.text, body.text) + }; + Some(Node { + r#type: "BLOCK".to_string(), + children: vec![Child::Node(Box::new(assignment)), Child::Node(Box::new(body))], + first_lineno, + first_column, + last_lineno, + last_column, + text, + }) + } + } + + fn normalize_modifier_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let named = self.named_children(node); + let action = *named.first()?; + let condition = *named.last()?; + let condition = optional_node(self.normalize_node(condition)); + let action = optional_node(self.normalize_node(action)); + Some(self.wrap( + "IF", + 
vec![condition, action, Child::Nil], + node, + )) + } + + fn normalize_modifier_action(&mut self, node: TreeSitterNode<'_>) -> Option { + self.normalize_node(node) + } + + fn normalize_command_call_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let function = self.named_children(node).into_iter().next()?; + if self.visibility_inline_def_statement(node, function) { + let method = self.inline_def_from_statement(node); + return Some(self.wrap( + "FCALL", + vec![ + Child::Symbol(node_text(function, self.source).to_string()), + list_or_nil(method.into_iter().collect(), node, self), + ], + node, + )); + } + let args_node = self + .named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "argument_list" | "arguments")); + let args = args_node + .map(|args| self.command_arguments(args)) + .unwrap_or_default(); + let block = self.call_block(node); + if node_text(function, self.source) == "yield" { + return Some(self.wrap( + "YIELD", + vec![list_or_nil(args, args_node.unwrap_or(node), self)], + node, + )); + } + let call_type = if args.is_empty() { "VCALL" } else { "FCALL" }; + let call = self.wrap( + call_type, + vec![ + Child::Symbol(node_text(function, self.source).to_string()), + list_or_nil(args, args_node.unwrap_or(node), self), + ], + node, + ); + let Some(block) = block else { + return Some(call); + }; + let block_args = self.normalize_block_parameters(Some(block)); + let body = self.with_ruby_scope(block, false, |normalizer| { + let body_node = normalizer + .named_field(block, "body") + .or_else(|| normalizer.block_child(block)) + .unwrap_or(block); + normalizer.normalize_body(body_node).map(dynamic_scope) + }); + Some(self.wrap( + "ITER", + vec![ + Child::Node(Box::new(call)), + Child::Node(Box::new(self.scope(body, block_args, node))), + ], + node, + )) + } + + fn normalize_visibility_inline_def(&mut self, node: TreeSitterNode<'_>) -> Option { + let message = node_text(self.named_children(node).into_iter().next()?, 
self.source).to_string(); + let args = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list"); + let method = args.and_then(|args| self.inline_def_from_source(args)); + Some(self.wrap( + "FCALL", + vec![ + Child::Symbol(message), + list_or_nil(method.into_iter().collect(), args.unwrap_or(node), self), + ], + node, + )) + } + + fn normalize_const(&mut self, node: TreeSitterNode<'_>) -> Node { + if matches!(node.kind(), "scope_resolution" | "scoped_type_identifier") { + let parts = self.named_children(node); + let base = parts + .first() + .map(|part| self.normalize_const(*part)) + .map(|part| Child::Node(Box::new(part))) + .unwrap_or(Child::Nil); + let name = self + .named_field(node, "name") + .or_else(|| parts.last().copied()) + .map(|name| node_text(name, self.source).to_string()) + .unwrap_or_default(); + return self.wrap("COLON2", vec![base, Child::Symbol(name)], node); + } + + self.wrap( + "CONST", + vec![Child::Symbol(node_text(node, self.source).to_string())], + node, + ) + } + + fn const_for(&mut self, node: Option>, source: TreeSitterNode<'_>) -> Node { + let Some(node) = node else { + return self.wrap( + "CONST", + vec![Child::Symbol("(anonymous)".to_string())], + source, + ); + }; + if matches!( + node.kind(), + "constant" | "scope_resolution" | "type_identifier" | "scoped_type_identifier" + ) { + return self.normalize_const(node); + } + self.wrap( + "CONST", + vec![Child::Symbol(node_text(node, self.source).to_string())], + node, + ) + } + + fn normalize_global_variable(&self, node: TreeSitterNode<'_>) -> Node { + let text = node_text(node, self.source).to_string(); + if let Some(number) = text.strip_prefix('$').and_then(|value| value.parse::().ok()) { + return self.wrap("NTH_REF", vec![Child::String(number.to_string())], node); + } + self.wrap("GVAR", vec![Child::String(text)], node) + } + + fn normalize_array_literal(&mut self, node: TreeSitterNode<'_>) -> Node { + let values = self + .named_children(node) + 
.into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>(); + if values.is_empty() { + self.wrap("ZLIST", Vec::new(), node) + } else { + self.list(values, node) + } + } + + fn normalize_pair(&mut self, node: TreeSitterNode<'_>) -> Option { + let named = self.named_children(node); + let key = *named.first()?; + let value = named.get(1).and_then(|value| self.normalize_node(*value)); + let key_text = node_text(key, self.source) + .trim_end_matches(':') + .trim_start_matches(':') + .to_string(); + let key_lit = self.wrap("LIT", vec![Child::Symbol(key_text)], key); + Some(self.wrap( + "HASH", + vec![Child::Node(Box::new(key_lit)), optional_node(value)], + node, + )) + } + + fn normalize_interpolated_string(&mut self, node: TreeSitterNode<'_>) -> Node { + let children = self.normalize_children(node); + self.wrap("DSTR", children, node) + } + + fn normalize_interpolated_statement(&mut self, node: TreeSitterNode<'_>) -> Node { + let children = self.normalize_children(node); + self.wrap("DSTR", children, node) + } + + fn normalize_interpolation(&mut self, node: TreeSitterNode<'_>) -> Option { + let exprs = self + .named_children(node) + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>(); + let body = if exprs.len() == 1 { + exprs.into_iter().next() + } else if exprs.is_empty() { + None + } else { + Some(self.list(exprs, node)) + }; + Some(self.wrap("EVSTR", body.into_iter().map(|node| Child::Node(Box::new(node))).collect(), node)) + } + + fn normalize_heredoc_beginning(&mut self, node: TreeSitterNode<'_>) -> Node { + let heredoc_body = node + .parent() + .and_then(|parent| parent.parent()) + .and_then(|body_statement| { + self.named_children(body_statement) + .into_iter() + .find(|child| child.kind() == "heredoc_body") + }); + let children = heredoc_body + .map(|body| self.normalize_heredoc_children(body)) + .unwrap_or_default(); + self.wrap("DSTR", children, node) + } + + fn normalize_heredoc_children(&mut self, node: 
TreeSitterNode<'_>) -> Vec { + self.named_children(node) + .into_iter() + .filter_map(|child| match child.kind() { + "interpolation" => self.normalize_interpolation(child), + "heredoc_content" => { + let text = node_text(child, self.source).to_string(); + if text.is_empty() { + None + } else { + Some(self.wrap("STR", vec![Child::String(text)], child)) + } + } + _ => None, + }) + .map(|child| Child::Node(Box::new(child))) + .collect() + } + + fn normalize_identifier(&mut self, node: TreeSitterNode<'_>) -> Node { + let name = node_text(node, self.source).to_string(); + if self.ruby_vcall_identifier(node, &name) { + self.wrap("VCALL", vec![Child::Symbol(name)], node) + } else { + self.wrap("LVAR", vec![Child::String(name)], node) + } + } + + fn normalize_parameters(&mut self, node: Option>) -> Option { + let node = node?; + let defaults = self + .named_children(node) + .into_iter() + .filter_map(|param| { + let name = self.named_field(param, "name")?; + let value = self.named_field(param, "value")?; + let value = optional_node(self.normalize_node(value)); + Some(self.wrap( + "LASGN", + vec![Child::Symbol(node_text(name, self.source).to_string()), value], + param, + )) + }) + .map(|node| Child::Node(Box::new(node))) + .collect::>(); + if defaults.is_empty() { + None + } else { + Some(self.wrap("ARGS", defaults, node)) + } + } + + fn normalize_block_parameters(&mut self, _block: Option>) -> Option { + None + } + + fn normalize_children(&mut self, node: TreeSitterNode<'_>) -> Vec { + let mut children = Vec::new(); + for child in self.named_children(node) { + if child.kind() == "heredoc_body" { + continue; + } + if self.assignment_rhs(child) { + continue; + } + if let Some(normalized) = self.normalize_node(child) { + children.push(Child::Node(Box::new(normalized))); + } + } + children + } + + fn scope(&self, body: Option, args: Option, source: TreeSitterNode<'_>) -> Node { + self.wrap( + "SCOPE", + vec![Child::Nil, optional_node(args), optional_node(body)], + source, + ) 
+ } + + fn list(&self, children: Vec, source: TreeSitterNode<'_>) -> Node { + self.wrap( + "LIST", + children + .into_iter() + .map(|child| Child::Node(Box::new(child))) + .collect(), + source, + ) + } + + fn wrap(&self, node_type: &str, children: Vec, source: TreeSitterNode<'_>) -> Node { + let node_span = span(source); + Node { + r#type: node_type.to_string(), + children, + first_lineno: node_span[0], + first_column: node_span[1], + last_lineno: node_span[2], + last_column: node_span[3], + text: node_text(source, self.source).to_string(), + } + } + + fn wrap_from_nodes( + &self, + node_type: &str, + children: Vec, + first: TreeSitterNode<'_>, + last: TreeSitterNode<'_>, + ) -> Node { + let first_span = span(first); + let last_span = span(last); + let text = self + .source + .get(first.start_byte()..last.end_byte()) + .unwrap_or("") + .to_string(); + Node { + r#type: node_type.to_string(), + children, + first_lineno: first_span[0], + first_column: first_span[1], + last_lineno: last_span[2], + last_column: last_span[3], + text, + } + } + + fn wrap_from_source_node(&self, node_type: &str, children: Vec, source: &Node) -> Node { + Node { + r#type: node_type.to_string(), + children, + first_lineno: source.first_lineno, + first_column: source.first_column, + last_lineno: source.last_lineno, + last_column: source.last_column, + text: source.text.clone(), + } + } + + fn with_ruby_scope( + &mut self, + node: TreeSitterNode<'_>, + reset: bool, + f: impl FnOnce(&mut Self) -> T, + ) -> T { + let previous = self.local_stack.clone(); + if reset { + self.local_stack.clear(); + } + self.local_stack.push(self.ruby_scope_locals(node)); + let result = f(self); + self.local_stack = previous; + result + } + + fn ruby_scope_locals(&self, node: TreeSitterNode<'_>) -> BTreeSet { + let mut locals = BTreeSet::new(); + self.collect_ruby_scope_locals(node, &mut locals, true); + locals + } + + fn collect_ruby_scope_locals( + &self, + node: TreeSitterNode<'_>, + locals: &mut BTreeSet, + root: 
bool, + ) { + if !root && self.ruby_scope_boundary(node) { + return; + } + if matches!( + node.kind(), + "method_parameters" | "block_parameters" | "lambda_parameters" + ) { + for child in self.named_children(node) { + self.collect_identifier_names(child, locals); + } + } + if matches!(node.kind(), "assignment" | "operator_assignment") { + if let Some(left) = self.assignment_left(node) { + self.collect_assignment_target_names(left, locals); + } + } + for child in self.named_children(node) { + if !self.ruby_scope_boundary(child) { + self.collect_ruby_scope_locals(child, locals, false); + } + } + } + + fn collect_assignment_target_names( + &self, + node: TreeSitterNode<'_>, + locals: &mut BTreeSet, + ) { + if self.identifier_kind(node.kind()) { + locals.insert(node_text(node, self.source).trim_start_matches('*').to_string()); + return; + } + if matches!( + node.kind(), + "left_assignment_list" + | "expression_list" + | "splat" + | "splat_parameter" + | "rest_assignment" + ) { + for child in self.named_children(node) { + self.collect_assignment_target_names(child, locals); + } + } + } + + fn collect_identifier_names(&self, node: TreeSitterNode<'_>, locals: &mut BTreeSet) { + if self.identifier_kind(node.kind()) { + locals.insert(node_text(node, self.source).trim_start_matches('*').to_string()); + } + for child in self.named_children(node) { + self.collect_identifier_names(child, locals); + } + } + + fn ruby_scope_boundary(&self, node: TreeSitterNode<'_>) -> bool { + if matches!(node.kind(), "block" | "do_block") + && node + .parent() + .map(|parent| parent.kind() == "lambda") + .unwrap_or(false) + { + return false; + } + matches!( + node.kind(), + "method" + | "function_definition" + | "function_declaration" + | "method_definition" + | "method_declaration" + | "function_item" + | "singleton_method" + | "class" + | "module" + | "singleton_class" + | "lambda" + | "block" + | "do_block" + ) + } + + fn ruby_vcall_identifier(&self, node: TreeSitterNode<'_>, name: &str) -> 
bool { + !self.assignment_lhs(node) + && !self.ruby_definition_identifier(node) + && !self + .local_stack + .iter() + .rev() + .any(|scope| scope.contains(name)) + } + + fn ruby_definition_identifier(&self, node: TreeSitterNode<'_>) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if matches!(parent.kind(), "method" | "singleton_method") { + return self + .named_field(parent, "name") + .map(|name| name == node) + .unwrap_or(false); + } + matches!( + parent.kind(), + "method_parameters" + | "block_parameters" + | "lambda_parameters" + | "optional_parameter" + | "keyword_parameter" + | "block_parameter" + ) + } + + fn assignment_lhs(&self, node: TreeSitterNode<'_>) -> bool { + if node + .prev_sibling() + .map(|sibling| node_text(sibling, self.source) == ":") + .unwrap_or(false) + { + return false; + } + node.next_sibling() + .map(|sibling| assignment_operator(node_text(sibling, self.source))) + .unwrap_or(false) + } + + fn assignment_rhs(&self, node: TreeSitterNode<'_>) -> bool { + node.prev_sibling() + .map(|sibling| assignment_operator(node_text(sibling, self.source))) + .unwrap_or(false) + } + + fn modifier_statement(&self, node: TreeSitterNode<'_>) -> bool { + let named = self.named_children(node); + matches!(node.kind(), "body_statement" | "block_body" | "statement") + && self.modifier_keyword(node).is_some() + && named.len() == 2 + } + + fn leading_if_statement(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "statement" + ) && node + .children(&mut node.walk()) + .next() + .map(|child| matches!(child.kind(), "if" | "unless")) + .unwrap_or(false) + && self.named_children(node).len() >= 2 + && self + .named_children(node) + .first() + .map(|child| !if_kind(child.kind())) + .unwrap_or(false) + } + + fn normalize_leading_if_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let keyword = node + .children(&mut node.walk()) + .next() + .map(|child| 
child.kind().to_string())?; + let condition = self + .named_children(node) + .into_iter() + .find(|child| !matches!(child.kind(), "comment" | "then" | "elsif" | "else"))?; + let consequence = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "then") + .or_else(|| self.branch_child(node, condition, 0)); + let alternative = self.explicit_alternative(node); + let node_type = if keyword == "unless" { "UNLESS" } else { "IF" }; + let condition = optional_node(self.normalize_node(condition)); + let consequence = optional_node(consequence.and_then(|child| self.normalize_body(child))); + let alternative = optional_node(alternative.and_then(|child| self.normalize_else_or_branch(child))); + Some(self.wrap( + node_type, + vec![condition, consequence, alternative], + node, + )) + } + + fn command_call_statement(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "statement" + ) && !self.dotted_call(node) + && self + .named_children(node) + .into_iter() + .next() + .map(|child| self.identifier_kind(child.kind())) + .unwrap_or(false) + && (self + .named_children(node) + .into_iter() + .any(|child| matches!(child.kind(), "argument_list" | "arguments")) + || self.call_block(node).is_some()) + } + + fn visibility_inline_def_call(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() != "call" { + return false; + } + let Some(message) = self.named_children(node).into_iter().next() else { + return false; + }; + if !inline_def_wrapper_mid(node_text(message, self.source)) { + return false; + } + self.named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list") + .map(|args| node_text(args, self.source).trim_start().starts_with("def ")) + .unwrap_or(false) + } + + fn visibility_inline_def_statement( + &self, + node: TreeSitterNode<'_>, + function: TreeSitterNode<'_>, + ) -> bool { + inline_def_wrapper_mid(node_text(function, self.source)) + && node_text(node, 
self.source).contains("def ") + } + + fn inline_def_from_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let source = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list") + .unwrap_or(node); + self.inline_def_from_source(source) + } + + fn inline_def_from_source(&mut self, source: TreeSitterNode<'_>) -> Option { + if let Some(method) = self + .named_children(source) + .into_iter() + .find(|child| matches!(child.kind(), "method" | "singleton_method")) + { + return if method.kind() == "singleton_method" { + self.normalize_singleton_function(method) + } else { + self.normalize_function(method) + }; + } + let body = self.inline_def_body(source); + let receiver = self.inline_def_receiver(source); + let normalized_body = self.with_ruby_scope(source, true, |normalizer| { + let body = body.and_then(|body| normalizer.normalize_body(body)); + normalizer.elide_tail_returns(body) + }); + if let Some(receiver) = receiver { + let name = self.inline_def_name_after_receiver(source, receiver)?; + if name.is_empty() { + return None; + } + let receiver = self.normalize_node(receiver)?; + return Some(self.wrap( + "DEFS", + vec![ + Child::Node(Box::new(receiver)), + Child::Symbol(name), + Child::Node(Box::new(self.scope(normalized_body, None, source))), + ], + source, + )); + } + + let name = self + .named_children(source) + .into_iter() + .find(|child| self.identifier_kind(child.kind())) + .map(|child| node_text(child, self.source).to_string())?; + if name.is_empty() { + return None; + } + Some(self.wrap( + "DEFN", + vec![ + Child::Symbol(name), + Child::Node(Box::new(self.scope(normalized_body, None, source))), + ], + source, + )) + } + + fn inline_def_receiver<'tree>( + &self, + source: TreeSitterNode<'tree>, + ) -> Option> { + let text = node_text(source, self.source); + if !text.contains("def ") || !text.split_whitespace().nth(1).unwrap_or("").contains('.') { + return None; + } + self.named_children(source) + .into_iter() + 
.find(|child| matches!(child.kind(), "self" | "this" | "constant" | "scope_resolution")) + } + + fn inline_def_name_after_receiver( + &self, + source: TreeSitterNode<'_>, + receiver: TreeSitterNode<'_>, + ) -> Option { + let children = self.named_children(source); + let index = children.iter().position(|child| *child == receiver)?; + children + .into_iter() + .skip(index + 1) + .find(|child| self.identifier_kind(child.kind())) + .map(|child| node_text(child, self.source).to_string()) + } + + fn inline_def_body<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + let mut stack = self.named_children(node).into_iter().rev().collect::>(); + while let Some(child) = stack.pop() { + if child.kind() == "body_statement" { + return Some(child); + } + stack.extend(self.named_children(child).into_iter().rev()); + } + None + } + + fn modifier_keyword(&self, node: TreeSitterNode<'_>) -> Option { + let mut seen_named = false; + for child in node.children(&mut node.walk()) { + seen_named = seen_named || child.is_named(); + if seen_named + && !child.is_named() + && matches!(child.kind(), "if" | "unless" | "while" | "until") + { + return Some(child.kind().to_string()); + } + } + None + } + + fn ternary_statement(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" | "conditional" + ) && self.named_children(node).len() >= 3 + && node + .children(&mut node.walk()) + .any(|child| !child.is_named() && node_text(child, self.source) == "?") + && node + .children(&mut node.walk()) + .any(|child| !child.is_named() && node_text(child, self.source) == ":") + } + + fn boolean_expression(&self, node: TreeSitterNode<'_>) -> bool { + (matches!(node.kind(), "binary" | "binary_expression" | "boolean_operator") + || self.boolean_statement(node)) + && matches!(self.boolean_operator(node).as_deref(), Some("and" | "or")) + } + + fn boolean_statement(&self, node: TreeSitterNode<'_>) -> bool { + if !matches!( + 
node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) { + return false; + } + if !matches!( + self.binary_operator(node).as_deref(), + Some("&&" | "||" | "and" | "or") + ) { + return false; + } + if self.named_children(node).len() < 2 { + return false; + } + node.children(&mut node.walk()).all(|child| { + child.is_named() + || matches!(node_text(child, self.source), "&&" | "||" | "and" | "or" | "(" | ")") + }) + } + + fn operator_call_expression(&self, node: TreeSitterNode<'_>) -> bool { + matches!(node.kind(), "binary" | "binary_expression") + && self + .binary_operator(node) + .map(|operator| OPERATOR_CALL_OPERATORS.contains(&operator.as_str())) + .unwrap_or(false) + } + + fn comparison_expression(&self, node: TreeSitterNode<'_>) -> bool { + matches!(node.kind(), "binary" | "binary_expression" | "comparison_operator") + && self + .comparison_operator(node) + .map(|operator| COMPARISON_OPERATORS.contains(&operator.as_str())) + .unwrap_or(false) + } + + fn infix_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.infix_statement_parts(node).is_some() + } + + fn infix_statement_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option<(TreeSitterNode<'tree>, String, TreeSitterNode<'tree>)> { + if !matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) { + return None; + } + let mut named_index = 0usize; + let mut left = None; + let mut right = None; + let mut operator = None; + for child in node.children(&mut node.walk()) { + if child.is_named() { + left.get_or_insert(child); + if operator.is_some() { + right = Some(child); + } + named_index += 1; + } else { + let text = node_text(child, self.source); + if COMPARISON_OPERATORS.contains(&text) || OPERATOR_CALL_OPERATORS.contains(&text) { + operator = Some(text.to_string()); + } + } + } + if named_index == 2 { + Some((left?, operator?, right?)) + } else { + None + } + } + + fn boolean_operator(&self, node: TreeSitterNode<'_>) -> 
Option { + let direct = self.binary_operator(node)?; + if matches!(direct.as_str(), "&&" | "and") { + Some("and".to_string()) + } else if matches!(direct.as_str(), "||" | "or") { + Some("or".to_string()) + } else { + None + } + } + + fn comparison_operator(&self, node: TreeSitterNode<'_>) -> Option { + self.binary_operator(node) + .or_else(|| comparison_operator_from_text(node_text(node, self.source))) + } + + fn binary_operator(&self, node: TreeSitterNode<'_>) -> Option { + node.children(&mut node.walk()) + .find(|child| !child.is_named() && !matches!(node_text(*child, self.source), "(" | ")")) + .map(|child| node_text(child, self.source).to_string()) + } + + fn interpolated_statement(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) && self + .named_children(node) + .into_iter() + .any(|child| child.kind() == "interpolation") + } + + fn interpolated_string(&self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "string" + && self + .named_children(node) + .into_iter() + .any(|child| child.kind() == "interpolation") + } + + fn statement_call_with_block(&self, node: TreeSitterNode<'_>) -> bool { + matches!(node.kind(), "body_statement" | "block_body" | "statement") + && self.call_block(node).is_some() + && (self.dotted_call(node) + || self.named_children(node).into_iter().any(|child| { + self.call_kind(child.kind()) || self.member_read_node(child) + })) + } + + fn dotted_expression(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) && self.dotted_call(node) + } + + fn dotted_call(&self, node: TreeSitterNode<'_>) -> bool { + if !node + .children(&mut node.walk()) + .any(|child| matches!(node_text(child, self.source), "." 
| "&.")) + { + return false; + } + let callable = self + .named_children(node) + .into_iter() + .filter(|child| { + !matches!( + child.kind(), + "block" | "do_block" | "argument_list" | "arguments" + ) + }) + .collect::>(); + callable.len() >= 2 + } + + fn safe_navigation_call(&self, node: TreeSitterNode<'_>) -> bool { + node.children(&mut node.walk()) + .any(|child| !child.is_named() && node_text(child, self.source) == "&.") + } + + fn dotted_call_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + block: Option>, + ) -> Option<(TreeSitterNode<'tree>, String)> { + let callable = self + .named_children(node) + .into_iter() + .filter(|child| Some(*child) != block) + .filter(|child| { + !matches!( + child.kind(), + "block" | "do_block" | "argument_list" | "arguments" + ) + }) + .collect::>(); + let receiver = *callable.first()?; + let method = node_text(*callable.get(1)?, self.source) + .trim_end_matches('=') + .to_string(); + Some((receiver, method)) + } + + fn member_read_node(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "attribute" + | "member_expression" + | "member_access_expression" + | "field" + | "field_access" + | "selector_expression" + | "field_expression" + | "navigation_expression" + | "directly_assignable_expression" + | "expression_list" + ) + } + + fn member_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option<(TreeSitterNode<'tree>, String)> { + if self.dotted_call(node) { + return self.dotted_call_parts(node, None); + } + let receiver = self + .named_field(node, "receiver") + .or_else(|| self.named_field(node, "object")) + .or_else(|| self.named_field(node, "operand")) + .or_else(|| self.named_field(node, "value")) + .or_else(|| self.named_field(node, "expression")) + .or_else(|| { + self.named_children(node) + .into_iter() + .find(|child| child.kind() != "navigation_suffix") + })?; + let method = self + .named_field(node, "method") + .or_else(|| self.named_field(node, "field")) + .or_else(|| 
self.named_field(node, "property")) + .or_else(|| self.named_field(node, "suffix")) + .or_else(|| self.named_children(node).into_iter().last())?; + (receiver != method).then(|| { + ( + receiver, + node_text(method, self.source) + .trim_start_matches(['.', '?']) + .trim_end_matches('=') + .to_string(), + ) + }) + } + + fn call_arguments( + &mut self, + node: TreeSitterNode<'_>, + function: Option>, + ) -> Vec { + let Some(args) = self + .named_field(node, "arguments") + .or_else(|| self.named_field(node, "argument")) + .or_else(|| { + self.named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "argument_list" | "arguments")) + }) + else { + return Vec::new(); + }; + self.named_children(args) + .into_iter() + .filter(|child| Some(*child) != function) + .filter_map(|child| self.normalize_node(child)) + .collect() + } + + fn command_arguments(&mut self, args: TreeSitterNode<'_>) -> Vec { + let children = self.named_children(args); + if children.is_empty() { + return self + .scalar_argument_list_value(args) + .into_iter() + .collect(); + } + if self.dotted_expression(args) { + return self.normalize_dotted_expression(args).into_iter().collect(); + } + if children.len() == 1 + && self.call_kind(children[0].kind()) + && self.call_block(children[0]).is_some() + { + return self.normalize_call_with_block(children[0]).into_iter().collect(); + } + children + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect() + } + + fn scalar_argument_list_value(&mut self, node: TreeSitterNode<'_>) -> Option { + let text = node_text(node, self.source).trim(); + if text == "yield" { + return Some(self.wrap("YIELD", vec![Child::Nil], node)); + } + if text == "nil" { + return Some(self.wrap("NIL", Vec::new(), node)); + } + if text == "true" { + return Some(self.wrap("TRUE", Vec::new(), node)); + } + if text == "false" { + return Some(self.wrap("FALSE", Vec::new(), node)); + } + if let Some(symbol) = text.strip_prefix(':') { + if 
bare_identifier_text(symbol) { + return Some(self.wrap("LIT", vec![Child::Symbol(symbol.to_string())], node)); + } + } + if bare_identifier_text(text) { + if !self + .local_stack + .iter() + .rev() + .any(|scope| scope.contains(text)) + { + Some(self.wrap("VCALL", vec![Child::Symbol(text.to_string())], node)) + } else { + Some(self.wrap("LVAR", vec![Child::String(text.to_string())], node)) + } + } else { + None + } + } + + fn assignment_left<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_field(node, "left") + .or_else(|| self.named_children(node).into_iter().next()) + } + + fn assignment_right<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_field(node, "right") + .or_else(|| self.named_children(node).into_iter().nth(1)) + } + + fn assignment_target( + &mut self, + left: TreeSitterNode<'_>, + right: Option, + source: TreeSitterNode<'_>, + ) -> Option { + if left.kind() == "instance_variable" || node_text(left, self.source).starts_with('@') { + return Some(self.wrap( + "IASGN", + vec![ + Child::String(node_text(left, self.source).to_string()), + optional_node(right), + ], + source, + )); + } + if left.kind() == "global_variable" || node_text(left, self.source).starts_with('$') { + return Some(self.wrap( + "GASGN", + vec![ + Child::String(node_text(left, self.source).to_string()), + optional_node(right), + ], + source, + )); + } + if left.kind() == "element_reference" { + let named = self.named_children(left); + let receiver = *named.first()?; + let mut args = named + .iter() + .skip(1) + .filter_map(|arg| self.normalize_node(*arg)) + .collect::>(); + if let Some(right) = right { + args.push(right); + } + let receiver = optional_node(self.normalize_node(receiver)); + let args = list_or_nil(args, left, self); + return Some(self.wrap( + "ATTRASGN", + vec![receiver, Child::Symbol("[]=".to_string()), args], + source, + )); + } + if self.member_read_node(left) { + let (receiver, method) = self.member_parts(left)?; + let writer = 
if node_text(left, self.source).contains("&.") { + method + } else { + format!("{method}=") + }; + let receiver = optional_node(self.normalize_node(receiver)); + let args = list_or_nil(right.into_iter().collect(), left, self); + return Some(self.wrap( + "ATTRASGN", + vec![receiver, Child::Symbol(writer), args], + source, + )); + } + if left.kind() == "expression_list" { + return self + .named_children(left) + .into_iter() + .next() + .and_then(|child| self.assignment_target(child, right, source)); + } + None + } + + fn normalize_assignment_lhs(&mut self, node: TreeSitterNode<'_>) -> Option { + let right = node + .next_named_sibling() + .and_then(|sibling| self.normalize_node(sibling)); + let source = node.parent().unwrap_or(node); + self.assignment_target(node, right.clone(), source).or_else(|| { + Some(self.wrap( + "LASGN", + vec![Child::String(self.target_name(node)), optional_node(right)], + node, + )) + }) + } + + fn target_name(&self, node: TreeSitterNode<'_>) -> String { + node_text(node, self.source) + .trim_start_matches('*') + .to_string() + } + + fn function_name(&self, node: TreeSitterNode<'_>) -> Option { + self.named_field(node, "name") + .or_else(|| { + self.named_children(node).into_iter().find(|child| { + self.identifier_kind(child.kind()) || child.kind() == "constant" + }) + }) + .map(|name| node_text(name, self.source).to_string()) + } + + fn block_child<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "body_statement" | "block_body" | "block")) + } + + fn call_block<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "block" | "do_block")) + } + + fn named_field<'tree>( + &self, + node: TreeSitterNode<'tree>, + name: &str, + ) -> Option> { + node.child_by_field_name(name) + } + + fn named_children<'tree>(&self, node: TreeSitterNode<'tree>) -> Vec> { + node.children(&mut 
node.walk()) + .filter(|child| child.is_named()) + .collect() + } + + fn source_before_child( + &self, + node: TreeSitterNode<'_>, + child: TreeSitterNode<'_>, + ) -> Node { + let text = self + .source + .get(node.start_byte()..child.start_byte()) + .unwrap_or("") + .trim_end() + .to_string(); + if text.is_empty() { + return self.wrap("SOURCE", Vec::new(), node); + } + + let lines = text.lines().collect::>(); + let first_span = span(node); + let last_lineno = first_span[0] + lines.len() - 1; + let last_column = if lines.len() <= 1 { + first_span[1] + text.len() + } else { + lines.last().map(|line| line.len()).unwrap_or(0) + }; + Node { + r#type: "SOURCE".to_string(), + children: Vec::new(), + first_lineno: first_span[0], + first_column: first_span[1], + last_lineno, + last_column, + text, + } + } + + fn first_named<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_children(node).into_iter().next() + } + + fn branch_child<'tree>( + &self, + node: TreeSitterNode<'tree>, + condition: TreeSitterNode<'tree>, + offset: usize, + ) -> Option> { + self.named_children(node) + .into_iter() + .filter(|child| *child != condition) + .nth(offset) + } + + fn explicit_alternative<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "else" | "elsif")) + } + + fn identifier_kind(&self, kind: &str) -> bool { + matches!( + kind, + "identifier" | "simple_identifier" | "property_identifier" | "field_identifier" + ) + } + + fn call_kind(&self, kind: &str) -> bool { + matches!( + kind, + "call" | "call_expression" | "method_call" | "method_call_expression" + ) + } + + fn block_kind(&self, kind: &str) -> bool { + matches!( + kind, + "block" + | "body_statement" + | "statement_block" + | "statement_list" + | "class_body" + | "switch_body" + | "match_block" + | "then" + | "block_body" + | "control_structure_body" + | "function_body" + ) + } + + fn 
first_dotted_call_descendant<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + for child in self.named_children(node) { + if self.call_kind(child.kind()) && self.dotted_call(child) { + return Some(child); + } + if let Some(found) = self.first_dotted_call_descendant(child) { + return Some(found); + } + } + None + } + + fn elide_tail_returns(&self, node: Option) -> Option { + let mut node = node?; + if matches!( + node.r#type.as_str(), + "DEFN" | "DEFS" | "CLASS" | "MODULE" | "SCLASS" | "LAMBDA" | "ITER" + ) { + return Some(node); + } + if node.r#type == "RETURN" { + return node.children.into_iter().find_map(child_node); + } + + match node.r#type.as_str() { + "BLOCK" => { + if let Some(last) = node.children.pop() { + match child_node(last) { + Some(last_node) => { + if let Some(elided) = self.elide_tail_returns(Some(last_node)) { + node.children.push(Child::Node(Box::new(elided))); + } else { + node.children.push(Child::Nil); + } + } + None => node.children.push(Child::Nil), + } + } + } + "SCOPE" => { + if node.children.len() > 2 { + let child = std::mem::replace(&mut node.children[2], Child::Nil); + if let Some(elided) = child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) { + node.children[2] = Child::Node(Box::new(elided)); + } + } + } + "IF" | "UNLESS" => { + for index in [1usize, 2usize] { + if node.children.len() > index { + let child = std::mem::replace(&mut node.children[index], Child::Nil); + if let Some(elided) = child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) { + node.children[index] = Child::Node(Box::new(elided)); + } + } + } + } + "CASE" | "CASE2" => { + let index = if node.r#type == "CASE" { 1 } else { 0 }; + if node.children.len() > index { + let child = std::mem::replace(&mut node.children[index], Child::Nil); + if let Some(elided) = child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) { + node.children[index] = Child::Node(Box::new(elided)); + } + } + } + "WHEN" | "RESBODY" 
=> { + for index in [1usize, 2usize] { + if node.children.len() > index { + let child = std::mem::replace(&mut node.children[index], Child::Nil); + if let Some(elided) = child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) { + node.children[index] = Child::Node(Box::new(elided)); + } + } + } + } + "RESCUE" => { + for index in [0usize, 1usize] { + if node.children.len() > index { + let child = std::mem::replace(&mut node.children[index], Child::Nil); + if let Some(elided) = child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) { + node.children[index] = Child::Node(Box::new(elided)); + } + } + } + } + _ => {} + } + + Some(node) + } + + fn elide_implicit_nil_body(&self, node: Option) -> Option { + let node = self.drop_trailing_nil_statement(node); + match node { + Some(node) if node.r#type == "NIL" => None, + other => other, + } + } + + fn drop_trailing_nil_statement(&self, node: Option) -> Option { + let mut node = node?; + if node.r#type != "BLOCK" { + return Some(node); + } + while node + .children + .last() + .and_then(self::node) + .map(|child| child.r#type == "NIL") + .unwrap_or(false) + { + node.children.pop(); + } + if node.children.is_empty() { + None + } else if node.children.len() == 1 { + child_node(node.children.into_iter().next().unwrap()) + } else { + Some(node) + } + } +} + +fn optional_node(node: Option) -> Child { + node.map(|node| Child::Node(Box::new(node))) + .unwrap_or(Child::Nil) +} + +fn child_node(child: Child) -> Option { + match child { + Child::Node(node) => Some(*node), + _ => None, + } +} + +fn list_or_nil( + children: Vec, + source: TreeSitterNode<'_>, + normalizer: &TreeSitterNormalizer<'_>, +) -> Child { + if children.is_empty() { + Child::Nil + } else { + Child::Node(Box::new(normalizer.list(children, source))) + } +} + +fn dynamic_scope(mut node: Node) -> Node { + if matches!( + node.r#type.as_str(), + "DEFN" | "DEFS" | "CLASS" | "MODULE" | "SCLASS" | "LAMBDA" + ) { + return node; + } + if 
node.r#type == "LASGN" { + node.r#type = "DASGN".to_string(); + } else if node.r#type == "LVAR" { + node.r#type = "DVAR".to_string(); + } + node.children = node + .children + .into_iter() + .map(|child| match child { + Child::Node(node) => Child::Node(Box::new(dynamic_scope(*node))), + other => other, + }) + .collect(); + node +} + +fn assignment_operator(text: &str) -> bool { + matches!( + text, + "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&&=" | "||=" + ) +} + +fn kind_type(kind: &str) -> &str { + match kind { + "body_statement" | "block_body" | "block" => "BLOCK", + other => other, + } +} + +fn if_kind(kind: &str) -> bool { + matches!( + kind, + "if" | "if_statement" | "if_modifier" | "unless" | "unless_modifier" | "if_expression" | "conditional" + ) +} + +fn return_kind(kind: &str) -> &str { + match kind { + "return" | "return_statement" | "return_expression" => "RETURN", + "break" | "break_statement" | "break_expression" => "BREAK", + "next" | "continue_statement" => "NEXT", + other => other, + } +} + +fn inline_def_wrapper_mid(text: &str) -> bool { + matches!( + text, + "public" | "protected" | "private" | "private_class_method" | "module_function" + ) +} + +fn bare_identifier_text(text: &str) -> bool { + let mut chars = text.chars(); + let Some(first) = chars.next() else { + return false; + }; + if !(first == '_' || first.is_ascii_alphabetic()) { + return false; + } + chars.all(|ch| ch == '_' || ch == '!' || ch == '?' 
|| ch == '=' || ch.is_ascii_alphanumeric()) +} + +fn comparison_operator_from_text(text: &str) -> Option { + for operator in ["===", "!==", "==", "!=", "<=", ">=", "<", ">"] { + if text.contains(operator) { + return Some(operator.to_string()); + } + } + None +} + +fn child_to_string(child: Option<&Child>) -> Option { + match child { + Some(Child::String(value)) | Some(Child::Symbol(value)) => Some(value.clone()), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::{parse, Child, Node}; + use std::io::Write; + + fn parse_source(source: &str) -> Node { + let mut file = tempfile::Builder::new() + .suffix(".rb") + .tempfile() + .expect("create temp ruby file"); + file.write_all(source.as_bytes()) + .expect("write temp ruby file"); + parse(file.path()).expect("parse temp ruby file").0 + } + + fn nodes_of_type<'a>(node: &'a Node, node_type: &str, out: &mut Vec<&'a Node>) { + if node.r#type == node_type { + out.push(node); + } + for child in node.children.iter().filter_map(super::node) { + nodes_of_type(child, node_type, out); + } + } + + #[test] + fn normalizes_safe_navigation_inside_multi_statement_else_body() { + let root = parse_source( + r#" +def x(cond, node) + if cond + node.storage = :stack + else + node.storage = :heap + current_fn_ctx&.record_heap_use! 
+ end +end +"#, + ); + let mut qcalls = Vec::new(); + nodes_of_type(&root, "QCALL", &mut qcalls); + + assert!( + qcalls + .iter() + .any(|node| node.text == "current_fn_ctx&.record_heap_use!"), + "expected normalized QCALL for current_fn_ctx safe navigation, got {qcalls:#?} in {root:#?}" + ); + } + + #[test] + fn normalizes_visibility_wrapped_singleton_def() { + let root = parse_source( + r#" +private_class_method def self.collect_payload_binding_names(node, names) + if node.is_a?(AST::Identifier) + return + end + AST.wrapped_children(node).each { |child| collect_payload_binding_names(child, names) if child.is_a?(AST::Locatable) } +end +"#, + ); + let mut defs = Vec::new(); + nodes_of_type(&root, "DEFS", &mut defs); + + assert!( + defs.iter() + .any(|node| node.children.get(1) == Some(&Child::Symbol("collect_payload_binding_names".to_string()))), + "expected normalized DEFS for visibility-wrapped singleton def, got {root:#?}" + ); + + let def = defs + .into_iter() + .find(|node| node.children.get(1) == Some(&Child::Symbol("collect_payload_binding_names".to_string()))) + .expect("visibility-wrapped singleton def should normalize to DEFS"); + let mut calls = Vec::new(); + nodes_of_type(def, "CALL", &mut calls); + nodes_of_type(def, "FCALL", &mut calls); + calls.sort_by_key(|node| (node.first_lineno, node.first_column)); + let ordered = calls + .iter() + .map(|node| (node.first_lineno, node.text.as_str())) + .collect::>(); + + let first_if_call = ordered + .iter() + .position(|(_line, text)| *text == "node.is_a?(AST::Identifier)") + .expect("expected identifier guard call"); + let recursive_call = ordered + .iter() + .position(|(_line, text)| *text == "collect_payload_binding_names(child, names)") + .expect("expected recursive payload scan call"); + assert!( + first_if_call < recursive_call, + "expected method body calls in source order, got {ordered:#?} in {root:#?}" + ); + } + + #[test] + fn normalizes_heredoc_beginning_as_dynamic_string_receiver() { + let root = 
parse_source( + r#" +def emit + <<~ZIG.chomp + hi + ZIG +end +"#, + ); + let mut calls = Vec::new(); + nodes_of_type(&root, "CALL", &mut calls); + + let call = calls + .iter() + .find(|node| node.text == "<<~ZIG.chomp") + .expect("expected heredoc chomp call"); + assert_eq!(call.children.get(1), Some(&Child::Symbol("chomp".to_string()))); + assert_eq!( + call.children + .first() + .and_then(super::node) + .map(|node| node.r#type.as_str()), + Some("DSTR") + ); + } + + #[test] + fn flatten_and_matches_ruby_ast_helper() { + let left = Node { + r#type: "LVAR".to_string(), + children: vec![Child::String("a".to_string())], + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: "a".to_string(), + }; + let right = Node { + r#type: "LVAR".to_string(), + children: vec![Child::String("b".to_string())], + first_lineno: 1, + first_column: 5, + last_lineno: 1, + last_column: 6, + text: "b".to_string(), + }; + let and_node = Node { + r#type: "AND".to_string(), + children: vec![ + Child::Node(Box::new(left)), + Child::Node(Box::new(right)), + ], + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 6, + text: "a && b".to_string(), + }; + + assert_eq!(super::flatten_and(&and_node).len(), 2); + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs new file mode 100644 index 000000000..34a393a2c --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs @@ -0,0 +1,502 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +const GUARD_MIDS: &[&str] = &["is_a?", "kind_of?", "instance_of?", "nil?", "respond_to?"]; +const TRANSIENT_NOARG_MIDS: &[&str] = &["pop", "shift"]; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct DecisionPressureRow { + pub 
contract: String, + pub decisions: usize, + pub essential: usize, + pub methods: usize, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Hit { + contract: String, + file: String, + defn: String, + line: usize, + span: Span, +} + +#[derive(Clone, Debug)] +struct Scanner { + file: String, + lines: Vec, + guard_hits: Vec, + dispatch_hits: Vec, +} + +type AssignmentMap = Vec<(String, Node)>; + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { + let mut guard = Vec::new(); + let mut dispatch = Vec::new(); + + for file in files { + let (root, lines) = ast::parse(file)?; + let mut scanner = Scanner::new(file.to_string_lossy().to_string(), lines); + scanner.walk(&root, &[], &Vec::new()); + guard.extend(scanner.guard_hits); + dispatch.extend(scanner.dispatch_hits); + } + + Ok(ranked(&guard, &dispatch)) +} + +impl Scanner { + fn new(file: String, lines: Vec) -> Self { + Self { + file, + lines, + guard_hits: Vec::new(), + dispatch_hits: Vec::new(), + } + } + + fn walk(&mut self, node: &Node, defstack: &[String], asgmap: &AssignmentMap) { + let mut next_defstack = defstack.to_vec(); + let mut next_asgmap = asgmap.clone(); + + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(name) = child_to_string(node.children.get(name_index)) { + next_defstack.push(name); + } + next_asgmap = self.build_asgmap(node); + } + + self.record_decision(node, &next_defstack, &next_asgmap); + self.record_rescue_nil(node, &next_defstack, &next_asgmap); + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_defstack, &next_asgmap); + } + } + + fn build_asgmap(&self, defn_node: &Node) -> AssignmentMap { + let mut map = Vec::new(); + let mut stack = ast::body_stmts(defn_node); + + while let Some(node) = stack.pop() { + if node.r#type == "LASGN" { + let name = child_to_string(node.children.first()); + let source = 
node.children.get(1).and_then(ast::node); + if let (Some(name), Some(source)) = (name, source) { + if !map.iter().any(|(existing, _)| existing == &name) + && self.simple_source(source) + { + map.push((name, source.clone())); + } + } + } + for child in node.children.iter().filter_map(ast::node) { + stack.push(child); + } + } + + map + } + + fn simple_source(&self, node: &Node) -> bool { + match node.r#type.as_str() { + "IVAR" => true, + "CALL" | "QCALL" => { + let receiver = node.children.first().and_then(ast::node); + let method = child_to_string(node.children.get(1)); + let args_nil = child_nil(node.children.get(2)); + receiver.is_some() + && (args_nil || method.as_deref() == Some("[]")) + } + _ => false, + } + } + + fn record_decision( + &mut self, + node: &Node, + defstack: &[String], + asgmap: &AssignmentMap, + ) { + if !matches!(node.r#type.as_str(), "CALL" | "QCALL") { + return; + } + + let Some(receiver) = node.children.first().and_then(ast::node) else { + return; + }; + let Some(method) = child_to_string(node.children.get(1)) else { + return; + }; + + let guard = (node.r#type == "CALL" && GUARD_MIDS.contains(&method.as_str())) + || node.r#type == "QCALL"; + if guard { + if let Some(contract) = self.contract_of(receiver, asgmap, 0) { + self.guard_hits.push(self.hit(contract, defstack, node)); + } + return; + } + + if node.r#type == "CALL" && method.ends_with('?') { + if let Some(contract) = self.contract_of(receiver, asgmap, 0) { + self.dispatch_hits.push(self.hit(contract, defstack, node)); + } + } + } + + fn record_rescue_nil( + &mut self, + node: &Node, + defstack: &[String], + asgmap: &AssignmentMap, + ) { + if node.r#type != "RESCUE" { + return; + } + + let Some(body) = node.children.first().and_then(ast::node) else { + return; + }; + let Some(resbody) = node.children.get(1).and_then(ast::node) else { + return; + }; + if resbody.r#type != "RESBODY" || !child_nil(resbody.children.first()) { + return; + } + + let handler = resbody.children.get(1); + let 
nil_handler = child_nil(handler) + || handler + .and_then(ast::node) + .map(|node| node.r#type == "NIL") + .unwrap_or(false); + if !nil_handler || !matches!(body.r#type.as_str(), "CALL" | "QCALL") { + return; + } + + if let Some(contract) = self.contract_of(body, asgmap, 0) { + self.guard_hits.push(self.hit(contract, defstack, node)); + } + } + + fn hit(&self, contract: String, defstack: &[String], node: &Node) -> Hit { + Hit { + contract, + file: self.file.clone(), + defn: defstack + .last() + .cloned() + .unwrap_or_else(|| "(top-level)".to_string()), + line: node.first_lineno, + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + } + } + + fn contract_of( + &self, + node: &Node, + asgmap: &AssignmentMap, + depth: usize, + ) -> Option { + if depth >= 8 { + return None; + } + + match node.r#type.as_str() { + "LVAR" | "DVAR" => { + let name = child_to_string(node.children.first())?; + if let Some((_, source)) = asgmap + .iter() + .find(|(candidate, _)| candidate == &name) + { + self.contract_of(source, asgmap, depth + 1) + } else { + Some("~local".to_string()) + } + } + "IVAR" => child_to_string(node.children.first()), + "CALL" | "QCALL" => { + let receiver = node.children.first().and_then(ast::node); + let method = child_to_string(node.children.get(1))?; + let args = node.children.get(2).and_then(ast::node); + + if method == "[]" { + let key = args.and_then(|args| first_non_nil_child(&args.children)); + let text = key + .map(|child| child_slice(child, &self.lines)) + .unwrap_or_else(|| "nil".to_string()); + Some(format!("[{text}]")) + } else if args.is_none() + && receiver.is_some() + && !TRANSIENT_NOARG_MIDS.contains(&method.as_str()) + { + Some(format!(".{method}")) + } else { + None + } + } + "VCALL" => child_to_string(node.children.first()).map(|name| format!(".{name}")), + _ => None, + } + } +} + +fn ranked(guard_hits: &[Hit], dispatch_hits: &[Hit]) -> Vec { + let mut essential = Vec::<(String, usize)>::new(); + for 
hit in dispatch_hits { + if let Some((_, count)) = essential + .iter_mut() + .find(|(contract, _)| contract == &hit.contract) + { + *count += 1; + } else { + essential.push((hit.contract.clone(), 1)); + } + } + + let mut groups = Vec::<(String, Vec<&Hit>)>::new(); + for hit in guard_hits { + if let Some((_, hits)) = groups + .iter_mut() + .find(|(contract, _)| contract == &hit.contract) + { + hits.push(hit); + } else { + groups.push((hit.contract.clone(), vec![hit])); + } + } + + let rows = groups + .into_iter() + .map(|(contract, hits)| { + let methods = hits + .iter() + .map(|hit| (hit.file.clone(), hit.defn.clone())) + .collect::>() + .len(); + let sites = hits.iter().map(|hit| loc(hit)).collect::>(); + let spans = hits + .iter() + .map(|hit| (loc(hit), hit.span)) + .collect::>(); + let essential_count = essential + .iter() + .find(|(candidate, _)| candidate == &contract) + .map(|(_, count)| *count) + .unwrap_or(0); + DecisionPressureRow { + contract, + decisions: hits.len(), + essential: essential_count, + methods, + sites, + spans, + } + }) + .collect::>(); + + let mut named = rows + .iter() + .filter(|row| row.contract != "~local") + .cloned() + .collect::>(); + named.sort_by(|left, right| { + right + .decisions + .cmp(&left.decisions) + .then(right.methods.cmp(&left.methods)) + }); + let local = rows + .into_iter() + .filter(|row| row.contract == "~local") + .collect::>(); + named.into_iter().chain(local).collect() +} + +fn child_to_string(child: Option<&Child>) -> Option { + match child { + Some(Child::String(value)) | Some(Child::Symbol(value)) => Some(value.clone()), + _ => None, + } +} + +fn child_nil(child: Option<&Child>) -> bool { + matches!(child, None | Some(Child::Nil)) +} + +fn first_non_nil_child(children: &[Child]) -> Option<&Child> { + children.iter().find(|child| !matches!(child, Child::Nil)) +} + +fn child_slice(child: &Child, lines: &[String]) -> String { + match child { + Child::Node(node) => ast::slice(node, lines), + Child::Symbol(value) 
=> value.clone(), + Child::String(value) => format!("{value:?}"), + Child::Nil => "nil".to_string(), + } +} + +fn loc(hit: &Hit) -> String { + format!("{}:{}:{}", hit.file, hit.defn, hit.line) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn node(node_type: &str, children: Vec) -> Node { + Node { + r#type: node_type.to_string(), + children, + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: String::new(), + } + } + + #[test] + fn resolves_local_to_accessor_contract() { + let source = node( + "CALL", + vec![ + Child::Node(Box::new(node("LVAR", vec![Child::String("node".to_string())]))), + Child::Symbol("full_type".to_string()), + Child::Nil, + ], + ); + let scanner = Scanner::new("test.rb".to_string(), Vec::new()); + let local = node("LVAR", vec![Child::String("ti".to_string())]); + assert_eq!( + scanner.contract_of(&local, &vec![("ti".to_string(), source)], 0), + Some(".full_type".to_string()) + ); + } + + #[test] + fn resolved_transient_local_does_not_fall_back_to_local_contract() { + let source = node( + "CALL", + vec![ + Child::Node(Box::new(node( + "LVAR", + vec![Child::String("stack".to_string())], + ))), + Child::Symbol("pop".to_string()), + Child::Nil, + ], + ); + let scanner = Scanner::new("test.rb".to_string(), Vec::new()); + let local = node("LVAR", vec![Child::String("node".to_string())]); + + assert_eq!( + scanner.contract_of(&local, &vec![("node".to_string(), source)], 0), + None + ); + } + + #[test] + fn hash_key_contract_uses_key_text() { + let element = node( + "CALL", + vec![ + Child::Node(Box::new(node("LVAR", vec![Child::String("p".to_string())]))), + Child::Symbol("[]".to_string()), + Child::Node(Box::new(node( + "LIST", + vec![Child::Node(Box::new(Node { + r#type: "LIT".to_string(), + children: vec![Child::Symbol("type".to_string())], + first_lineno: 1, + first_column: 2, + last_lineno: 1, + last_column: 7, + text: ":type".to_string(), + }))], + ))), + ], + ); + let scanner = 
Scanner::new("test.rb".to_string(), Vec::new()); + assert_eq!(scanner.contract_of(&element, &Vec::new(), 0), Some("[:type]".to_string())); + } + + #[test] + fn scan_records_safe_navigation_pressure() { + let mut file = tempfile::NamedTempFile::new().expect("temp"); + std::io::Write::write_all( + &mut file, + b"def scan\n file&.unlink\nend\n", + ) + .expect("write"); + + let rows = scan_files(&[file.path().to_path_buf()], Language::Ruby).expect("scan"); + + assert_eq!(rows.len(), 1); + assert_eq!(rows[0].contract, ".file"); + assert_eq!(rows[0].decisions, 1); + } + + #[test] + fn scan_records_safe_navigation_pressure_inside_ensure() { + let mut file = tempfile::NamedTempFile::new().expect("temp"); + std::io::Write::write_all( + &mut file, + b"class CoUpdateTest < Minitest::Test\n def scan(ruby)\n f = Tempfile.new([\"cu\", \".rb\"])\n f.write(ruby)\n ensure\n f&.unlink\n end\nend\n", + ) + .expect("write"); + + let rows = scan_files(&[file.path().to_path_buf()], Language::Ruby).expect("scan"); + + assert_eq!(rows.len(), 1); + assert_eq!(rows[0].contract, "~local"); + assert_eq!(rows[0].decisions, 1); + } + + #[test] + fn scan_counts_block_predicate_on_assigned_local_as_essential_context() { + let mut file = tempfile::NamedTempFile::new().expect("temp"); + std::io::Write::write_all( + &mut file, + b"def t\n pairs = []\n refute(pairs.any? { |h| h[:pair].include?(\"[]\") })\n pairs.nil?\nend\n", + ) + .expect("write"); + + let rows = scan_files(&[file.path().to_path_buf()], Language::Ruby).expect("scan"); + + assert_eq!(rows.len(), 1); + assert_eq!(rows[0].contract, "~local"); + assert_eq!(rows[0].decisions, 1); + assert_eq!(rows[0].essential, 1); + } + + #[test] + fn scan_records_safe_navigation_pressure_in_ternary_arm() { + let mut file = tempfile::NamedTempFile::new().expect("temp"); + std::io::Write::write_all( + &mut file, + b"def x(node)\n decl = node.respond_to?(:symbol) ? 
node.symbol&.reg : nil\nend\n", + ) + .expect("write"); + + let rows = scan_files(&[file.path().to_path_buf()], Language::Ruby).expect("scan"); + + assert_eq!(rows.iter().find(|row| row.contract == ".symbol").map(|row| row.decisions), Some(1)); + assert_eq!(rows.iter().find(|row| row.contract == "~local").map(|row| row.decisions), Some(1)); + } + +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/miner.rs b/gems/decomplex/rust/src/decomplex/detectors/miner.rs new file mode 100644 index 000000000..5ed596d69 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/miner.rs @@ -0,0 +1,213 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, DecisionSite, Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct MinerReport { + pub missing_abstractions: Vec, + pub neglected_conditions: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct MissingAbstraction { + pub kind: String, + pub members: Vec, + pub support: usize, + pub scatter: usize, + pub rank: usize, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct NeglectedCondition { + pub pattern: Vec, + pub support: usize, + pub missing: String, + pub at: String, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Group { + kind: String, + members: Vec, + sites: Vec, + order: usize, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents, 2, 3)) +} + +pub fn scan_documents( + documents: &[Document], + min_scatter: usize, + min_neglected_support: usize, +) -> MinerReport { + let sites = documents + .iter() + .flat_map(|document| document.decision_sites.clone()) + .collect::>(); + MinerReport { + missing_abstractions: missing_abstractions(&sites, 
min_scatter), + neglected_conditions: neglected_conditions(&sites, min_neglected_support), + } +} + +fn missing_abstractions(sites: &[DecisionSite], min_scatter: usize) -> Vec { + let mut out = groups(sites) + .into_iter() + .filter_map(|group| { + let scatter = group + .sites + .iter() + .map(|site| (site.file.clone(), site.function.clone())) + .collect::>() + .len(); + if scatter < min_scatter { + return None; + } + let spans = group + .sites + .iter() + .map(|site| (loc(site), site.span)) + .collect::>(); + Some(( + group.order, + MissingAbstraction { + kind: group.kind, + members: group.members, + support: group.sites.len(), + scatter, + rank: group.sites.len() * scatter, + sites: group.sites.iter().map(loc).collect(), + spans, + }, + )) + }) + .collect::>(); + out.sort_by(|left, right| right.1.rank.cmp(&left.1.rank).then(left.0.cmp(&right.0))); + out.into_iter().map(|(_, finding)| finding).collect() +} + +fn neglected_conditions(sites: &[DecisionSite], min_support: usize) -> Vec { + let popular = groups(sites) + .into_iter() + .filter(|group| group.sites.len() >= min_support) + .map(|group| (group.kind, group.members, group.sites.len())) + .collect::>(); + let mut out = Vec::new(); + let mut seen = BTreeSet::new(); + for site in sites { + for (kind, members, support) in &popular { + if kind != &site.kind { + continue; + } + let missing = difference(members, &site.members); + let extra = difference(&site.members, members); + if missing.len() != 1 || !extra.is_empty() || &site.members == members { + continue; + } + let at = loc(site); + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), site.span); + let finding = NeglectedCondition { + pattern: members.clone(), + support: *support, + missing: missing[0].clone(), + at, + spans, + }; + let key = serde_json::to_string(&finding).unwrap_or_default(); + if seen.insert(key) { + out.push(finding); + } + } + } + out.sort_by(|left, right| right.support.cmp(&left.support)); + out +} + +fn groups(sites: 
&[DecisionSite]) -> Vec { + let mut groups = Vec::new(); + let mut seen_sites = BTreeSet::new(); + for site in sites { + let site_key = format!( + "{}\0{}\0{}\0{}\0{}", + site.file, + site.function, + site.line, + site.kind, + site.members.join("\0") + ); + if !seen_sites.insert(site_key) { + continue; + } + if let Some(group) = groups + .iter_mut() + .find(|group: &&mut Group| group.kind == site.kind && group.members == site.members) + { + group.sites.push(site.clone()); + } else { + groups.push(Group { + kind: site.kind.clone(), + members: site.members.clone(), + sites: vec![site.clone()], + order: groups.len(), + }); + } + } + groups +} + +fn difference(left: &[String], right: &[String]) -> Vec { + left.iter() + .filter(|candidate| !right.contains(candidate)) + .cloned() + .collect() +} + +fn loc(site: &DecisionSite) -> String { + format!("{}:{}:{}", site.file, site.function, site.line) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn site(function: &str, line: usize, members: &[&str]) -> DecisionSite { + DecisionSite { + kind: "conjunction".to_string(), + members: members.iter().map(|member| member.to_string()).collect(), + file: "a.rb".to_string(), + function: function.to_string(), + line, + span: [line, 0, line, 1], + predicate: members.join(" && "), + } + } + + #[test] + fn reports_missing_abstractions_and_neglected_conditions() { + let sites = vec![ + site("one", 1, &["a", "b", "c"]), + site("two", 2, &["a", "b", "c"]), + site("three", 3, &["a", "b", "c"]), + site("broken", 4, &["a", "b"]), + ]; + let missing = missing_abstractions(&sites, 2); + assert_eq!(missing.len(), 1); + assert_eq!(missing[0].support, 3); + assert_eq!(missing[0].scatter, 3); + + let neglected = neglected_conditions(&sites, 3); + assert_eq!(neglected.len(), 1); + assert_eq!(neglected[0].missing, "c"); + assert_eq!(neglected[0].at, "a.rb:broken:4"); + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/mod.rs b/gems/decomplex/rust/src/decomplex/detectors/mod.rs index 
c40ce19bf..d4fd9743f 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/mod.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/mod.rs @@ -1,3 +1,6 @@ pub mod co_update; +pub mod decision_pressure; pub mod flay_similarity; +pub mod miner; pub mod predicate_alias; +pub mod semantic_alias; diff --git a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs index e7fda8767..a64ed895a 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs @@ -1,5 +1,5 @@ -use crate::decomplex::ast::Span; -use crate::decomplex::syntax::{self, Document, Language, PredicateAlias}; +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; use anyhow::Result; use serde::Serialize; use std::collections::BTreeMap; @@ -18,23 +18,94 @@ pub struct AliasCluster { pub spans: BTreeMap, } +#[derive(Clone, Debug, Eq, PartialEq)] +struct Pred { + name: String, + body: String, + file: String, + defn: String, + line: usize, + span: Span, +} + +#[derive(Clone, Debug)] +struct Scanner { + file: String, + lines: Vec, + preds: Vec, +} + pub fn scan_files(files: &[PathBuf], language: Language) -> Result { - let documents = syntax::parse_files(files, language)?; - Ok(scan_documents(&documents)) + let _ = language; + let mut preds = Vec::new(); + for file in files { + let (root, lines) = ast::parse(file)?; + let mut scanner = Scanner::new(file.to_string_lossy().to_string(), lines); + scanner.walk(&root); + preds.extend(scanner.preds); + } + Ok(PredicateAliasReport { + alias_clusters: alias_clusters(&preds), + }) } -pub fn scan_documents(documents: &[Document]) -> PredicateAliasReport { - let predicates = documents - .iter() - .flat_map(|document| document.predicate_aliases.clone()) - .collect::>(); - PredicateAliasReport { - alias_clusters: alias_clusters(&predicates), +impl Scanner { + fn new(file: 
String, lines: Vec) -> Self { + Self { + file, + lines, + preds: Vec::new(), + } + } + + fn walk(&mut self, node: &Node) { + if node.r#type == "DEFN" { + self.record_def(node); + } + for child in node.children.iter().filter_map(ast::node) { + self.walk(child); + } + } + + fn record_def(&mut self, node: &Node) { + let Some(name) = child_to_string(node.children.first()) else { + return; + }; + let Some(scope) = node.children.get(1).and_then(ast::node) else { + return; + }; + if scope.r#type != "SCOPE" { + return; + } + let Some(body) = scope.children.get(2).and_then(ast::node) else { + return; + }; + if body.r#type == "BLOCK" { + return; + } + + let text = ast::slice(body, &self.lines); + if text.is_empty() || text.len() > 200 { + return; + } + self.preds.push(Pred { + name: name.clone(), + body: text, + file: self.file.clone(), + defn: name, + line: node.first_lineno, + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + }); } } -fn alias_clusters(predicates: &[PredicateAlias]) -> Vec { - let mut by_body: Vec<(&str, Vec<&PredicateAlias>)> = Vec::new(); +fn alias_clusters(predicates: &[Pred]) -> Vec { + let mut by_body: Vec<(&str, Vec<&Pred>)> = Vec::new(); for predicate in predicates { if let Some((_, rows)) = by_body.iter_mut().find(|(body, _)| *body == predicate.body.as_str()) { rows.push(predicate); @@ -79,12 +150,19 @@ fn alias_clusters(predicates: &[PredicateAlias]) -> Vec { out } +fn child_to_string(child: Option<&Child>) -> Option { + match child { + Some(Child::String(value)) | Some(Child::Symbol(value)) => Some(value.clone()), + _ => None, + } +} + #[cfg(test)] mod tests { use super::*; - fn pred(name: &str, body: &str, line: usize) -> PredicateAlias { - PredicateAlias { + fn pred(name: &str, body: &str, line: usize) -> Pred { + Pred { name: name.to_string(), body: body.to_string(), file: "a.rb".to_string(), diff --git a/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs 
b/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs new file mode 100644 index 000000000..ba64a0385 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs @@ -0,0 +1,339 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::BTreeMap; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct SemanticAliasReport { + pub alias_clusters: Vec, + pub reification_misses: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct SemanticAliasCluster { + pub canon: String, + pub names: Vec, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct ReificationMiss { + pub predicate: String, + pub canon: String, + pub at: String, + pub spans: BTreeMap, + pub raw: String, +} + +#[derive(Clone, Debug)] +struct Pred { + name: String, + canon: String, + file: String, + line: usize, + span: Span, +} + +#[derive(Clone, Debug)] +struct Use { + canon: String, + file: String, + defn: String, + line: usize, + raw: String, + span: Span, +} + +#[derive(Clone, Debug)] +struct Scanner { + file: String, + lines: Vec, + preds: Vec, + uses: Vec, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let _ = language; + let mut preds = Vec::new(); + let mut uses = Vec::new(); + for file in files { + let (root, lines) = ast::parse(file)?; + let mut scanner = Scanner::new(file.to_string_lossy().to_string(), lines); + scanner.walk(&root, &[]); + preds.extend(scanner.preds); + uses.extend(scanner.uses); + } + Ok(SemanticAliasReport { + alias_clusters: alias_clusters(&preds), + reification_misses: reification_misses(&preds, &uses), + }) +} + +impl Scanner { + fn new(file: String, lines: Vec) -> Self { + Self { + file, + lines, + preds: Vec::new(), + uses: Vec::new(), + } + } + + fn walk(&mut self, node: &Node, defstack: &[String]) { 
+ let next_defstack = ast::def_push(node, defstack); + if node.r#type == "DEFN" { + self.record_pred(node); + } + if matches!(node.r#type.as_str(), "CALL" | "OPCALL") && comparison(node) { + let raw = ast::slice(node, &self.lines); + self.uses.push(Use { + canon: canon(&raw), + file: self.file.clone(), + defn: next_defstack + .last() + .cloned() + .unwrap_or_else(|| "(top-level)".to_string()), + line: node.first_lineno, + raw, + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + }); + } + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_defstack); + } + } + + fn record_pred(&mut self, node: &Node) { + let Some(name) = child_to_string(node.children.first()) else { + return; + }; + if !name.ends_with('?') { + return; + } + let statements = ast::body_stmts(node); + if statements.len() != 1 { + return; + } + self.preds.push(Pred { + name, + canon: canon(&ast::slice(statements[0], &self.lines)), + file: self.file.clone(), + line: node.first_lineno, + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + }); + } +} + +fn alias_clusters(preds: &[Pred]) -> Vec { + let mut by_canon: Vec<(&str, Vec<&Pred>)> = Vec::new(); + for pred in preds { + if let Some((_, rows)) = by_canon + .iter_mut() + .find(|(existing, _)| *existing == pred.canon.as_str()) + { + rows.push(pred); + } else { + by_canon.push((pred.canon.as_str(), vec![pred])); + } + } + + let mut out = by_canon + .into_iter() + .filter_map(|(canon, rows)| { + let mut names = Vec::new(); + for pred in &rows { + if !names.contains(&pred.name) { + names.push(pred.name.clone()); + } + } + if names.len() < 2 { + return None; + } + let sites = rows + .iter() + .map(|pred| format!("{}:{}:{}", pred.file, pred.name, pred.line)) + .collect::>(); + let spans = rows + .iter() + .map(|pred| (format!("{}:{}:{}", pred.file, pred.name, pred.line), pred.span)) + .collect::>(); + Some(SemanticAliasCluster { + canon: 
canon.to_string(), + names, + sites, + spans, + }) + }) + .collect::>(); + out.sort_by(|left, right| right.names.len().cmp(&left.names.len())); + out +} + +fn reification_misses(preds: &[Pred], uses: &[Use]) -> Vec { + let mut out = Vec::new(); + for usage in uses { + let usage_canon = usage.canon.clone(); + let Some(pred) = preds.iter().find(|candidate| candidate.canon == usage_canon) else { + continue; + }; + let usage_function = semantic_function_name(&usage.defn); + if usage_function.ends_with('?') + && preds + .iter() + .any(|candidate| candidate.canon == usage_canon && candidate.name == usage_function) + { + continue; + } + let at = format!("{}:{}:{}", usage.file, usage_function, usage.line); + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), usage.span); + out.push(ReificationMiss { + predicate: pred.name.clone(), + canon: usage_canon, + at, + spans, + raw: usage.raw.clone(), + }); + } + out.sort_by(|left, right| left.predicate.cmp(&right.predicate)); + out +} + +fn canon(text: &str) -> String { + let (mut value, _) = ast::canon_polarity(text); + value = value.strip_prefix("self.").unwrap_or(&value).to_string(); + value = value.strip_prefix('@').unwrap_or(&value).to_string(); + value = strip_single_receiver_hop(&value); + value.split_whitespace().collect::>().join(" ") +} + +fn strip_single_receiver_hop(text: &str) -> String { + let Some(dot) = text.find('.') else { + return text.to_string(); + }; + let receiver = &text[..dot]; + if receiver.is_empty() || !identifier_like(receiver) { + return text.to_string(); + } + let rest = &text[dot + 1..]; + let Some(attr_len) = leading_identifier_len(rest) else { + return text.to_string(); + }; + let after_attr = rest[attr_len..].trim_start(); + if !(after_attr.starts_with("==") || after_attr.starts_with("!=") || after_attr.starts_with('.')) { + return text.to_string(); + } + rest.to_string() +} + +fn leading_identifier_len(text: &str) -> Option { + let mut chars = text.char_indices(); + let (_, first) = 
chars.next()?; + if !(first == '_' || first.is_ascii_alphabetic()) { + return None; + } + let mut end = first.len_utf8(); + for (index, ch) in chars { + if ch == '_' || ch.is_ascii_alphanumeric() { + end = index + ch.len_utf8(); + } else { + break; + } + } + Some(end) +} + +fn identifier_like(text: &str) -> bool { + let mut chars = text.chars(); + let Some(first) = chars.next() else { + return false; + }; + if !(first == '_' || first.is_ascii_alphabetic()) { + return false; + } + chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) +} + +fn semantic_function_name(name: &str) -> String { + name.strip_prefix("self.").unwrap_or(name).to_string() +} + +fn comparison(node: &Node) -> bool { + let Some(method) = child_to_string(node.children.get(1)) else { + return false; + }; + matches!(method.as_str(), "==" | "!=" | "nil?") +} + +fn child_to_string(child: Option<&Child>) -> Option { + match child { + Some(Child::String(value)) | Some(Child::Symbol(value)) => Some(value.clone()), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn pred(name: &str, body: &str, line: usize) -> Pred { + Pred { + name: name.to_string(), + canon: canon(body), + file: "a.rb".to_string(), + line, + span: [line, 0, line, 1], + } + } + + fn use_at(function: &str, raw: &str, line: usize) -> Use { + Use { + canon: canon(raw), + raw: raw.to_string(), + file: "a.rb".to_string(), + defn: function.to_string(), + line, + span: [line, 0, line, 1], + } + } + + #[test] + fn canonicalizes_receiver_forms() { + assert_eq!(canon("node.provenance == :frame"), "provenance == :frame"); + assert_eq!(canon("@provenance == :frame"), "provenance == :frame"); + assert_eq!(canon("self.provenance == :frame"), "provenance == :frame"); + assert_eq!(canon("!x.heap?"), "x.heap?"); + assert_eq!(canon("stmt.expr? && ok"), "stmt.expr? 
&& ok"); + } + + #[test] + fn reports_aliases_and_reification_misses() { + let preds = vec![ + pred("frame?", "@provenance == :frame", 1), + pred("is_frame?", "provenance == :frame", 2), + pred("heap?", "@provenance == :heap", 3), + ]; + let uses = vec![use_at("somewhere", "node.provenance == :frame", 10)]; + let report = SemanticAliasReport { + alias_clusters: alias_clusters(&preds), + reification_misses: reification_misses(&preds, &uses), + }; + assert_eq!(report.alias_clusters.len(), 1); + assert_eq!(report.alias_clusters[0].names, vec!["frame?", "is_frame?"]); + assert_eq!(report.reification_misses.len(), 1); + assert_eq!(report.reification_misses[0].predicate, "frame?"); + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index aa760d0de..208f62468 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -30,7 +30,9 @@ pub struct Document { pub root: RawNode, pub function_defs: Vec, pub state_writes: Vec, + pub decision_sites: Vec, pub predicate_aliases: Vec, + pub comparison_uses: Vec, } #[derive(Clone, Debug)] @@ -64,6 +66,27 @@ pub struct PredicateAlias { pub span: Span, } +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct DecisionSite { + pub kind: String, + pub members: Vec, + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, + pub predicate: String, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct ComparisonUse { + pub canon_source: String, + pub raw: String, + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, +} + #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct SimilarityFinding { pub at: String, diff --git a/gems/decomplex/rust/src/decomplex/syntax/ruby.rs b/gems/decomplex/rust/src/decomplex/syntax/ruby.rs index b8b400f87..4a5b21d72 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/ruby.rs +++ 
b/gems/decomplex/rust/src/decomplex/syntax/ruby.rs @@ -1,4 +1,4 @@ -use super::{Document, FunctionDef, Language, PredicateAlias, StateWrite}; +use super::{ComparisonUse, DecisionSite, Document, FunctionDef, Language, PredicateAlias, StateWrite}; use crate::decomplex::ast::{line, node_text, normalize_text, span, RawNode}; use anyhow::{Context, Result}; use std::collections::HashSet; @@ -10,8 +10,11 @@ pub fn parse_file(file: PathBuf) -> Result { let parsed = ParsedRuby::parse(file)?; let mut function_defs = Vec::new(); let mut state_writes = Vec::new(); + let mut decision_sites = Vec::new(); let mut predicate_aliases = Vec::new(); + let mut comparison_uses = Vec::new(); let mut seen_writes = HashSet::new(); + let mut seen_decisions = HashSet::new(); let context = ContextState::new(file_owner(&parsed.file)); collect_facts( @@ -21,8 +24,11 @@ pub fn parse_file(file: PathBuf) -> Result { &context, &mut function_defs, &mut state_writes, + &mut decision_sites, &mut predicate_aliases, + &mut comparison_uses, &mut seen_writes, + &mut seen_decisions, ); Ok(Document { @@ -33,7 +39,9 @@ pub fn parse_file(file: PathBuf) -> Result { root: RawNode::from_tree_sitter(parsed.tree.root_node(), &parsed.source), function_defs, state_writes, + decision_sites, predicate_aliases, + comparison_uses, }) } @@ -98,13 +106,18 @@ fn collect_facts( context: &ContextState, function_defs: &mut Vec, state_writes: &mut Vec, + decision_sites: &mut Vec, predicate_aliases: &mut Vec, + comparison_uses: &mut Vec, seen_writes: &mut HashSet, + seen_decisions: &mut HashSet, ) { let next_context = push_function_context(node, push_owner_context(node, source, context), source); record_function_def(node, source, file, &next_context, function_defs); record_state_write(node, source, file, &next_context, state_writes, seen_writes); + record_decision_site(node, source, file, &next_context, decision_sites, seen_decisions); record_predicate_alias(node, source, file, predicate_aliases); + record_comparison_use(node, 
source, file, &next_context, comparison_uses); let mut cursor = node.walk(); for child in node.children(&mut cursor) { @@ -115,8 +128,11 @@ fn collect_facts( &next_context, function_defs, state_writes, + decision_sites, predicate_aliases, + comparison_uses, seen_writes, + seen_decisions, ); } } @@ -179,6 +195,142 @@ fn record_predicate_alias( }); } +fn record_comparison_use( + node: Node<'_>, + source: &str, + file: &Path, + context: &ContextState, + out: &mut Vec, +) { + if !comparison_node(node, source) { + return; + } + let raw = normalize_text(node_text(node, source)); + out.push(ComparisonUse { + canon_source: raw.clone(), + raw, + file: file.to_string_lossy().to_string(), + function: context.current_function(), + line: line(node), + span: span(node), + }); +} + +fn comparison_node(node: Node<'_>, source: &str) -> bool { + if matches!(node.kind(), "binary" | "binary_expression") { + return matches!(direct_operator_from_source(node, source).as_str(), "==" | "!="); + } + if node.kind() != "call" { + return false; + } + node.child_by_field_name("method") + .map(|method| node_text(method, source) == "nil?") + .unwrap_or(false) +} + +fn record_decision_site( + node: Node<'_>, + source: &str, + file: &Path, + context: &ContextState, + out: &mut Vec, + seen: &mut HashSet, +) { + if boolean_container(node) && boolean_and(node, source) { + record_conjunction_decision(node, source, file, context, out, seen); + return; + } + + if case_node(node) || hidden_case(node) { + let decision_node = case_source_node(node); + if ruby_predicate_less_case(decision_node) { + return; + } + let patterns = case_patterns(decision_node, source); + if patterns.len() < 2 { + return; + } + push_decision_site(out, seen, DecisionSite { + kind: "case_dispatch".to_string(), + members: patterns, + file: file.to_string_lossy().to_string(), + function: context.current_function(), + line: line(decision_node), + span: span(decision_node), + predicate: decision_predicate(decision_node, source), + }); + 
} +} + +fn record_conjunction_decision( + mut node: Node<'_>, + source: &str, + file: &Path, + context: &ContextState, + out: &mut Vec, + seen: &mut HashSet, +) { + let from_wrapper = parenthesized_wrapper(node); + if from_wrapper + && node + .parent() + .map(|parent| boolean_container(parent) && boolean_and(parent, source)) + .unwrap_or(false) + { + return; + } + + if from_wrapper { + if let Some(child) = first_named_child(node) { + node = child; + } + } + + if !from_wrapper + && node + .parent() + .map(|parent| boolean_container(parent) && boolean_and(parent, source) && span(parent) != span(node)) + .unwrap_or(false) + { + return; + } + + let mut members = flatten_boolean_and(node, source) + .into_iter() + .map(|child| decision_member_text(child, source)) + .collect::>(); + members.sort(); + members.dedup(); + if members.len() < 2 { + return; + } + + push_decision_site(out, seen, DecisionSite { + kind: "conjunction".to_string(), + members, + file: file.to_string_lossy().to_string(), + function: context.current_function(), + line: conjunction_span(node)[0], + span: conjunction_span(node), + predicate: normalize_text(node_text(node, source)), + }); +} + +fn push_decision_site(out: &mut Vec, seen: &mut HashSet, site: DecisionSite) { + let key = format!( + "{}\0{}\0{}\0{}\0{:?}\0{}", + site.file, + site.function, + site.kind, + site.line, + site.span, + site.members.join("\0") + ); + if seen.insert(key) { + out.push(site); + } +} + fn method_single_expression_body(node: Node<'_>) -> Option> { let mut cursor = node.walk(); if node.children(&mut cursor).any(|child| child.kind() == "=") { @@ -519,6 +671,313 @@ fn strip_assignment_suffix(text: &str) -> String { text.strip_suffix('=').unwrap_or(text).to_string() } +fn case_node(node: Node<'_>) -> bool { + matches!( + node.kind(), + "case" | "when_expression" | "switch_statement" | "switch_expression" | "match_statement" | "match_expression" + ) +} + +fn hidden_case(node: Node<'_>) -> bool { + matches!(node.kind(), 
"body_statement" | "block_body" | "argument_list") + && first_child_kind(node) == Some("case") +} + +fn case_source_node(node: Node<'_>) -> Node<'_> { + if !hidden_case(node) { + return node; + } + let mut cursor = node.walk(); + let result = node + .children(&mut cursor) + .find(|child| child.kind() == "case") + .unwrap_or(node); + result +} + +fn ruby_predicate_less_case(node: Node<'_>) -> bool { + (node.kind() == "case" || hidden_case(node)) && decision_subject(node).is_none() +} + +fn case_patterns(node: Node<'_>, source: &str) -> Vec { + let mut out = case_arms(node) + .into_iter() + .flat_map(|arm| case_arm_patterns(arm, source)) + .filter(|pattern| !default_case_pattern(pattern)) + .collect::>(); + out.sort(); + out.dedup(); + out +} + +fn case_arms(node: Node<'_>) -> Vec> { + let mut arms = Vec::new(); + let mut stack = named_children(node); + while let Some(child) = stack.pop() { + if matches!( + child.kind(), + "when" + | "switch_case" + | "case_clause" + | "expression_case" + | "case_statement" + | "switch_section" + | "switch_block_statement_group" + | "switch_entry" + | "when_entry" + | "match_arm" + ) { + arms.push(child); + } else if !matches!( + child.kind(), + "method" + | "function_definition" + | "function_declaration" + | "method_definition" + | "method_declaration" + | "function_item" + | "class" + | "module" + | "class_definition" + | "class_declaration" + ) { + stack.extend(named_children(child)); + } + } + arms.reverse(); + arms +} + +fn case_arm_patterns(child: Node<'_>, source: &str) -> Vec { + match child.kind() { + "when" | "match_arm" => { + let mut patterns = named_children(child) + .into_iter() + .filter(|node| matches!(node.kind(), "pattern" | "case_pattern" | "match_pattern")) + .collect::>(); + if patterns.is_empty() { + patterns = child + .child_by_field_name("pattern") + .or_else(|| first_named_child(child)) + .into_iter() + .collect(); + } + ruby_when_pattern_texts(&patterns, source) + } + "switch_case" + | "case_clause" + | 
"expression_case" + | "case_statement" + | "switch_section" + | "switch_block_statement_group" + | "switch_entry" + | "when_entry" => { + if node_text(child, source).trim_start().starts_with("else") { + return Vec::new(); + } + let value = child + .child_by_field_name("value") + .or_else(|| child.child_by_field_name("pattern")) + .or_else(|| named_children(child).into_iter().find(|candidate| candidate.kind() == "when_condition")) + .or_else(|| named_children(child).into_iter().find(|candidate| candidate.kind() == "switch_pattern")) + .or_else(|| first_named_child(child)); + value + .filter(|node| !node.kind().contains("statement") && !node.kind().contains("block")) + .map(|node| vec![normalize_text(node_text(node, source))]) + .unwrap_or_default() + } + _ => Vec::new(), + } +} + +fn ruby_when_pattern_texts(patterns: &[Node<'_>], source: &str) -> Vec { + if patterns.is_empty() { + return Vec::new(); + } + let texts = patterns + .iter() + .map(|pattern| normalize_text(node_text(*pattern, source))) + .collect::>(); + if !texts.iter().any(|text| text.starts_with('*')) { + return texts; + } + + let mut out = Vec::new(); + let mut pending_plain = Vec::new(); + for (index, text) in texts.iter().enumerate() { + if text.starts_with('*') { + if !pending_plain.is_empty() { + out.push(pending_plain.join(", ")); + pending_plain.clear(); + } + if texts.len() == 1 || index > 0 { + out.push(text.trim_start_matches('*').to_string()); + } else { + out.push(text.clone()); + } + } else { + pending_plain.push(text.clone()); + } + } + if !pending_plain.is_empty() { + out.push(pending_plain.join(", ")); + } + out +} + +fn default_case_pattern(text: &str) -> bool { + matches!(text, "" | "_" | "default") +} + +fn decision_predicate(node: Node<'_>, source: &str) -> String { + let target = decision_subject(node); + normalize_text(target.map(|child| node_text(child, source)).unwrap_or_else(|| node_text(node, source))) +} + +fn decision_subject(node: Node<'_>) -> Option> { + 
node.child_by_field_name("value") + .or_else(|| node.child_by_field_name("subject")) + .or_else(|| named_children(node).into_iter().find(|child| child.kind() == "when_subject")) + .or_else(|| node.child_by_field_name("condition")) + .or_else(|| { + named_children(node).into_iter().find(|child| { + !matches!( + child.kind(), + "when" + | "switch_case" + | "case_clause" + | "expression_case" + | "case_statement" + | "switch_section" + | "switch_block_statement_group" + | "switch_entry" + | "when_entry" + | "match_arm" + | "else" + | "then" + | "comment" + ) + }) + }) +} + +fn boolean_container(node: Node<'_>) -> bool { + if matches!(node.kind(), "binary" | "binary_expression" | "boolean_operator") { + return true; + } + if parenthesized_wrapper(node) { + return first_named_child(node).map(boolean_container).unwrap_or(false); + } + if !matches!(node.kind(), "body_statement" | "block_body" | "statement" | "pattern" | "argument_list") { + return false; + } + if !matches!(direct_operator(node).as_str(), "&&" | "and") { + return false; + } + if named_children(node).len() < 2 { + return false; + } + let mut cursor = node.walk(); + let result = node + .children(&mut cursor) + .all(|child| child.is_named() || matches!(child.kind(), "&&" | "and" | "(" | ")")); + result +} + +fn boolean_and(node: Node<'_>, source: &str) -> bool { + if parenthesized_wrapper(node) { + return first_named_child(node) + .map(|child| boolean_and(child, source)) + .unwrap_or(false); + } + matches!(direct_operator_from_source(node, source).as_str(), "&&" | "and") +} + +fn flatten_boolean_and<'tree>(node: Node<'tree>, source: &str) -> Vec> { + if !(boolean_container(node) && boolean_and(node, source)) { + return vec![node]; + } + if parenthesized_wrapper(node) { + return first_named_child(node) + .map(|child| flatten_boolean_and(child, source)) + .unwrap_or_else(|| vec![node]); + } + named_children(node) + .into_iter() + .flat_map(|child| flatten_boolean_and(child, source)) + .collect() +} + +fn 
parenthesized_wrapper(node: Node<'_>) -> bool { + matches!(node.kind(), "parenthesized_statements" | "parenthesized_expression") + && named_children(node).len() == 1 +} + +fn conjunction_span(node: Node<'_>) -> [usize; 4] { + let mut base = span(node); + if node.kind() == "pattern" && node.start_position().column > 0 { + base[1] += 1; + } + base +} + +fn decision_member_text(node: Node<'_>, source: &str) -> String { + normalize_text(&strip_enclosing_parentheses(node_text(node, source))) +} + +fn strip_enclosing_parentheses(text: &str) -> String { + let mut value = text.trim().to_string(); + loop { + if !(value.starts_with('(') && value.ends_with(')')) { + break value; + } + if !enclosing_parentheses_wrap_all(&value) { + break value; + } + value = value[1..value.len() - 1].trim().to_string(); + } +} + +fn enclosing_parentheses_wrap_all(text: &str) -> bool { + let mut depth = 0isize; + for (index, ch) in text.chars().enumerate() { + if ch == '(' { + depth += 1; + } else if ch == ')' { + depth -= 1; + } + if depth == 0 && index < text.len() - 1 { + return false; + } + if depth < 0 { + return false; + } + } + depth == 0 +} + +fn direct_operator(node: Node<'_>) -> String { + let mut cursor = node.walk(); + let result = node + .children(&mut cursor) + .find(|child| !child.is_named() && !matches!(child.kind(), "(" | ")")) + .map(|child| child.kind().to_string()) + .unwrap_or_default() + ; + result +} + +fn direct_operator_from_source(node: Node<'_>, source: &str) -> String { + let mut cursor = node.walk(); + let result = node + .children(&mut cursor) + .find(|child| !child.is_named() && !matches!(node_text(*child, source), "(" | ")")) + .map(|child| node_text(child, source).to_string()) + .unwrap_or_default() + ; + result +} + #[cfg(test)] mod tests { use super::*; diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs index 78891f147..08b081d18 100644 --- a/gems/decomplex/rust/src/main.rs +++ b/gems/decomplex/rust/src/main.rs @@ -1,7 +1,9 @@ mod 
decomplex; use anyhow::{bail, Context, Result}; -use decomplex::detectors::{co_update, flay_similarity, predicate_alias}; +use decomplex::detectors::{ + co_update, decision_pressure, flay_similarity, miner, predicate_alias, semantic_alias, +}; use decomplex::parallel; use decomplex::syntax::Language; use std::path::PathBuf; @@ -28,6 +30,24 @@ fn main() -> Result<()> { .with_context(|| "failed to scan predicate-alias facts")?; println!("{}", serde_json::to_string(&report)?); } + Command::Miner { language, files, .. } => { + let language = Language::parse(&language)?; + let report = miner::scan_files(&files, language) + .with_context(|| "failed to scan decision-site miner facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::SemanticAliases { language, files, .. } => { + let language = Language::parse(&language)?; + let report = semantic_alias::scan_files(&files, language) + .with_context(|| "failed to scan semantic-alias facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::DecisionPressure { language, files, .. } => { + let language = Language::parse(&language)?; + let report = decision_pressure::scan_files(&files, language) + .with_context(|| "failed to scan decision-pressure facts")?; + println!("{}", serde_json::to_string(&report)?); + } Command::FlaySimilarity { language, mass, @@ -60,6 +80,21 @@ enum Command { files: Vec, jobs: Option, }, + Miner { + language: String, + files: Vec, + jobs: Option, + }, + SemanticAliases { + language: String, + files: Vec, + jobs: Option, + }, + DecisionPressure { + language: String, + files: Vec, + jobs: Option, + }, FlaySimilarity { language: String, mass: usize, @@ -75,6 +110,9 @@ impl Command { Self::StateWrites { jobs, .. } | Self::CoUpdate { jobs, .. } | Self::PredicateAliases { jobs, .. } + | Self::Miner { jobs, .. } + | Self::SemanticAliases { jobs, .. } + | Self::DecisionPressure { jobs, .. } | Self::FlaySimilarity { jobs, .. 
} => *jobs, } } @@ -119,6 +157,39 @@ fn parse_args(args: Vec) -> Result { jobs, }) } + "miner" | "decision-miner" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("miner requires at least one file"); + } + Ok(Command::Miner { + language, + files, + jobs, + }) + } + "semantic-aliases" | "semantic-alias" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("semantic-aliases requires at least one file"); + } + Ok(Command::SemanticAliases { + language, + files, + jobs, + }) + } + "decision-pressure" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("decision-pressure requires at least one file"); + } + Ok(Command::DecisionPressure { + language, + files, + jobs, + }) + } "flay-similarity" => { let mut language = String::from("ruby"); let mut mass = 32usize; diff --git a/gems/decomplex/test/detector_runner_test.rb b/gems/decomplex/test/detector_runner_test.rb index 64c3461ba..528846802 100644 --- a/gems/decomplex/test/detector_runner_test.rb +++ b/gems/decomplex/test/detector_runner_test.rb @@ -2,6 +2,7 @@ require "minitest/autorun" require "open3" +require "tempfile" require_relative "../lib/decomplex" class DetectorRunnerTest < Minitest::Test @@ -24,6 +25,119 @@ def test_co_update_rust_engine_matches_ruby_engine_byte_for_byte assert_equal ruby_json, rust_json end + def test_miner_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? 
+ + Tempfile.create(["decomplex-miner", ".rb"]) do |file| + file.write(<<~RUBY) + def one(a, b, c) + a && b && c + end + + def two(a, b, c) + a && b && c + end + + def three(a, b, c) + a && b && c + end + + def broken(a, b) + a && b + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("miner", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_semantic_alias_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-semantic-alias", ".rb"]) do |file| + file.write(<<~RUBY) + def frame?; @provenance == :frame; end + def is_frame?; provenance == :frame; end + def heap?; @provenance == :heap; end + def somewhere(node) + return 1 if node.provenance == :frame + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("semantic-alias", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_predicate_alias_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-predicate-alias", ".rb"]) do |file| + file.write(<<~RUBY) + def first?; true; end + def second?; true; end + + def nil_body; nil; end + def other_nil_body; nil; end + + def setup + super + self[:type_params] ||= [] + end + + def type_params + self[:type_params] ||= [] + end + + def emit_one + <<~ZIG.chomp + hi + ZIG + end + + def emit_two + <<~ZIG.chomp + bye + ZIG + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("predicate-alias", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_decision_pressure_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? 
+ + Tempfile.create(["decomplex-decision-pressure", ".rb"]) do |file| + file.write(<<~RUBY) + def scan(node) + value = node.respond_to?(:symbol) ? node.symbol&.reg : nil + value.nil? + ensure + node&.cleanup + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("decision-pressure", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + def test_detector_cli_compare_engines_outputs_canonical_json skip "cargo is not available" unless cargo_available? From b9739e27f39dd16b7e97429f7aba115fd9974ac5 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Tue, 16 Jun 2026 23:38:39 +0000 Subject: [PATCH 11/52] Decomplex: Finish Tier 1 metric migration to Rust Migrates all remaining Tier 1 detectors (Redundant Nil Guard, State Mesh, Temporal Ordering Pressure, State-Based Branch Density) to Rust with strict function-for-function parity with the Ruby gem. Ensures byte-for-byte JSON compatibility by matching discovery-order grouping and implementing Ruby regex logic in Rust. 
Co-authored-by: gemini-cli <218195315+gemini-cli@users.noreply.github.com> --- .../lib/decomplex/detector_runner.rb | 55 +- .../decomplex/native/redundant_nil_guard.rb | 25 + .../decomplex/native/state_branch_density.rb | 25 + .../lib/decomplex/native/state_mesh.rb | 25 + .../native/temporal_ordering_pressure.rb | 25 + gems/decomplex/lib/decomplex/state_mesh.rb | 14 +- gems/decomplex/rust/Cargo.lock | 1 + gems/decomplex/rust/Cargo.toml | 1 + .../rust/src/decomplex/detectors/co_update.rs | 262 ++++---- .../decomplex/detectors/decision_pressure.rs | 511 +++++---------- .../rust/src/decomplex/detectors/miner.rs | 238 +++---- .../rust/src/decomplex/detectors/mod.rs | 4 + .../decomplex/detectors/predicate_alias.rs | 181 ++---- .../detectors/redundant_nil_guard.rs | 486 ++++++++++++++ .../src/decomplex/detectors/semantic_alias.rs | 352 ++++------- .../detectors/state_branch_density.rs | 469 ++++++++++++++ .../src/decomplex/detectors/state_mesh.rs | 594 ++++++++++++++++++ .../detectors/temporal_ordering_pressure.rs | 268 ++++++++ gems/decomplex/rust/src/main.rs | 95 ++- gems/decomplex/test/detector_runner_test.rb | 106 ++++ 20 files changed, 2716 insertions(+), 1021 deletions(-) create mode 100644 gems/decomplex/lib/decomplex/native/redundant_nil_guard.rb create mode 100644 gems/decomplex/lib/decomplex/native/state_branch_density.rb create mode 100644 gems/decomplex/lib/decomplex/native/state_mesh.rb create mode 100644 gems/decomplex/lib/decomplex/native/temporal_ordering_pressure.rb create mode 100644 gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs diff --git a/gems/decomplex/lib/decomplex/detector_runner.rb b/gems/decomplex/lib/decomplex/detector_runner.rb index 560659d98..640c6e211 100644 --- 
a/gems/decomplex/lib/decomplex/detector_runner.rb +++ b/gems/decomplex/lib/decomplex/detector_runner.rb @@ -13,6 +13,10 @@ require_relative "decision_pressure" require_relative "predicate_alias" require_relative "semantic_alias" +require_relative "state_mesh" +require_relative "state_branch_density" +require_relative "temporal_ordering_pressure" +require_relative "redundant_nil_guard" module Decomplex # Runs one detector in isolation and emits deterministic machine output. @@ -35,7 +39,12 @@ module DetectorRunner "semantic-predicate-aliases" => :semantic_alias, "reification-misses" => :semantic_alias, "flay-similarity" => :flay_similarity, - "structural-similarity" => :flay_similarity + "structural-similarity" => :flay_similarity, + "temporal-ordering-pressure" => :temporal_ordering_pressure, + "state-branch-density" => :state_branch_density, + "redundant-nil-guard" => :redundant_nil_guard, + "state-mesh" => :state_mesh, + "state-heatmap" => :state_mesh }.freeze ENGINES = %w[ruby rust].freeze @@ -58,6 +67,14 @@ def run(detector, files, engine: "ruby", mass: FlaySimilarity::DEFAULT_MASS, fuz semantic_alias(files, engine: engine, jobs: jobs) when :flay_similarity flay_similarity(files, engine: engine, mass: mass, fuzzy: fuzzy, jobs: jobs) + when :temporal_ordering_pressure + temporal_ordering_pressure(files, engine: engine, jobs: jobs) + when :state_branch_density + state_branch_density(files, engine: engine, jobs: jobs) + when :redundant_nil_guard + redundant_nil_guard(files, engine: engine, jobs: jobs) + when :state_mesh + state_mesh(files, engine: engine, jobs: jobs) else raise ArgumentError, "unsupported decomplex detector: #{detector}" end @@ -147,6 +164,42 @@ def detector_names { "findings" => findings } end + private_class_method def self.temporal_ordering_pressure(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/temporal_ordering_pressure" + return Native::TemporalOrderingPressure.scan(files, jobs: jobs) + end + + 
TemporalOrderingPressure.scan(files) + end + + private_class_method def self.state_branch_density(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/state_branch_density" + return Native::StateBranchDensity.scan(files, jobs: jobs) + end + + StateBranchDensity.scan(files).findings + end + + private_class_method def self.redundant_nil_guard(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/redundant_nil_guard" + return Native::RedundantNilGuard.scan(files, jobs: jobs) + end + + RedundantNilGuard.scan(files) + end + + private_class_method def self.state_mesh(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/state_mesh" + return Native::StateMesh.scan(files, jobs: jobs) + end + + StateMesh.scan(files).tap(&:run).to_json_graph + end + private_class_method def self.canonicalize(value) case value when Hash diff --git a/gems/decomplex/lib/decomplex/native/redundant_nil_guard.rb b/gems/decomplex/lib/decomplex/native/redundant_nil_guard.rb new file mode 100644 index 000000000..237f1ad70 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/redundant_nil_guard.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module RedundantNilGuard + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("redundant-nil-guard", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? 
+ + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/state_branch_density.rb b/gems/decomplex/lib/decomplex/native/state_branch_density.rb new file mode 100644 index 000000000..eebc752bd --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/state_branch_density.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module StateBranchDensity + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("state-branch-density", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/state_mesh.rb b/gems/decomplex/lib/decomplex/native/state_mesh.rb new file mode 100644 index 000000000..102229954 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/state_mesh.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module StateMesh + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("state-mesh", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? 
+ + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/temporal_ordering_pressure.rb b/gems/decomplex/lib/decomplex/native/temporal_ordering_pressure.rb new file mode 100644 index 000000000..611491fc1 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/temporal_ordering_pressure.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module TemporalOrderingPressure + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("temporal-ordering-pressure", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/state_mesh.rb b/gems/decomplex/lib/decomplex/state_mesh.rb index edc476998..668a8d2ab 100644 --- a/gems/decomplex/lib/decomplex/state_mesh.rb +++ b/gems/decomplex/lib/decomplex/state_mesh.rb @@ -61,6 +61,8 @@ def walk_writes(node, lines, defstack, file) return unless Ast.node?(node) case node.type + when :CLASS, :MODULE + defstack = defstack + [node.children[0].to_s] when :DEFN then defstack = defstack + [node.children[0].to_s] when :DEFS then defstack = defstack + [node.children[1].to_s] when :ATTRASGN @@ -107,13 +109,17 @@ def walk_reads(node, lines, defstack, file, field_norms) return unless Ast.node?(node) case node.type + when :CLASS, :MODULE + defstack = defstack + [node.children[0].to_s] when :DEFN then defstack = defstack + [node.children[0].to_s] when :DEFS then defstack = defstack + [node.children[1].to_s] - when :CALL, :OPCALL + when :CALL, :OPCALL, :FCALL, :VCALL # 
CALL(recv, :method, args) - attribute reads have no args - recv = node.children[0] - mid = node.children[1] - args = node.children[2] + # FCALL(:method, args) - attribute reads have no args + # VCALL(:method) - attribute reads have no args + recv = node.type == :CALL || node.type == :OPCALL ? node.children[0] : nil + mid = node.type == :CALL || node.type == :OPCALL ? node.children[1] : node.children[0] + args = node.type == :CALL || node.type == :OPCALL ? node.children[2] : node.children[1] # Skip if called with arguments (it's a method call, not attr read) if args.nil? || (Ast.node?(args) && args.type == :LIST && args.children.compact.empty?) diff --git a/gems/decomplex/rust/Cargo.lock b/gems/decomplex/rust/Cargo.lock index 0be14f618..78002f238 100644 --- a/gems/decomplex/rust/Cargo.lock +++ b/gems/decomplex/rust/Cargo.lock @@ -44,6 +44,7 @@ name = "decomplex-rust" version = "0.1.0" dependencies = [ "anyhow", + "regex", "serde", "serde_json", "tempfile", diff --git a/gems/decomplex/rust/Cargo.toml b/gems/decomplex/rust/Cargo.toml index 245ae5322..24a40eecd 100644 --- a/gems/decomplex/rust/Cargo.toml +++ b/gems/decomplex/rust/Cargo.toml @@ -13,6 +13,7 @@ path = "src/main.rs" anyhow = "1.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +regex = "1.10" tree-sitter = "=0.24.7" tree-sitter-language = "=0.1.3" tree-sitter-ruby = "=0.23.1" diff --git a/gems/decomplex/rust/src/decomplex/detectors/co_update.rs b/gems/decomplex/rust/src/decomplex/detectors/co_update.rs index 06c0c0944..e7db72483 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/co_update.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/co_update.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::Span; -use crate::decomplex::syntax::{self, Document, Language, StateWrite}; +use crate::decomplex::syntax::{self, Language, StateWrite}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -13,181 +13,151 @@ pub struct CoUpdateReport { 
#[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct CoWrittenPair { - pub pair: [String; 2], - pub sites: Vec, + pub pair: Vec, pub support: usize, + pub sites: Vec, } #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct NeglectedUpdate { - pub at: String, + pub pair: Vec, + pub support: usize, pub has: String, pub missing: String, - pub pair: [String; 2], - pub recv: String, + pub at: String, pub spans: BTreeMap, - pub support: usize, + pub recv: String, +} + +#[derive(Clone, Debug)] +struct Write { + attr: String, + recv: String, + file: String, + defn: String, + line: usize, + span: Span, } pub fn scan_files(files: &[PathBuf], language: Language) -> Result { - let documents = syntax::parse_files(files, language)?; - Ok(scan_documents(&documents, 3)) + let mut writes = Vec::new(); + for file in files { + let doc = syntax::parse_file(file.clone(), language)?; + for w in doc.state_writes { + writes.push(Write { + attr: w.field, + recv: w.receiver, + file: w.file, + defn: w.function, + line: w.line, + span: w.span, + }); + } + } + let report = Report::new(writes); + Ok(CoUpdateReport { + co_written_pairs: report.co_written_pairs(3), + neglected_updates: report.neglected_updates(3), + }) } pub fn state_writes_for_files(files: &[PathBuf], language: Language) -> Result> { - let documents = syntax::parse_files(files, language)?; - Ok(documents - .iter() - .flat_map(|document| document.state_writes.clone()) - .collect()) + let mut out = Vec::new(); + for file in files { + let doc = syntax::parse_file(file.clone(), language)?; + out.extend(doc.state_writes); + } + Ok(out) } -pub fn scan_documents(documents: &[Document], min_support: usize) -> CoUpdateReport { - let writes = documents - .iter() - .flat_map(|document| document.state_writes.clone()) - .collect::>(); - let pairs = co_written_pairs(&writes, min_support); - let neglected = neglected_updates(&writes, &pairs); - CoUpdateReport { - co_written_pairs: pairs, - neglected_updates: neglected, - } 
+struct Report { + #[allow(dead_code)] + writes: Vec, + by_unit: Vec<((String, String), Vec)>, } -fn co_written_pairs(writes: &[StateWrite], min_support: usize) -> Vec { - let by_unit = writes_by_unit(writes); - let mut counts: Vec<([String; 2], Vec<[String; 2]>)> = Vec::new(); - for ((file, function), unit_writes) in by_unit { - let attrs = unit_writes - .iter() - .map(|write| write.field.clone()) - .collect::>() - .into_iter() - .collect::>(); - for left in 0..attrs.len() { - for right in (left + 1)..attrs.len() { - let pair = [attrs[left].clone(), attrs[right].clone()]; - if let Some((_, units)) = counts.iter_mut().find(|(existing, _)| *existing == pair) { - units.push([file.clone(), function.clone()]); - } else { - counts.push((pair, vec![[file.clone(), function.clone()]])); - } +impl Report { + fn new(writes: Vec) -> Self { + let mut by_unit: Vec<((String, String), Vec)> = Vec::new(); + for w in &writes { + let key = (w.file.clone(), w.defn.clone()); + if let Some(entry) = by_unit.iter_mut().find(|(k, _)| k == &key) { + entry.1.push(w.clone()); + } else { + by_unit.push((key, vec![w.clone()])); } } + Self { writes, by_unit } } - let mut out = counts - .into_iter() - .filter_map(|(pair, units)| { - if units.len() < min_support { - return None; + fn co_written_pairs(&self, min_support: usize) -> Vec { + let mut counts: Vec<(Vec, Vec<(String, String)>)> = Vec::new(); + for (unit, ws) in &self.by_unit { + let mut attrs: Vec<_> = ws.iter().map(|w| w.attr.clone()).collect::>().into_iter().collect(); + attrs.sort(); + + for i in 0..attrs.len() { + for j in i+1..attrs.len() { + let pair = vec![attrs[i].clone(), attrs[j].clone()]; + if let Some(entry) = counts.iter_mut().find(|(p, _)| p == &pair) { + entry.1.push(unit.clone()); + } else { + counts.push((pair, vec![unit.clone()])); + } + } } - let support = units.len(); - Some(CoWrittenPair { - pair, - sites: units - .into_iter() - .map(|unit| format!("{}:{}", unit[0], unit[1])) - .collect(), - support, - }) - }) - 
.collect::>(); - out.sort_by(|left, right| right.support.cmp(&left.support)); - out -} + } -fn neglected_updates(writes: &[StateWrite], pairs: &[CoWrittenPair]) -> Vec { - let by_unit = writes_by_unit(writes); - let mut out = Vec::new(); - for ((file, function), unit_writes) in by_unit { - let attrs = unit_writes - .iter() - .map(|write| write.field.as_str()) - .collect::>(); - for pair in pairs { - let left = pair.pair[0].as_str(); - let right = pair.pair[1].as_str(); - let maybe = if attrs.contains(left) && !attrs.contains(right) { - Some((left, right)) - } else if attrs.contains(right) && !attrs.contains(left) { - Some((right, left)) - } else { - None - }; - let Some((has, missing)) = maybe else { - continue; - }; - let Some(write) = unit_writes.iter().find(|write| write.field == has) else { - continue; - }; - let at = format!("{file}:{function}:{}", write.line); - let mut spans = BTreeMap::new(); - spans.insert(at.clone(), write.span); - out.push(NeglectedUpdate { - at, - has: has.to_string(), - missing: missing.to_string(), - pair: pair.pair.clone(), - recv: write.receiver.clone(), - spans, - support: pair.support, + let mut out = Vec::new(); + for (pair, units) in counts { + if units.len() < min_support { continue; } + out.push(CoWrittenPair { + pair, + support: units.len(), + sites: units.into_iter().map(|(f, d)| format!("{}:{}", f, d)).collect(), }); } + out.sort_by(|a, b| b.support.cmp(&a.support)); + out } - out.sort_by(|left, right| right.support.cmp(&left.support)); - out -} -fn writes_by_unit(writes: &[StateWrite]) -> Vec<((String, String), Vec)> { - let mut by_unit: Vec<((String, String), Vec)> = Vec::new(); - for write in writes { - let key = (write.file.clone(), write.function.clone()); - if let Some((_, unit_writes)) = by_unit.iter_mut().find(|(existing, _)| *existing == key) { - unit_writes.push(write.clone()); - } else { - by_unit.push((key, vec![write.clone()])); - } - } - by_unit -} + fn neglected_updates(&self, min_support: usize) -> Vec { + 
let pairs = self.co_written_pairs(min_support); + let mut out = Vec::new(); -#[cfg(test)] -mod tests { - use super::*; + for ((file, defn), ws) in &self.by_unit { + let attrs: BTreeSet<_> = ws.iter().map(|w| w.attr.clone()).collect(); + for p in &pairs { + let a = &p.pair[0]; + let b = &p.pair[1]; + + let (has, miss) = if attrs.contains(a) && !attrs.contains(b) { + (Some(a), Some(b)) + } else if attrs.contains(b) && !attrs.contains(a) { + (Some(b), Some(a)) + } else { + (None, None) + }; - fn write(file: &str, function: &str, attr: &str, line: usize) -> StateWrite { - StateWrite { - field: attr.to_string(), - receiver: "node".to_string(), - file: file.to_string(), - function: function.to_string(), - line, - span: [line, 0, line, 1], - owner: "Box".to_string(), + if let (Some(has), Some(miss)) = (has, miss) { + if let Some(w) = ws.iter().find(|x| &x.attr == has) { + let loc = format!("{}:{}:{}", file, defn, w.line); + let mut spans = BTreeMap::new(); + spans.insert(loc.clone(), w.span); + out.push(NeglectedUpdate { + pair: p.pair.clone(), + support: p.support, + has: has.clone(), + missing: miss.clone(), + at: loc, + spans, + recv: w.recv.clone(), + }); + } + } + } } - } - - #[test] - fn reports_frequent_pairs_and_neglected_updates() { - let writes = vec![ - write("a.rb", "one", "storage", 1), - write("a.rb", "one", "provenance", 2), - write("a.rb", "two", "storage", 3), - write("a.rb", "two", "provenance", 4), - write("b.rb", "three", "storage", 5), - write("b.rb", "three", "provenance", 6), - write("c.rb", "broken", "storage", 7), - ]; - let pairs = co_written_pairs(&writes, 3); - assert_eq!(pairs.len(), 1); - assert_eq!(pairs[0].pair, ["provenance".to_string(), "storage".to_string()]); - assert_eq!(pairs[0].support, 3); - - let neglected = neglected_updates(&writes, &pairs); - assert_eq!(neglected.len(), 1); - assert_eq!(neglected[0].missing, "provenance"); - assert_eq!(neglected[0].at, "c.rb:broken:7"); + out.sort_by(|a, b| b.support.cmp(&a.support)); + out } } 
diff --git a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs index 34a393a2c..515f11e08 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs @@ -27,32 +27,29 @@ struct Hit { span: Span, } -#[derive(Clone, Debug)] -struct Scanner { - file: String, - lines: Vec, - guard_hits: Vec, - dispatch_hits: Vec, -} - -type AssignmentMap = Vec<(String, Node)>; - pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { let mut guard = Vec::new(); let mut dispatch = Vec::new(); for file in files { let (root, lines) = ast::parse(file)?; - let mut scanner = Scanner::new(file.to_string_lossy().to_string(), lines); - scanner.walk(&root, &[], &Vec::new()); - guard.extend(scanner.guard_hits); - dispatch.extend(scanner.dispatch_hits); + let mut detector = DecisionPressure::new(file.to_string_lossy().to_string(), lines); + detector.walk(&root, &Vec::new(), &BTreeMap::new()); + guard.extend(detector.guard_hits); + dispatch.extend(detector.dispatch_hits); } - Ok(ranked(&guard, &dispatch)) + Ok(Report::new(guard, dispatch).ranked()) +} + +struct DecisionPressure { + file: String, + lines: Vec, + guard_hits: Vec, + dispatch_hits: Vec, } -impl Scanner { +impl DecisionPressure { fn new(file: String, lines: Vec) -> Self { Self { file, @@ -62,14 +59,14 @@ impl Scanner { } } - fn walk(&mut self, node: &Node, defstack: &[String], asgmap: &AssignmentMap) { + fn walk(&mut self, node: &Node, defstack: &[String], asgmap: &BTreeMap) { let mut next_defstack = defstack.to_vec(); let mut next_asgmap = asgmap.clone(); if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; - if let Some(name) = child_to_string(node.children.get(name_index)) { - next_defstack.push(name); + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + 
next_defstack.push(name.clone()); } next_asgmap = self.build_asgmap(node); } @@ -81,422 +78,202 @@ impl Scanner { } } - fn build_asgmap(&self, defn_node: &Node) -> AssignmentMap { - let mut map = Vec::new(); + fn build_asgmap(&self, defn_node: &Node) -> BTreeMap { + let mut map = BTreeMap::new(); let mut stack = ast::body_stmts(defn_node); + stack.reverse(); while let Some(node) = stack.pop() { if node.r#type == "LASGN" { - let name = child_to_string(node.children.first()); - let source = node.children.get(1).and_then(ast::node); - if let (Some(name), Some(source)) = (name, source) { - if !map.iter().any(|(existing, _)| existing == &name) - && self.simple_source(source) - { - map.push((name, source.clone())); + if let Some(Child::String(name)) = node.children.get(0) { + if let Some(src) = node.children.get(1).and_then(ast::node) { + if !map.contains_key(name) && self.simple_source(src) { + map.insert(name.clone(), src.clone()); + } } } } - for child in node.children.iter().filter_map(ast::node) { + for child in node.children.iter().filter_map(ast::node).rev() { stack.push(child); } } - map } - fn simple_source(&self, node: &Node) -> bool { - match node.r#type.as_str() { + fn simple_source(&self, n: &Node) -> bool { + match n.r#type.as_str() { "IVAR" => true, "CALL" | "QCALL" => { - let receiver = node.children.first().and_then(ast::node); - let method = child_to_string(node.children.get(1)); - let args_nil = child_nil(node.children.get(2)); - receiver.is_some() - && (args_nil || method.as_deref() == Some("[]")) + let recv = n.children.get(0).and_then(ast::node); + let mid = n.children.get(1).and_then(|c| match c { Child::Symbol(s) => Some(s), _ => None }); + let args = n.children.get(2); + recv.is_some() && (args.is_none() || matches!(args, Some(Child::Nil)) || mid.map(|s| s.as_str()) == Some("[]")) } _ => false, } } - fn record_decision( - &mut self, - node: &Node, - defstack: &[String], - asgmap: &AssignmentMap, - ) { + fn hit(&self, contract: String, defstack: 
&[String], node: &Node) -> Hit { + Hit { + contract, + file: self.file.clone(), + defn: defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + line: node.first_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + } + } + + fn record_decision(&mut self, node: &Node, defstack: &[String], asgmap: &BTreeMap) { if !matches!(node.r#type.as_str(), "CALL" | "QCALL") { return; } - let Some(receiver) = node.children.first().and_then(ast::node) else { - return; - }; - let Some(method) = child_to_string(node.children.get(1)) else { - return; - }; + let recv = node.children.get(0).and_then(ast::node); + let mid = node.children.get(1).and_then(|c| match c { Child::Symbol(s) => Some(s), _ => None }); + let _args = node.children.get(2); - let guard = (node.r#type == "CALL" && GUARD_MIDS.contains(&method.as_str())) - || node.r#type == "QCALL"; + let Some(recv) = recv else { return }; + let Some(mid) = mid else { return }; + + let guard = (node.r#type == "CALL" && GUARD_MIDS.contains(&mid.as_str())) || node.r#type == "QCALL"; if guard { - if let Some(contract) = self.contract_of(receiver, asgmap, 0) { - self.guard_hits.push(self.hit(contract, defstack, node)); + if let Some(c) = self.contract_of(recv, asgmap, 0) { + self.guard_hits.push(self.hit(c, defstack, node)); } return; } - if node.r#type == "CALL" && method.ends_with('?') { - if let Some(contract) = self.contract_of(receiver, asgmap, 0) { - self.dispatch_hits.push(self.hit(contract, defstack, node)); + if node.r#type == "CALL" && mid.ends_with('?') { + if let Some(c) = self.contract_of(recv, asgmap, 0) { + self.dispatch_hits.push(self.hit(c, defstack, node)); } } } - fn record_rescue_nil( - &mut self, - node: &Node, - defstack: &[String], - asgmap: &AssignmentMap, - ) { + fn record_rescue_nil(&mut self, node: &Node, defstack: &[String], asgmap: &BTreeMap) { if node.r#type != "RESCUE" { return; } - let Some(body) = node.children.first().and_then(ast::node) else { - 
return; - }; - let Some(resbody) = node.children.get(1).and_then(ast::node) else { - return; - }; - if resbody.r#type != "RESBODY" || !child_nil(resbody.children.first()) { - return; - } + let body = node.children.get(0).and_then(ast::node); + let resb = node.children.get(1).and_then(ast::node); - let handler = resbody.children.get(1); - let nil_handler = child_nil(handler) - || handler - .and_then(ast::node) - .map(|node| node.r#type == "NIL") - .unwrap_or(false); - if !nil_handler || !matches!(body.r#type.as_str(), "CALL" | "QCALL") { - return; - } + let Some(resb) = resb else { return }; + if resb.r#type != "RESBODY" { return }; + if !matches!(resb.children.get(0), None | Some(Child::Nil)) { return }; - if let Some(contract) = self.contract_of(body, asgmap, 0) { - self.guard_hits.push(self.hit(contract, defstack, node)); - } - } + let handler = resb.children.get(1); + let nil_handler = matches!(handler, None | Some(Child::Nil)) || handler.and_then(ast::node).map(|n| n.r#type == "NIL").unwrap_or(false); + if !nil_handler { return }; - fn hit(&self, contract: String, defstack: &[String], node: &Node) -> Hit { - Hit { - contract, - file: self.file.clone(), - defn: defstack - .last() - .cloned() - .unwrap_or_else(|| "(top-level)".to_string()), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], + let Some(body) = body else { return }; + if !matches!(body.r#type.as_str(), "CALL" | "QCALL") { return }; + + if let Some(c) = self.contract_of(body, asgmap, 0) { + self.guard_hits.push(self.hit(c, defstack, node)); } } - fn contract_of( - &self, - node: &Node, - asgmap: &AssignmentMap, - depth: usize, - ) -> Option { - if depth >= 8 { - return None; - } + fn contract_of(&self, n: &Node, asgmap: &BTreeMap, depth: usize) -> Option { + if depth >= 8 { return None; } - match node.r#type.as_str() { + match n.r#type.as_str() { "LVAR" | "DVAR" => { - let name = child_to_string(node.children.first())?; - if 
let Some((_, source)) = asgmap - .iter() - .find(|(candidate, _)| candidate == &name) - { - self.contract_of(source, asgmap, depth + 1) - } else { - Some("~local".to_string()) + if let Some(Child::String(nm)) = n.children.first() { + if let Some(src) = asgmap.get(nm) { + return self.contract_of(src, asgmap, depth + 1); + } else { + return Some("~local".to_string()); + } } + None + } + "IVAR" => { + if let Some(Child::String(attr)) = n.children.first() { + return Some(attr.clone()); + } + None } - "IVAR" => child_to_string(node.children.first()), "CALL" | "QCALL" => { - let receiver = node.children.first().and_then(ast::node); - let method = child_to_string(node.children.get(1))?; - let args = node.children.get(2).and_then(ast::node); - - if method == "[]" { - let key = args.and_then(|args| first_non_nil_child(&args.children)); - let text = key - .map(|child| child_slice(child, &self.lines)) - .unwrap_or_else(|| "nil".to_string()); - Some(format!("[{text}]")) - } else if args.is_none() - && receiver.is_some() - && !TRANSIENT_NOARG_MIDS.contains(&method.as_str()) - { - Some(format!(".{method}")) + let recv = n.children.get(0).and_then(ast::node); + let mid = n.children.get(1).and_then(|c| match c { Child::Symbol(s) => Some(s), _ => None })?; + let args = n.children.get(2); + + if mid == "[]" { + let key = if let Some(Child::Node(node)) = args { + node.children.iter().filter(|c| !matches!(c, Child::Nil)).next() + } else { + None + }; + let kt = match key { + Some(Child::Node(k)) => ast::slice(k, &self.lines), + _ => "nil".to_string(), // Simplified key.inspect + }; + Some(format!("[{}]", kt)) + } else if (args.is_none() || matches!(args, Some(Child::Nil))) && recv.is_some() && !TRANSIENT_NOARG_MIDS.contains(&mid.as_str()) { + Some(format!(".{}", mid)) } else { None } } - "VCALL" => child_to_string(node.children.first()).map(|name| format!(".{name}")), - _ => None, + "VCALL" => { + if let Some(Child::Symbol(name)) = n.children.first() { + return Some(format!(".{}", 
name)); + } + None + } + _ => None } } } -fn ranked(guard_hits: &[Hit], dispatch_hits: &[Hit]) -> Vec { - let mut essential = Vec::<(String, usize)>::new(); - for hit in dispatch_hits { - if let Some((_, count)) = essential - .iter_mut() - .find(|(contract, _)| contract == &hit.contract) - { - *count += 1; - } else { - essential.push((hit.contract.clone(), 1)); - } +struct Report { + guard: Vec, + dispatch: Vec, +} + +impl Report { + fn new(guard: Vec, dispatch: Vec) -> Self { + Self { guard, dispatch } } - let mut groups = Vec::<(String, Vec<&Hit>)>::new(); - for hit in guard_hits { - if let Some((_, hits)) = groups - .iter_mut() - .find(|(contract, _)| contract == &hit.contract) - { - hits.push(hit); - } else { - groups.push((hit.contract.clone(), vec![hit])); + fn ranked(&self) -> Vec { + let mut ess = BTreeMap::new(); + for h in &self.dispatch { + *ess.entry(&h.contract).or_insert(0) += 1; + } + + let mut rows_map: BTreeMap> = BTreeMap::new(); + for h in &self.guard { + rows_map.entry(h.contract.clone()).or_default().push(h); } - } - let rows = groups - .into_iter() - .map(|(contract, hits)| { - let methods = hits - .iter() - .map(|hit| (hit.file.clone(), hit.defn.clone())) - .collect::>() - .len(); - let sites = hits.iter().map(|hit| loc(hit)).collect::>(); - let spans = hits - .iter() - .map(|hit| (loc(hit), hit.span)) - .collect::>(); - let essential_count = essential - .iter() - .find(|(candidate, _)| candidate == &contract) - .map(|(_, count)| *count) - .unwrap_or(0); + let mut rows: Vec<_> = rows_map.into_iter().map(|(contract, hs)| { + let mut methods_set = BTreeSet::new(); + for h in &hs { + methods_set.insert((&h.file, &h.defn)); + } + let sites = hs.iter().map(|h| loc(h)).collect(); + let spans = hs.iter().map(|h| (loc(h), h.span)).collect(); + let essential = ess.get(&contract).cloned().unwrap_or(0); DecisionPressureRow { contract, - decisions: hits.len(), - essential: essential_count, - methods, + decisions: hs.len(), + essential, + methods: 
methods_set.len(), sites, spans, } - }) - .collect::>(); - - let mut named = rows - .iter() - .filter(|row| row.contract != "~local") - .cloned() - .collect::>(); - named.sort_by(|left, right| { - right - .decisions - .cmp(&left.decisions) - .then(right.methods.cmp(&left.methods)) - }); - let local = rows - .into_iter() - .filter(|row| row.contract == "~local") - .collect::>(); - named.into_iter().chain(local).collect() -} + }).collect(); -fn child_to_string(child: Option<&Child>) -> Option { - match child { - Some(Child::String(value)) | Some(Child::Symbol(value)) => Some(value.clone()), - _ => None, + let mut named: Vec<_> = rows.iter().filter(|r| r.contract != "~local").cloned().collect(); + named.sort_by(|a, b| b.decisions.cmp(&a.decisions).then_with(|| b.methods.cmp(&a.methods))); + + let local: Vec<_> = rows.into_iter().filter(|r| r.contract == "~local").collect(); + named.into_iter().chain(local).collect() } } -fn child_nil(child: Option<&Child>) -> bool { - matches!(child, None | Some(Child::Nil)) -} - -fn first_non_nil_child(children: &[Child]) -> Option<&Child> { - children.iter().find(|child| !matches!(child, Child::Nil)) -} - -fn child_slice(child: &Child, lines: &[String]) -> String { - match child { - Child::Node(node) => ast::slice(node, lines), - Child::Symbol(value) => value.clone(), - Child::String(value) => format!("{value:?}"), - Child::Nil => "nil".to_string(), - } -} - -fn loc(hit: &Hit) -> String { - format!("{}:{}:{}", hit.file, hit.defn, hit.line) -} - -#[cfg(test)] -mod tests { - use super::*; - - fn node(node_type: &str, children: Vec) -> Node { - Node { - r#type: node_type.to_string(), - children, - first_lineno: 1, - first_column: 0, - last_lineno: 1, - last_column: 1, - text: String::new(), - } - } - - #[test] - fn resolves_local_to_accessor_contract() { - let source = node( - "CALL", - vec![ - Child::Node(Box::new(node("LVAR", vec![Child::String("node".to_string())]))), - Child::Symbol("full_type".to_string()), - Child::Nil, - ], - ); 
- let scanner = Scanner::new("test.rb".to_string(), Vec::new()); - let local = node("LVAR", vec![Child::String("ti".to_string())]); - assert_eq!( - scanner.contract_of(&local, &vec![("ti".to_string(), source)], 0), - Some(".full_type".to_string()) - ); - } - - #[test] - fn resolved_transient_local_does_not_fall_back_to_local_contract() { - let source = node( - "CALL", - vec![ - Child::Node(Box::new(node( - "LVAR", - vec![Child::String("stack".to_string())], - ))), - Child::Symbol("pop".to_string()), - Child::Nil, - ], - ); - let scanner = Scanner::new("test.rb".to_string(), Vec::new()); - let local = node("LVAR", vec![Child::String("node".to_string())]); - - assert_eq!( - scanner.contract_of(&local, &vec![("node".to_string(), source)], 0), - None - ); - } - - #[test] - fn hash_key_contract_uses_key_text() { - let element = node( - "CALL", - vec![ - Child::Node(Box::new(node("LVAR", vec![Child::String("p".to_string())]))), - Child::Symbol("[]".to_string()), - Child::Node(Box::new(node( - "LIST", - vec![Child::Node(Box::new(Node { - r#type: "LIT".to_string(), - children: vec![Child::Symbol("type".to_string())], - first_lineno: 1, - first_column: 2, - last_lineno: 1, - last_column: 7, - text: ":type".to_string(), - }))], - ))), - ], - ); - let scanner = Scanner::new("test.rb".to_string(), Vec::new()); - assert_eq!(scanner.contract_of(&element, &Vec::new(), 0), Some("[:type]".to_string())); - } - - #[test] - fn scan_records_safe_navigation_pressure() { - let mut file = tempfile::NamedTempFile::new().expect("temp"); - std::io::Write::write_all( - &mut file, - b"def scan\n file&.unlink\nend\n", - ) - .expect("write"); - - let rows = scan_files(&[file.path().to_path_buf()], Language::Ruby).expect("scan"); - - assert_eq!(rows.len(), 1); - assert_eq!(rows[0].contract, ".file"); - assert_eq!(rows[0].decisions, 1); - } - - #[test] - fn scan_records_safe_navigation_pressure_inside_ensure() { - let mut file = tempfile::NamedTempFile::new().expect("temp"); - 
std::io::Write::write_all( - &mut file, - b"class CoUpdateTest < Minitest::Test\n def scan(ruby)\n f = Tempfile.new([\"cu\", \".rb\"])\n f.write(ruby)\n ensure\n f&.unlink\n end\nend\n", - ) - .expect("write"); - - let rows = scan_files(&[file.path().to_path_buf()], Language::Ruby).expect("scan"); - - assert_eq!(rows.len(), 1); - assert_eq!(rows[0].contract, "~local"); - assert_eq!(rows[0].decisions, 1); - } - - #[test] - fn scan_counts_block_predicate_on_assigned_local_as_essential_context() { - let mut file = tempfile::NamedTempFile::new().expect("temp"); - std::io::Write::write_all( - &mut file, - b"def t\n pairs = []\n refute(pairs.any? { |h| h[:pair].include?(\"[]\") })\n pairs.nil?\nend\n", - ) - .expect("write"); - - let rows = scan_files(&[file.path().to_path_buf()], Language::Ruby).expect("scan"); - - assert_eq!(rows.len(), 1); - assert_eq!(rows[0].contract, "~local"); - assert_eq!(rows[0].decisions, 1); - assert_eq!(rows[0].essential, 1); - } - - #[test] - fn scan_records_safe_navigation_pressure_in_ternary_arm() { - let mut file = tempfile::NamedTempFile::new().expect("temp"); - std::io::Write::write_all( - &mut file, - b"def x(node)\n decl = node.respond_to?(:symbol) ? 
node.symbol&.reg : nil\nend\n", - ) - .expect("write"); - - let rows = scan_files(&[file.path().to_path_buf()], Language::Ruby).expect("scan"); - - assert_eq!(rows.iter().find(|row| row.contract == ".symbol").map(|row| row.decisions), Some(1)); - assert_eq!(rows.iter().find(|row| row.contract == "~local").map(|row| row.decisions), Some(1)); - } - +fn loc(h: &Hit) -> String { + format!("{}:{}:{}", h.file, h.defn, h.line) } diff --git a/gems/decomplex/rust/src/decomplex/detectors/miner.rs b/gems/decomplex/rust/src/decomplex/detectors/miner.rs index 5ed596d69..1053533b3 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/miner.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/miner.rs @@ -31,183 +31,107 @@ pub struct NeglectedCondition { pub spans: BTreeMap, } -#[derive(Clone, Debug)] -struct Group { - kind: String, - members: Vec, - sites: Vec, - order: usize, -} - pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let documents = syntax::parse_files(files, language)?; - Ok(scan_documents(&documents, 2, 3)) -} - -pub fn scan_documents( - documents: &[Document], - min_scatter: usize, - min_neglected_support: usize, -) -> MinerReport { - let sites = documents - .iter() - .flat_map(|document| document.decision_sites.clone()) - .collect::>(); - MinerReport { - missing_abstractions: missing_abstractions(&sites, min_scatter), - neglected_conditions: neglected_conditions(&sites, min_neglected_support), + let mut sites = Vec::new(); + for doc in documents { + sites.extend(doc.decision_sites); } + let mut m = Miner::new(sites); + Ok(MinerReport { + missing_abstractions: m.missing_abstractions(2), + neglected_conditions: m.neglected_conditions(3), + }) } -fn missing_abstractions(sites: &[DecisionSite], min_scatter: usize) -> Vec { - let mut out = groups(sites) - .into_iter() - .filter_map(|group| { - let scatter = group - .sites - .iter() - .map(|site| (site.file.clone(), site.function.clone())) - .collect::>() - .len(); - if scatter < min_scatter { - 
return None; - } - let spans = group - .sites - .iter() - .map(|site| (loc(site), site.span)) - .collect::>(); - Some(( - group.order, - MissingAbstraction { - kind: group.kind, - members: group.members, - support: group.sites.len(), - scatter, - rank: group.sites.len() * scatter, - sites: group.sites.iter().map(loc).collect(), - spans, - }, - )) - }) - .collect::>(); - out.sort_by(|left, right| right.1.rank.cmp(&left.1.rank).then(left.0.cmp(&right.0))); - out.into_iter().map(|(_, finding)| finding).collect() +struct Miner { + sites: Vec, + groups: BTreeMap<(String, Vec), Vec>, } -fn neglected_conditions(sites: &[DecisionSite], min_support: usize) -> Vec { - let popular = groups(sites) - .into_iter() - .filter(|group| group.sites.len() >= min_support) - .map(|group| (group.kind, group.members, group.sites.len())) - .collect::>(); - let mut out = Vec::new(); - let mut seen = BTreeSet::new(); - for site in sites { - for (kind, members, support) in &popular { - if kind != &site.kind { - continue; - } - let missing = difference(members, &site.members); - let extra = difference(&site.members, members); - if missing.len() != 1 || !extra.is_empty() || &site.members == members { - continue; +impl Miner { + fn new(sites: Vec) -> Self { + let mut groups = BTreeMap::new(); + for s in &sites { + groups.entry((s.kind.clone(), s.members.clone())).or_insert_with(Vec::new).push(s.clone()); + } + Self { sites, groups } + } + + fn missing_abstractions(&self, min_scatter: usize) -> Vec { + let mut out = Vec::new(); + for ((kind, members), sts) in &self.groups { + let mut methods = BTreeSet::new(); + for s in sts { + methods.insert((s.file.clone(), s.function.clone())); } - let at = loc(site); + let scatter = methods.len(); + if scatter < min_scatter { continue; } + + let mut sites = Vec::new(); let mut spans = BTreeMap::new(); - spans.insert(at.clone(), site.span); - let finding = NeglectedCondition { - pattern: members.clone(), - support: *support, - missing: missing[0].clone(), - 
at, - spans, - }; - let key = serde_json::to_string(&finding).unwrap_or_default(); - if seen.insert(key) { - out.push(finding); + for s in sts { + let l = self.loc(s); + sites.push(l.clone()); + spans.insert(l, s.span); } - } - } - out.sort_by(|left, right| right.support.cmp(&left.support)); - out -} -fn groups(sites: &[DecisionSite]) -> Vec { - let mut groups = Vec::new(); - let mut seen_sites = BTreeSet::new(); - for site in sites { - let site_key = format!( - "{}\0{}\0{}\0{}\0{}", - site.file, - site.function, - site.line, - site.kind, - site.members.join("\0") - ); - if !seen_sites.insert(site_key) { - continue; - } - if let Some(group) = groups - .iter_mut() - .find(|group: &&mut Group| group.kind == site.kind && group.members == site.members) - { - group.sites.push(site.clone()); - } else { - groups.push(Group { - kind: site.kind.clone(), - members: site.members.clone(), - sites: vec![site.clone()], - order: groups.len(), + out.push(MissingAbstraction { + kind: kind.clone(), + members: members.clone(), + support: sts.len(), + scatter, + rank: sts.len() * scatter, + sites, + spans, }); } + out.sort_by(|a, b| b.rank.cmp(&a.rank)); + out } - groups -} -fn difference(left: &[String], right: &[String]) -> Vec { - left.iter() - .filter(|candidate| !right.contains(candidate)) - .cloned() - .collect() -} + fn neglected_conditions(&self, min_support: usize) -> Vec { + let mut popular = Vec::new(); + for ((kind, members), sts) in &self.groups { + if sts.len() >= min_support { + popular.push((kind.clone(), members.clone(), sts.len())); + } + } -fn loc(site: &DecisionSite) -> String { - format!("{}:{}:{}", site.file, site.function, site.line) -} + let mut out = Vec::new(); + for s in &self.sites { + for (kind, mem, sup) in &popular { + if kind != &s.kind { continue; } + + let mem_set: BTreeSet<_> = mem.iter().cloned().collect(); + let s_mem_set: BTreeSet<_> = s.members.iter().cloned().collect(); + + let diff_mem_s: BTreeSet<_> = 
mem_set.difference(&s_mem_set).cloned().collect(); + let diff_s_mem: BTreeSet<_> = s_mem_set.difference(&mem_set).cloned().collect(); -#[cfg(test)] -mod tests { - use super::*; + if diff_mem_s.len() == 1 && diff_s_mem.is_empty() { + if s.members == *mem { continue; } - fn site(function: &str, line: usize, members: &[&str]) -> DecisionSite { - DecisionSite { - kind: "conjunction".to_string(), - members: members.iter().map(|member| member.to_string()).collect(), - file: "a.rb".to_string(), - function: function.to_string(), - line, - span: [line, 0, line, 1], - predicate: members.join(" && "), + let l = self.loc(s); + let mut spans = BTreeMap::new(); + spans.insert(l.clone(), s.span); + + out.push(NeglectedCondition { + pattern: mem.clone(), + support: *sup, + missing: diff_mem_s.into_iter().next().unwrap(), + at: l, + spans, + }); + } + } } + out.sort_by(|a, b| b.support.cmp(&a.support)); + out.dedup_by(|a, b| a.at == b.at && a.pattern == b.pattern); + out } - #[test] - fn reports_missing_abstractions_and_neglected_conditions() { - let sites = vec![ - site("one", 1, &["a", "b", "c"]), - site("two", 2, &["a", "b", "c"]), - site("three", 3, &["a", "b", "c"]), - site("broken", 4, &["a", "b"]), - ]; - let missing = missing_abstractions(&sites, 2); - assert_eq!(missing.len(), 1); - assert_eq!(missing[0].support, 3); - assert_eq!(missing[0].scatter, 3); - - let neglected = neglected_conditions(&sites, 3); - assert_eq!(neglected.len(), 1); - assert_eq!(neglected[0].missing, "c"); - assert_eq!(neglected[0].at, "a.rb:broken:4"); + fn loc(&self, s: &DecisionSite) -> String { + format!("{}:{}:{}", s.file, s.function, s.line) } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/mod.rs b/gems/decomplex/rust/src/decomplex/detectors/mod.rs index d4fd9743f..9245a612a 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/mod.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/mod.rs @@ -3,4 +3,8 @@ pub mod decision_pressure; pub mod flay_similarity; pub mod miner; pub mod 
predicate_alias; +pub mod redundant_nil_guard; pub mod semantic_alias; +pub mod state_branch_density; +pub mod state_mesh; +pub mod temporal_ordering_pressure; diff --git a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs index a64ed895a..fe0845248 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs @@ -2,7 +2,7 @@ use crate::decomplex::ast::{self, Child, Node, Span}; use crate::decomplex::syntax::Language; use anyhow::Result; use serde::Serialize; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, BTreeSet}; use std::path::PathBuf; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] @@ -28,28 +28,24 @@ struct Pred { span: Span, } -#[derive(Clone, Debug)] -struct Scanner { - file: String, - lines: Vec, - preds: Vec, -} - -pub fn scan_files(files: &[PathBuf], language: Language) -> Result { - let _ = language; +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { let mut preds = Vec::new(); for file in files { let (root, lines) = ast::parse(file)?; - let mut scanner = Scanner::new(file.to_string_lossy().to_string(), lines); - scanner.walk(&root); - preds.extend(scanner.preds); + let mut p = PredicateAlias::new(file.to_string_lossy().to_string(), lines); + p.walk(&root); + preds.extend(p.preds); } - Ok(PredicateAliasReport { - alias_clusters: alias_clusters(&preds), - }) + Ok(Report::new(preds).findings()) } -impl Scanner { +struct PredicateAlias { + file: String, + lines: Vec, + preds: Vec, +} + +impl PredicateAlias { fn new(file: String, lines: Vec) -> Self { Self { file, @@ -68,119 +64,80 @@ impl Scanner { } fn record_def(&mut self, node: &Node) { - let Some(name) = child_to_string(node.children.first()) else { - return; - }; - let Some(scope) = node.children.get(1).and_then(ast::node) else { - return; - }; - if scope.r#type != "SCOPE" { - return; - } - let 
Some(body) = scope.children.get(2).and_then(ast::node) else { - return; + let name = match node.children.get(0) { + Some(Child::Symbol(s)) => s.clone(), + _ => return, }; - if body.r#type == "BLOCK" { - return; - } + let scope = node.children.get(1).and_then(ast::node); + let Some(scope) = scope else { return }; + if scope.r#type != "SCOPE" { return }; + + let body = scope.children.get(2).and_then(ast::node); + let Some(body) = body else { return }; + if body.r#type == "BLOCK" { return }; + + let txt = ast::slice(body, &self.lines); + if txt.is_empty() || txt.len() > 200 { return }; - let text = ast::slice(body, &self.lines); - if text.is_empty() || text.len() > 200 { - return; - } self.preds.push(Pred { name: name.clone(), - body: text, + body: txt, file: self.file.clone(), defn: name, line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], }); } } -fn alias_clusters(predicates: &[Pred]) -> Vec { - let mut by_body: Vec<(&str, Vec<&Pred>)> = Vec::new(); - for predicate in predicates { - if let Some((_, rows)) = by_body.iter_mut().find(|(body, _)| *body == predicate.body.as_str()) { - rows.push(predicate); - } else { - by_body.push((predicate.body.as_str(), vec![predicate])); +struct Report { + preds: Vec, +} + +impl Report { + fn new(preds: Vec) -> Self { + Self { preds } + } + + fn findings(&self) -> PredicateAliasReport { + PredicateAliasReport { + alias_clusters: self.alias_clusters(), } } - let mut out = by_body - .into_iter() - .filter_map(|(body, rows)| { - let mut names = Vec::new(); - for predicate in &rows { - if !names.contains(&predicate.name) { - names.push(predicate.name.clone()); - } + + fn alias_clusters(&self) -> Vec { + let mut by_body: Vec<(String, Vec<&Pred>)> = Vec::new(); + for p in &self.preds { + if let Some(entry) = by_body.iter_mut().find(|(b, _)| b == &p.body) { + entry.1.push(p); + } else { 
+ by_body.push((p.body.clone(), vec![p])); } - if names.len() < 2 { - return None; + } + + let mut out = Vec::new(); + for (body, ps) in by_body { + let mut names_set = BTreeSet::new(); + for p in &ps { names_set.insert(p.name.clone()); } + let names: Vec<_> = names_set.into_iter().collect(); + if names.len() < 2 { continue; } + + let mut sites = Vec::new(); + let mut spans = BTreeMap::new(); + for p in &ps { + let loc = format!("{}:{}:{}", p.file, p.name, p.line); + sites.push(loc.clone()); + spans.insert(loc, p.span); } - let sites = rows - .iter() - .map(|predicate| format!("{}:{}:{}", predicate.file, predicate.name, predicate.line)) - .collect::>(); - let spans = rows - .iter() - .map(|predicate| { - ( - format!("{}:{}:{}", predicate.file, predicate.name, predicate.line), - predicate.span, - ) - }) - .collect::>(); - Some(AliasCluster { - body: body.to_string(), + + out.push(AliasCluster { + body, names, sites, spans, - }) - }) - .collect::>(); - out.sort_by(|left, right| right.names.len().cmp(&left.names.len())); - out -} - -fn child_to_string(child: Option<&Child>) -> Option { - match child { - Some(Child::String(value)) | Some(Child::Symbol(value)) => Some(value.clone()), - _ => None, - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn pred(name: &str, body: &str, line: usize) -> Pred { - Pred { - name: name.to_string(), - body: body.to_string(), - file: "a.rb".to_string(), - defn: name.to_string(), - line, - span: [line, 0, line, 1], + }); } - } - - #[test] - fn clusters_distinct_names_with_same_body() { - let clusters = alias_clusters(&[ - pred("heap?", "node.storage == :heap", 1), - pred("owned?", "node.storage == :heap", 2), - pred("other?", "node.storage == :frame", 3), - ]); - assert_eq!(clusters.len(), 1); - assert_eq!(clusters[0].body, "node.storage == :heap"); - assert_eq!(clusters[0].names, vec!["heap?".to_string(), "owned?".to_string()]); + out.sort_by(|a, b| b.names.len().cmp(&a.names.len())); + out } } diff --git 
a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs new file mode 100644 index 000000000..d61e5c9d7 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs @@ -0,0 +1,486 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct RedundantNilGuardRow { + pub at: String, + pub file: String, + pub defn: String, + pub line: usize, + pub span: Span, + pub local: String, + pub guard: String, + pub proof: String, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Flow { + known: BTreeSet, + terminated: bool, +} + +#[derive(Clone, Debug)] +struct NilFact { + local: String, + non_nil_when_true: bool, +} + +struct Finding { + file: String, + defn: String, + line: usize, + span: Span, + local: String, + guard: String, + proof: String, +} + +impl Finding { + fn to_h(&self) -> RedundantNilGuardRow { + let loc = format!("{}:{}:{}", self.file, self.defn, self.line); + let mut spans = BTreeMap::new(); + spans.insert(loc.clone(), self.span); + RedundantNilGuardRow { + at: loc, + file: self.file.clone(), + defn: self.defn.clone(), + line: self.line, + span: self.span, + local: self.local.clone(), + guard: self.guard.clone(), + proof: self.proof.clone(), + spans, + } + } +} + +const TERMINATING_CALLS: &[&str] = &["raise", "fail", "abort", "exit", "exit!"]; + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { + let mut findings = Vec::new(); + for file in files { + let (root, lines) = ast::parse(file)?; + let mut scanner = RedundantNilGuard::new(file.to_string_lossy().to_string(), lines); + scanner.walk(&root, &Vec::new()); + findings.extend(scanner.findings); + } + let mut out: Vec<_> = findings.into_iter().map(|f| 
f.to_h()).collect(); + out.sort_by(|a, b| { + a.file + .cmp(&b.file) + .then_with(|| a.line.cmp(&b.line)) + .then_with(|| a.local.cmp(&b.local)) + .then_with(|| a.guard.cmp(&b.guard)) + }); + Ok(out) +} + +struct RedundantNilGuard { + file: String, + lines: Vec, + findings: Vec, +} + +impl RedundantNilGuard { + fn new(file: String, lines: Vec) -> Self { + Self { + file, + lines, + findings: Vec::new(), + } + } + + fn walk(&mut self, node: &Node, defstack: &[String]) { + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + let mut next_defstack = defstack.to_vec(); + next_defstack.push(name.clone()); + self.process_block(&ast::body_stmts(node), &next_defstack, &BTreeSet::new()); + } + return; + } + + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, defstack); + } + } + + fn process_block(&mut self, stmts: &[&Node], defstack: &[String], known: &BTreeSet) -> Flow { + let mut current = known.clone(); + for stmt in stmts { + let flow = self.process_stmt(stmt, defstack, ¤t); + current = flow.known; + if flow.terminated { + return Flow { + known: current, + terminated: true, + }; + } + } + Flow { + known: current, + terminated: false, + } + } + + fn process_stmt(&mut self, node: &Node, defstack: &[String], known: &BTreeSet) -> Flow { + match node.r#type.as_str() { + "IF" | "UNLESS" => self.process_branch(node, defstack, known), + "LASGN" => { + if let Some(rhs) = node.children.get(1).and_then(ast::node) { + self.inspect_node(rhs, defstack, known); + } + let mut next_known = known.clone(); + if let Some(Child::String(name)) = node.children.first() { + next_known.remove(name); + } + Flow { + known: next_known, + terminated: false, + } + } + _ => { + self.inspect_node(node, defstack, known); + Flow { + known: known.clone(), + terminated: self.terminating(node), + } + } + } + } + + fn process_branch(&mut self, node: 
&Node, defstack: &[String], known: &BTreeSet) -> Flow { + let cond = node.children.get(0).and_then(ast::node); + let then_body = node.children.get(1).and_then(ast::node); + let else_body = node.children.get(2).and_then(ast::node); + + if let Some(cond) = cond { + self.inspect_node(cond, defstack, known); + } + + let then_known = self.known_for_branch(node.r#type.as_str(), true, cond, known); + let else_known = self.known_for_branch(node.r#type.as_str(), false, cond, known); + + let then_flow = self.process_block(&self.stmts_for(then_body), defstack, &then_known); + let else_flow = self.process_block(&self.stmts_for(else_body), defstack, &else_known); + + if then_flow.terminated && else_flow.terminated { + Flow { + known: BTreeSet::new(), + terminated: true, + } + } else if then_flow.terminated { + Flow { + known: else_flow.known, + terminated: false, + } + } else if else_flow.terminated { + Flow { + known: then_flow.known, + terminated: false, + } + } else { + let intersection: BTreeSet<_> = then_flow.known.intersection(&else_flow.known).cloned().collect(); + Flow { + known: intersection, + terminated: false, + } + } + } + + fn known_for_branch( + &self, + node_type: &str, + body_branch: bool, + cond: Option<&Node>, + known: &BTreeSet, + ) -> BTreeSet { + let mut next_known = known.clone(); + let cond_true_branch = if node_type == "IF" { + body_branch + } else { + !body_branch + }; + if let Some(cond) = cond { + for fact in self.branch_nil_facts(cond, cond_true_branch) { + next_known.insert(fact.local); + } + } + next_known + } + + fn inspect_node(&mut self, node: &Node, defstack: &[String], known: &BTreeSet) { + let recorded = self.record_redundant(node, defstack, known); + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + return; + } + if recorded && node.r#type == "OPCALL" { + return; + } + for child in node.children.iter().filter_map(ast::node) { + self.inspect_node(child, defstack, known); + } + } + + fn record_redundant(&mut self, node: &Node, defstack: 
&[String], known: &BTreeSet) -> bool { + let local = self.redundant_nil_subject(node, known); + let Some(local) = local else { return false }; + + let defn = defstack.last().map(|s| s.as_str()).unwrap_or("(top-level)"); + self.findings.push(Finding { + file: self.file.clone(), + defn: defn.to_string(), + line: node.first_lineno, + span: self.span(node), + local: local.clone(), + guard: ast::slice(node, &self.lines), + proof: format!("{} is already proven non-nil on this path", local), + }); + true + } + + fn redundant_nil_subject(&self, node: &Node, known: &BTreeSet) -> Option { + if node.r#type == "QCALL" { + return self.qcall_subject(node, known); + } + + let fact = self.nil_fact(node)?; + if known.contains(&fact.local) { + return Some(fact.local); + } + None + } + + fn nil_fact(&self, node: &Node) -> Option { + match node.r#type.as_str() { + "CALL" => { + let recv = node.children.get(0).and_then(ast::node)?; + let mid = match node.children.get(1)? { + Child::Symbol(s) => s, + _ => return None, + }; + let args = node.children.get(2); + if mid == "nil?" && (args.is_none() || matches!(args, Some(Child::Nil))) { + let subject = self.subject_key(recv)?; + return Some(NilFact { + local: subject, + non_nil_when_true: false, + }); + } + None + } + "OPCALL" => { + let recv = node.children.get(0).and_then(ast::node)?; + let mid = match node.children.get(1)? { + Child::Symbol(s) => s, + _ => return None, + }; + let args = node.children.get(2); + if mid == "!" 
{ + return self.negated_nil_fact(recv); + } + if mid == "==" || mid == "!=" { + return self.comparison_nil_fact(recv, mid, args); + } + None + } + _ => None, + } + } + + fn branch_nil_facts(&self, node: &Node, cond_truth: bool) -> Vec { + if node.r#type == "AND" { + if !cond_truth { + return Vec::new(); + } + let mut facts = Vec::new(); + for child in ast::flatten_and(node) { + facts.extend(self.branch_nil_facts(child, true)); + } + return facts; + } + + if node.r#type == "OPCALL" { + if let Some(Child::Symbol(mid)) = node.children.get(1) { + if mid == "!" { + if let Some(child) = node.children.get(0).and_then(ast::node) { + return self.branch_nil_facts(child, !cond_truth); + } + } + } + } + + if let Some(safe_receiver) = self.safe_nav_receiver_fact(node) { + if cond_truth { + return vec![safe_receiver]; + } + } + + if let Some(fact) = self.nil_fact(node) { + if cond_truth == fact.non_nil_when_true { + return vec![fact]; + } + } + + if let Some(truthy) = self.truthy_subject_fact(node) { + if cond_truth { + return vec![truthy]; + } + } + + Vec::new() + } + + fn safe_nav_receiver_fact(&self, node: &Node) -> Option { + if node.r#type == "QCALL" { + let recv = node.children.get(0).and_then(ast::node)?; + let subject = self.subject_key(recv)?; + return Some(NilFact { + local: subject, + non_nil_when_true: true, + }); + } + None + } + + fn truthy_subject_fact(&self, node: &Node) -> Option { + let subject = self.subject_key(node)?; + Some(NilFact { + local: subject, + non_nil_when_true: true, + }) + } + + fn negated_nil_fact(&self, node: &Node) -> Option { + let mut fact = self.nil_fact(node)?; + fact.non_nil_when_true = !fact.non_nil_when_true; + Some(fact) + } + + fn comparison_nil_fact(&self, recv: &Node, mid: &str, args: Option<&Child>) -> Option { + let subject = self.subject_key(recv)?; + if !self.nil_arg(args) { + return None; + } + Some(NilFact { + local: subject, + non_nil_when_true: mid == "!=", + }) + } + + fn qcall_subject(&self, node: &Node, known: &BTreeSet) 
-> Option { + let recv = node.children.get(0).and_then(ast::node)?; + let subject = self.subject_key(recv)?; + if known.contains(&subject) { + return Some(subject); + } + None + } + + fn subject_key(&self, node: &Node) -> Option { + match node.r#type.as_str() { + "LVAR" | "DVAR" | "VCALL" => { + match node.children.first()? { + Child::String(s) | Child::Symbol(s) => Some(s.clone()), + _ => None, + } + } + "CALL" => { + let recv = node.children.get(0).and_then(ast::node); + let mid = match node.children.get(1)? { + Child::Symbol(s) => s, + _ => return None, + }; + let args = node.children.get(2); + if (args.is_none() || matches!(args, Some(Child::Nil))) && self.stable_reader_name(mid) { + if let Some(recv) = recv { + if recv.r#type == "SELF" { + return Some(format!("self.{}", mid)); + } + let recv_key = self.subject_key(recv)?; + return Some(format!("{}.{}", recv_key, mid)); + } + } + None + } + _ => None, + } + } + + fn stable_reader_name(&self, mid: &str) -> bool { + !(mid.ends_with('=') || mid.ends_with('!') || mid == "[]") + } + + #[allow(dead_code)] + fn local_name(&self, node: &Node) -> Option { + if matches!(node.r#type.as_str(), "LVAR" | "DVAR") { + match node.children.first()? 
{ + Child::String(s) | Child::Symbol(s) => return Some(s.clone()), + _ => {} + } + } + None + } + + fn nil_arg(&self, args: Option<&Child>) -> bool { + let Some(Child::Node(node)) = args else { return false }; + if node.r#type != "LIST" { + return false; + } + node.children.iter().any(|c| match c { + Child::Node(n) => n.r#type == "NIL", + Child::Nil => true, + _ => false, + }) + } + + fn stmts_for<'a>(&self, node: Option<&'a Node>) -> Vec<&'a Node> { + let Some(node) = node else { return Vec::new() }; + if node.r#type == "BLOCK" { + node.children.iter().filter_map(ast::node).collect() + } else { + vec![node] + } + } + + fn terminating(&self, node: &Node) -> bool { + if matches!(node.r#type.as_str(), "RETURN" | "NEXT" | "BREAK") { + return true; + } + if !matches!(node.r#type.as_str(), "FCALL" | "VCALL" | "CALL") { + return false; + } + + let mid = if node.r#type == "CALL" { + node.children.get(1).and_then(|c| match c { + Child::Symbol(s) => Some(s.as_str()), + _ => None, + }) + } else { + node.children.get(0).and_then(|c| match c { + Child::Symbol(s) => Some(s.as_str()), + _ => None, + }) + }; + + if let Some(mid) = mid { + return TERMINATING_CALLS.contains(&mid); + } + false + } + + fn span(&self, node: &Node) -> Span { + [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ] + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs index ba64a0385..bd69d82a7 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs @@ -2,7 +2,7 @@ use crate::decomplex::ast::{self, Child, Node, Span}; use crate::decomplex::syntax::Language; use anyhow::Result; use serde::Serialize; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, BTreeSet}; use std::path::PathBuf; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] @@ -47,32 +47,27 @@ struct Use { span: Span, } 
-#[derive(Clone, Debug)] -struct Scanner { - file: String, - lines: Vec, - preds: Vec, - uses: Vec, -} - -pub fn scan_files(files: &[PathBuf], language: Language) -> Result { - let _ = language; +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { let mut preds = Vec::new(); let mut uses = Vec::new(); for file in files { let (root, lines) = ast::parse(file)?; - let mut scanner = Scanner::new(file.to_string_lossy().to_string(), lines); - scanner.walk(&root, &[]); + let mut scanner = SemanticAlias::new(file.to_string_lossy().to_string(), lines); + scanner.walk(&root, &Vec::new()); preds.extend(scanner.preds); uses.extend(scanner.uses); } - Ok(SemanticAliasReport { - alias_clusters: alias_clusters(&preds), - reification_misses: reification_misses(&preds, &uses), - }) + Ok(Report::new(preds, uses).findings()) } -impl Scanner { +struct SemanticAlias { + file: String, + lines: Vec, + preds: Vec, + uses: Vec, +} + +impl SemanticAlias { fn new(file: String, lines: Vec) -> Self { Self { file, @@ -83,257 +78,148 @@ impl Scanner { } fn walk(&mut self, node: &Node, defstack: &[String]) { - let next_defstack = ast::def_push(node, defstack); + let mut next_defstack = defstack.to_vec(); + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + next_defstack.push(name.clone()); + } + } + if node.r#type == "DEFN" { self.record_pred(node); } - if matches!(node.r#type.as_str(), "CALL" | "OPCALL") && comparison(node) { - let raw = ast::slice(node, &self.lines); + + if matches!(node.r#type.as_str(), "CALL" | "OPCALL") && self.comparison(node) { + let c = self.canon(&ast::slice(node, &self.lines)); self.uses.push(Use { - canon: canon(&raw), + canon: c, file: self.file.clone(), - defn: next_defstack - .last() - .cloned() - .unwrap_or_else(|| "(top-level)".to_string()), + defn: next_defstack.last().cloned().unwrap_or_else(|| 
"(top-level)".to_string()), line: node.first_lineno, - raw, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], + raw: ast::slice(node, &self.lines), + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], }); } + for child in node.children.iter().filter_map(ast::node) { self.walk(child, &next_defstack); } } - fn record_pred(&mut self, node: &Node) { - let Some(name) = child_to_string(node.children.first()) else { - return; - }; - if !name.ends_with('?') { - return; - } - let statements = ast::body_stmts(node); - if statements.len() != 1 { - return; - } - self.preds.push(Pred { - name, - canon: canon(&ast::slice(statements[0], &self.lines)), - file: self.file.clone(), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - }); + fn canon(&self, text: &str) -> String { + let (mut t, _) = ast::canon_polarity(text); + t = t.strip_prefix("self.").unwrap_or(&t).to_string(); + t = t.strip_prefix('@').unwrap_or(&t).to_string(); + + // Ruby: t = t.sub(/\A[A-Za-z_]\w*(?:\([^)]*\))?\.(?=[A-Za-z_]\w*\s*(==|!=|\.))/, "") + let re = regex::Regex::new(r"^[A-Za-z_]\w*(?:\([^)]*\))?\.(?P[A-Za-z_]\w*\s*(?:==|!=|\.))").unwrap(); + t = re.replace(&t, "$rest").to_string(); + + t.split_whitespace().collect::>().join(" ") } -} -fn alias_clusters(preds: &[Pred]) -> Vec { - let mut by_canon: Vec<(&str, Vec<&Pred>)> = Vec::new(); - for pred in preds { - if let Some((_, rows)) = by_canon - .iter_mut() - .find(|(existing, _)| *existing == pred.canon.as_str()) - { - rows.push(pred); - } else { - by_canon.push((pred.canon.as_str(), vec![pred])); + fn comparison(&self, node: &Node) -> bool { + let mid = node.children.get(1); + match mid { + Some(Child::Symbol(s)) => matches!(s.as_str(), "==" | "!=" | "nil?"), + _ => false } } - let mut out = by_canon - .into_iter() - .filter_map(|(canon, rows)| { - let mut names = Vec::new(); - for pred in &rows { - if 
!names.contains(&pred.name) { - names.push(pred.name.clone()); - } - } - if names.len() < 2 { - return None; - } - let sites = rows - .iter() - .map(|pred| format!("{}:{}:{}", pred.file, pred.name, pred.line)) - .collect::>(); - let spans = rows - .iter() - .map(|pred| (format!("{}:{}:{}", pred.file, pred.name, pred.line), pred.span)) - .collect::>(); - Some(SemanticAliasCluster { - canon: canon.to_string(), - names, - sites, - spans, - }) - }) - .collect::>(); - out.sort_by(|left, right| right.names.len().cmp(&left.names.len())); - out -} + fn record_pred(&mut self, node: &Node) { + if let Some(Child::Symbol(name)) = node.children.first() { + if !name.ends_with('?') { return; } -fn reification_misses(preds: &[Pred], uses: &[Use]) -> Vec { - let mut out = Vec::new(); - for usage in uses { - let usage_canon = usage.canon.clone(); - let Some(pred) = preds.iter().find(|candidate| candidate.canon == usage_canon) else { - continue; - }; - let usage_function = semantic_function_name(&usage.defn); - if usage_function.ends_with('?') - && preds - .iter() - .any(|candidate| candidate.canon == usage_canon && candidate.name == usage_function) - { - continue; + let stmts = ast::body_stmts(node); + if stmts.len() != 1 { return; } + + self.preds.push(Pred { + name: name.clone(), + canon: self.canon(&ast::slice(stmts[0], &self.lines)), + file: self.file.clone(), + line: node.first_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + }); } - let at = format!("{}:{}:{}", usage.file, usage_function, usage.line); - let mut spans = BTreeMap::new(); - spans.insert(at.clone(), usage.span); - out.push(ReificationMiss { - predicate: pred.name.clone(), - canon: usage_canon, - at, - spans, - raw: usage.raw.clone(), - }); } - out.sort_by(|left, right| left.predicate.cmp(&right.predicate)); - out } -fn canon(text: &str) -> String { - let (mut value, _) = ast::canon_polarity(text); - value = value.strip_prefix("self.").unwrap_or(&value).to_string(); - 
value = value.strip_prefix('@').unwrap_or(&value).to_string(); - value = strip_single_receiver_hop(&value); - value.split_whitespace().collect::>().join(" ") +struct Report { + preds: Vec, + uses: Vec, } -fn strip_single_receiver_hop(text: &str) -> String { - let Some(dot) = text.find('.') else { - return text.to_string(); - }; - let receiver = &text[..dot]; - if receiver.is_empty() || !identifier_like(receiver) { - return text.to_string(); +impl Report { + fn new(preds: Vec, uses: Vec) -> Self { + Self { preds, uses } } - let rest = &text[dot + 1..]; - let Some(attr_len) = leading_identifier_len(rest) else { - return text.to_string(); - }; - let after_attr = rest[attr_len..].trim_start(); - if !(after_attr.starts_with("==") || after_attr.starts_with("!=") || after_attr.starts_with('.')) { - return text.to_string(); - } - rest.to_string() -} -fn leading_identifier_len(text: &str) -> Option { - let mut chars = text.char_indices(); - let (_, first) = chars.next()?; - if !(first == '_' || first.is_ascii_alphabetic()) { - return None; - } - let mut end = first.len_utf8(); - for (index, ch) in chars { - if ch == '_' || ch.is_ascii_alphanumeric() { - end = index + ch.len_utf8(); - } else { - break; + fn findings(&self) -> SemanticAliasReport { + SemanticAliasReport { + alias_clusters: self.alias_clusters(), + reification_misses: self.reification_misses(), } } - Some(end) -} - -fn identifier_like(text: &str) -> bool { - let mut chars = text.chars(); - let Some(first) = chars.next() else { - return false; - }; - if !(first == '_' || first.is_ascii_alphabetic()) { - return false; - } - chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) -} - -fn semantic_function_name(name: &str) -> String { - name.strip_prefix("self.").unwrap_or(name).to_string() -} -fn comparison(node: &Node) -> bool { - let Some(method) = child_to_string(node.children.get(1)) else { - return false; - }; - matches!(method.as_str(), "==" | "!=" | "nil?") -} - -fn child_to_string(child: Option<&Child>) 
-> Option { - match child { - Some(Child::String(value)) | Some(Child::Symbol(value)) => Some(value.clone()), - _ => None, - } -} + fn alias_clusters(&self) -> Vec { + let mut by_canon: BTreeMap> = BTreeMap::new(); + for p in &self.preds { + by_canon.entry(p.canon.clone()).or_default().push(p); + } -#[cfg(test)] -mod tests { - use super::*; + let mut out = Vec::new(); + for (c, ps) in by_canon { + let mut names_set = BTreeSet::new(); + for p in &ps { names_set.insert(p.name.clone()); } + let names: Vec<_> = names_set.into_iter().collect(); + if names.len() < 2 { continue; } + + let mut sites = Vec::new(); + let mut spans = BTreeMap::new(); + for p in &ps { + let loc = format!("{}:{}:{}", p.file, p.name, p.line); + sites.push(loc.clone()); + spans.insert(loc, p.span); + } - fn pred(name: &str, body: &str, line: usize) -> Pred { - Pred { - name: name.to_string(), - canon: canon(body), - file: "a.rb".to_string(), - line, - span: [line, 0, line, 1], + out.push(SemanticAliasCluster { + canon: c, + names, + sites, + spans, + }); } + out.sort_by(|a, b| b.names.len().cmp(&a.names.len())); + out } - fn use_at(function: &str, raw: &str, line: usize) -> Use { - Use { - canon: canon(raw), - raw: raw.to_string(), - file: "a.rb".to_string(), - defn: function.to_string(), - line, - span: [line, 0, line, 1], + fn reification_misses(&self) -> Vec { + let mut by_canon: BTreeMap> = BTreeMap::new(); + for p in &self.preds { + by_canon.entry(p.canon.clone()).or_default().push(p); } - } - #[test] - fn canonicalizes_receiver_forms() { - assert_eq!(canon("node.provenance == :frame"), "provenance == :frame"); - assert_eq!(canon("@provenance == :frame"), "provenance == :frame"); - assert_eq!(canon("self.provenance == :frame"), "provenance == :frame"); - assert_eq!(canon("!x.heap?"), "x.heap?"); - assert_eq!(canon("stmt.expr? && ok"), "stmt.expr? 
&& ok"); - } - - #[test] - fn reports_aliases_and_reification_misses() { - let preds = vec![ - pred("frame?", "@provenance == :frame", 1), - pred("is_frame?", "provenance == :frame", 2), - pred("heap?", "@provenance == :heap", 3), - ]; - let uses = vec![use_at("somewhere", "node.provenance == :frame", 10)]; - let report = SemanticAliasReport { - alias_clusters: alias_clusters(&preds), - reification_misses: reification_misses(&preds, &uses), - }; - assert_eq!(report.alias_clusters.len(), 1); - assert_eq!(report.alias_clusters[0].names, vec!["frame?", "is_frame?"]); - assert_eq!(report.reification_misses.len(), 1); - assert_eq!(report.reification_misses[0].predicate, "frame?"); + let mut out = Vec::new(); + for u in &self.uses { + if let Some(ps) = by_canon.get(&u.canon) { + if ps.is_empty() { continue; } + if u.defn.ends_with('?') && ps.iter().any(|p| p.name == u.defn) { continue; } + + let loc = format!("{}:{}:{}", u.file, u.defn, u.line); + let mut spans = BTreeMap::new(); + spans.insert(loc.clone(), u.span); + + out.push(ReificationMiss { + predicate: ps[0].name.clone(), + canon: u.canon.clone(), + at: loc, + spans, + raw: u.raw.clone(), + }); + } + } + out.sort_by(|a, b| a.predicate.cmp(&b.predicate)); + out } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs new file mode 100644 index 000000000..f078f8fbb --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs @@ -0,0 +1,469 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use regex::Regex; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct StateBranchDensityRow { + pub at: String, + pub file: String, + pub method: String, + pub decisions: usize, + pub state_refs: Vec, + pub predicate: String, + pub 
score: usize, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Debug, Clone)] +struct Decision { + file: String, + defn: String, + line: usize, + span: Span, + predicate: String, + state_refs: Vec, +} + +const BRANCH_TYPES: &[&str] = &["IF", "UNLESS", "WHILE", "UNTIL"]; +const NOISE_MIDS: &[&str] = &[ + "!", "!=", "==", "===", "<", "<=", ">", ">=", "[]", "[]=", "to_s", "inspect", "class", +]; + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { + let mut parsed = Vec::new(); + let mut global_immutable_readers: BTreeMap> = BTreeMap::new(); + let mut global_immutable_reader_types: BTreeMap> = BTreeMap::new(); + let mut global_type_aliases: BTreeMap = BTreeMap::new(); + + for file in files { + let (root, lines) = ast::parse(file)?; + let scanner = StateBranchDensity::new(None, lines.clone(), None, None, None); + + for (name, readers) in scanner.immutable_struct_readers(&lines) { + global_immutable_readers.entry(name).or_default().extend(readers); + } + for (name, reader_types) in scanner.immutable_struct_reader_types(&lines) { + global_immutable_reader_types.entry(name).or_default().extend(reader_types); + } + global_type_aliases.extend(scanner.type_aliases(&lines)); + + parsed.push((file.to_string_lossy().to_string(), root, lines)); + } + + let mut all_decisions = Vec::new(); + for (file, root, lines) in parsed { + let mut scanner = StateBranchDensity::new( + Some(file), + lines, + Some(global_immutable_readers.clone()), + Some(global_immutable_reader_types.clone()), + Some(global_type_aliases.clone()), + ); + scanner.walk(&root, &Vec::new()); + all_decisions.extend(scanner.decisions); + } + + Ok(Report::new(all_decisions).findings()) +} + +struct StateBranchDensity { + file: String, + lines: Vec, + decisions: Vec, + immutable_readers: BTreeMap>, + immutable_reader_types: BTreeMap>, + type_aliases: BTreeMap, + method_param_types: BTreeMap>, +} + +impl StateBranchDensity { + fn new( + file: Option, + lines: Vec, + immutable_readers: 
Option>>, + immutable_reader_types: Option>>, + type_aliases: Option>, + ) -> Self { + let ir = immutable_readers.unwrap_or_else(|| BTreeMap::new()); // Simplified + let irt = immutable_reader_types.unwrap_or_else(|| BTreeMap::new()); + let ta = type_aliases.unwrap_or_else(|| BTreeMap::new()); + + // Re-extract if not provided (matches Ruby's initialize) + let ir = if ir.is_empty() { + let s = Self { + file: file.clone().unwrap_or_default(), + lines: lines.clone(), + decisions: Vec::new(), + immutable_readers: BTreeMap::new(), + immutable_reader_types: BTreeMap::new(), + type_aliases: BTreeMap::new(), + method_param_types: BTreeMap::new(), + }; + s.immutable_struct_readers(&lines) + } else { ir }; + + let irt = if irt.is_empty() { + let s = Self { + file: file.clone().unwrap_or_default(), + lines: lines.clone(), + decisions: Vec::new(), + immutable_readers: BTreeMap::new(), + immutable_reader_types: BTreeMap::new(), + type_aliases: BTreeMap::new(), + method_param_types: BTreeMap::new(), + }; + s.immutable_struct_reader_types(&lines) + } else { irt }; + + let ta = if ta.is_empty() { + let s = Self { + file: file.clone().unwrap_or_default(), + lines: lines.clone(), + decisions: Vec::new(), + immutable_readers: BTreeMap::new(), + immutable_reader_types: BTreeMap::new(), + type_aliases: BTreeMap::new(), + method_param_types: BTreeMap::new(), + }; + s.type_aliases(&lines) + } else { ta }; + + let mut s = Self { + file: file.unwrap_or_default(), + lines: lines.clone(), + decisions: Vec::new(), + immutable_readers: ir, + immutable_reader_types: irt, + type_aliases: ta, + method_param_types: BTreeMap::new(), + }; + s.method_param_types = s.extract_method_param_types(&lines); + s + } + + fn walk(&mut self, node: &Node, defstack: &[String]) { + let mut next_defstack = defstack.to_vec(); + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + 
next_defstack.push(name.clone()); + } + } + + self.record_branch(node, &next_defstack); + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_defstack); + } + } + + fn record_branch(&mut self, node: &Node, defstack: &[String]) { + let cond = match node.r#type.as_str() { + t if BRANCH_TYPES.contains(&t) => node.children.first().and_then(ast::node), + "CASE" => node.children.first().and_then(ast::node), + _ => None, + }; + let Some(cond) = cond else { return }; + + let defn = defstack.last().map(|s| s.as_str()).unwrap_or("(top-level)"); + let refs = self.state_refs(cond, defn); + if refs.is_empty() { + return; + } + + self.decisions.push(Decision { + file: self.file.clone(), + defn: defn.to_string(), + line: node.first_lineno, + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + predicate: ast::slice(cond, &self.lines), + state_refs: refs.into_iter().collect::>().into_iter().collect(), + }); + } + + fn state_refs(&self, node: &Node, defn: &str) -> Vec { + let mut refs = Vec::new(); + self.collect_state_refs(node, &mut refs, defn); + refs + } + + fn collect_state_refs(&self, node: &Node, refs: &mut Vec, defn: &str) { + match node.r#type.as_str() { + "IVAR" | "GVAR" => { + if let Some(Child::String(name)) = node.children.first() { + refs.push(name.clone()); + } + } + "CALL" | "QCALL" | "OPCALL" => { + let recv = node.children.get(0).and_then(ast::node); + let mid = node.children.get(1).and_then(|c| match c { + Child::Symbol(s) => Some(s), + _ => None, + }); + let args = node.children.get(2); + if let (Some(recv), Some(mid)) = (recv, mid) { + if self.state_attr_read(recv, mid, args, defn) { + refs.push(format!("{}.{}", ast::slice(recv, &self.lines), mid)); + } + } + } + _ => {} + } + for child in node.children.iter().filter_map(ast::node) { + self.collect_state_refs(child, refs, defn); + } + } + + fn state_attr_read(&self, recv: &Node, mid: &str, args: Option<&Child>, defn: &str) -> bool { + if 
NOISE_MIDS.contains(&mid) { + return false; + } + if !self.empty_arg_list(args) { + return false; + } + if self.immutable_struct_const_read(recv, mid, defn) { + return false; + } + true + } + + fn immutable_struct_const_read(&self, recv: &Node, mid: &str, defn: &str) -> bool { + let Some(owner_type) = self.immutable_receiver_type(recv, defn) else { + return false; + }; + self.immutable_reader(&owner_type, mid) + } + + fn immutable_receiver_type(&self, recv: &Node, defn: &str) -> Option { + if matches!(recv.r#type.as_str(), "CALL" | "QCALL" | "OPCALL") { + let recv_recv = recv.children.get(0).and_then(ast::node)?; + let recv_mid = recv.children.get(1).and_then(|c| match c { + Child::Symbol(s) => Some(s), + _ => None, + })?; + let recv_args = recv.children.get(2); + return self.immutable_reader_result_type(recv_recv, recv_mid, recv_args, defn); + } + if recv.r#type == "LVAR" { + let name = match recv.children.first()? { + Child::String(s) => s, + _ => return None, + }; + return self.method_param_types.get(defn)?.get(name).cloned(); + } + None + } + + fn immutable_reader(&self, type_name: &str, mid: &str) -> bool { + let resolved = self.resolve_type_alias(type_name); + let readers = self.immutable_readers.get(&resolved).or_else(|| { + resolved.split("::").last().and_then(|last| self.immutable_readers.get(last)) + }); + readers.map(|r| r.contains(mid)).unwrap_or(false) + } + + fn immutable_reader_result_type(&self, recv: &Node, mid: &str, args: Option<&Child>, defn: &str) -> Option { + if !self.empty_arg_list(args) { + return None; + } + let owner_type = self.immutable_receiver_type(recv, defn)?; + let resolved = self.resolve_type_alias(&owner_type); + let reader_types = self.immutable_reader_types.get(&resolved).or_else(|| { + resolved.split("::").last().and_then(|last| self.immutable_reader_types.get(last)) + })?; + reader_types.get(mid).cloned() + } + + fn empty_arg_list(&self, args: Option<&Child>) -> bool { + match args { + None | Some(Child::Nil) => true, + 
Some(Child::Node(node)) if node.r#type == "LIST" => { + node.children.iter().all(|c| matches!(c, Child::Nil)) + } + _ => false, + } + } + + fn immutable_struct_readers(&self, lines: &[String]) -> BTreeMap> { + let mut readers = BTreeMap::new(); + let mut class_stack = Vec::new(); + let class_struct_re = Regex::new(r"^\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b").unwrap(); + let const_re = Regex::new(r"^\s*const\s+:([A-Za-z_]\w*)\b").unwrap(); + let end_re = Regex::new(r"^\s*end\s*(?:#.*)?$").unwrap(); + + for line in lines { + if let Some(caps) = class_struct_re.captures(line) { + class_stack.push(caps[1].to_string()); + continue; + } + if !class_stack.is_empty() { + if let Some(caps) = const_re.captures(line) { + readers.entry(class_stack.last().unwrap().clone()).or_insert_with(BTreeSet::new).insert(caps[1].to_string()); + continue; + } + } + if end_re.is_match(line) { + class_stack.pop(); + } + } + readers + } + + fn immutable_struct_reader_types(&self, lines: &[String]) -> BTreeMap> { + let mut reader_types = BTreeMap::new(); + let mut class_stack = Vec::new(); + let class_struct_re = Regex::new(r"^\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b").unwrap(); + let const_type_re = Regex::new(r"^\s*const\s+:([A-Za-z_]\w*)\s*,\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\b").unwrap(); + let end_re = Regex::new(r"^\s*end\s*(?:#.*)?$").unwrap(); + + for line in lines { + if let Some(caps) = class_struct_re.captures(line) { + class_stack.push(caps[1].to_string()); + continue; + } + if !class_stack.is_empty() { + if let Some(caps) = const_type_re.captures(line) { + reader_types.entry(class_stack.last().unwrap().clone()).or_insert_with(BTreeMap::new).insert(caps[1].to_string(), caps[2].to_string()); + continue; + } + } + if end_re.is_match(line) { + class_stack.pop(); + } + } + reader_types + } + + fn type_aliases(&self, lines: &[String]) -> BTreeMap { + let mut aliases = BTreeMap::new(); + let type_alias_re = 
Regex::new(r"^\s*([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*([A-Z]\w*(?:::[A-Z]\w*)*)\s*\}").unwrap(); + let const_alias_re = Regex::new(r"^\s*([A-Z]\w*)\s*=\s*([A-Z]\w*(?:::[A-Z]\w*)*)\b").unwrap(); + + for line in lines { + if let Some(caps) = type_alias_re.captures(line) { + aliases.insert(caps[1].to_string(), caps[2].to_string()); + } else if let Some(caps) = const_alias_re.captures(line) { + aliases.insert(caps[1].to_string(), caps[2].to_string()); + } + } + aliases + } + + fn resolve_type_alias(&self, type_name: &str) -> String { + let mut seen = BTreeSet::new(); + let mut current = type_name.to_string(); + loop { + if seen.contains(¤t) { + return current; + } + seen.insert(current.clone()); + let target = self.type_aliases.get(¤t).or_else(|| { + current.split("::").last().and_then(|last| self.type_aliases.get(last)) + }); + match target { + Some(t) => current = t.clone(), + None => return current, + } + } + } + + fn extract_method_param_types(&self, lines: &[String]) -> BTreeMap> { + let mut types_by_method = BTreeMap::new(); + let mut pending_sig = String::new(); + let def_re = Regex::new(r"^\s*def\s+([A-Za-z_]\w*[!?=]?)(?:\s|\(|$)").unwrap(); + + for line in lines { + if self.pending_sig_active(line, &pending_sig) { + pending_sig.push_str(line); + } + if let Some(caps) = def_re.captures(line) { + types_by_method.insert(caps[1].to_string(), self.sig_param_types(&pending_sig)); + pending_sig.clear(); + } + } + types_by_method + } + + fn pending_sig_active(&self, line: &str, pending_sig: &str) -> bool { + !pending_sig.is_empty() || line.trim().starts_with("sig") + } + + fn sig_param_types(&self, sig_source: &str) -> BTreeMap { + let params_re = Regex::new(r"params\s*\((.*?)\)").unwrap(); + let param_pair_re = Regex::new(r"([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)").unwrap(); + let mut params = BTreeMap::new(); + if let Some(p_caps) = params_re.captures(sig_source) { + for pair in param_pair_re.captures_iter(&p_caps[1]) { + 
params.insert(pair[1].to_string(), pair[2].to_string()); + } + } + params + } +} + +struct Report { + decisions: Vec, +} + +impl Report { + fn new(decisions: Vec) -> Self { + Self { decisions } + } + + fn findings(&self) -> Vec { + let mut groups: BTreeMap<(String, String), Vec> = BTreeMap::new(); + for d in &self.decisions { + groups.entry((d.file.clone(), d.defn.clone())).or_default().push(d.clone()); + } + + let mut rows = Vec::new(); + for ((file, defn), ds) in groups { + let mut refs = BTreeSet::new(); + for d in &ds { + for r in &d.state_refs { + refs.insert(r.clone()); + } + } + let refs: Vec<_> = refs.into_iter().collect(); + let score = ds.len() * refs.len().max(1); + + let mut sites = Vec::new(); + let mut spans = BTreeMap::new(); + for d in &ds { + let loc = format!("{}:{}:{}", d.file, d.defn, d.line); + sites.push(loc.clone()); + spans.insert(loc, d.span); + } + + rows.push(StateBranchDensityRow { + at: format!("{}:{}:{}", file, defn, ds.first().unwrap().line), + file, + method: defn, + decisions: ds.len(), + state_refs: refs, + predicate: ds.first().unwrap().predicate.clone(), + score, + sites, + spans, + }); + } + + rows.sort_by(|a, b| { + b.score + .cmp(&a.score) + .then_with(|| b.decisions.cmp(&a.decisions)) + .then_with(|| a.file.cmp(&b.file)) + .then_with(|| a.method.cmp(&b.method)) + }); + rows + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs new file mode 100644 index 000000000..49ba03ba6 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs @@ -0,0 +1,594 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::detectors::semantic_alias; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::{Path, PathBuf}; + +#[derive(Clone, Debug, Serialize)] +pub struct StateMeshReport { + pub state_mesh: StateMeshMeta, + pub fields: 
BTreeMap, + pub hierarchy: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct StateMeshMeta { + pub total_fields: usize, + pub total_writes: usize, + pub total_reads: usize, + pub total_re_derivations: usize, + pub min_writes: usize, + pub custom_fields: Option>, +} + +#[derive(Clone, Debug, Serialize)] +pub struct StateFieldRow { + pub messiness: f64, + pub rank: usize, + pub metrics: FieldMetricsRow, + pub writers: Vec, + pub readers: Vec, + pub re_derivations: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct FieldMetricsRow { + pub writes: usize, + pub reads: usize, + pub re_derivations: usize, + pub scatter: usize, + pub write_scatter: usize, + pub read_scatter: usize, + pub receiver_types: usize, + pub fix_churn: f64, + pub pressure: usize, + pub percentiles: BTreeMap, +} + +#[derive(Clone, Debug, Serialize)] +pub struct SiteInfo { + pub file: String, + pub defn: String, + pub line: usize, + pub recv: String, + pub span: Span, +} + +#[derive(Clone, Debug, Serialize)] +pub struct ReDerivationInfo { + pub file: String, + pub defn: String, + pub line: usize, + pub raw: String, + pub predicate: String, + pub canon: String, +} + +#[derive(Clone, Debug, Serialize)] +pub struct DirObj { + pub name: String, + pub writers: usize, + pub readers: usize, + pub files: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct FileObj { + pub name: String, + pub writers: usize, + pub readers: usize, + pub defns: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct DefnObj { + pub name: String, + pub writers: usize, + pub readers: usize, + pub fields: DefnFields, +} + +#[derive(Clone, Debug, Serialize)] +pub struct DefnFields { + pub written: Vec, + pub read: Vec, +} + +#[derive(Clone, Debug)] +struct Write { + #[allow(dead_code)] + attr: String, + norm: String, + recv: String, + file: String, + defn: String, + line: usize, + span: Span, +} + +#[derive(Clone, Debug)] +struct Read { + #[allow(dead_code)] + attr: String, + norm: String, + recv: String, + 
file: String, + defn: String, + line: usize, + span: Span, +} + +#[derive(Clone, Debug)] +struct ReDerivation { + field: String, + file: String, + defn: String, + line: usize, + raw: String, + predicate: String, + canon: String, +} + +struct FieldMetrics { + name: String, + writes: usize, + reads: usize, + re_derivations: usize, + scatter: usize, + write_scatter: usize, + read_scatter: usize, + receiver_types: usize, + messiness: f64, + pressure: usize, + percentiles: BTreeMap, + rank: usize, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let mut src_map = BTreeMap::new(); + for file in files { + let (root, lines) = ast::parse(file)?; + src_map.insert(file.to_string_lossy().to_string(), (root, lines)); + } + + let mut sm = StateMesh::new(src_map); + sm.run(language)?; + Ok(sm.to_json_graph()) +} + +struct StateMesh { + src_map: BTreeMap)>, + min_writes: usize, + custom_fields: Option>, + writes: Vec, + reads: Vec, + re_derivations: Vec, +} + +impl StateMesh { + fn new(src_map: BTreeMap)>) -> Self { + Self { + src_map, + min_writes: 2, + custom_fields: None, + writes: Vec::new(), + reads: Vec::new(), + re_derivations: Vec::new(), + } + } + + fn run(&mut self, language: Language) -> Result<()> { + self.discover_fields(); + if self.known_field_norms().is_empty() { + return Ok(()); + } + + self.find_reads(); + self.find_re_derivations(language)?; + Ok(()) + } + + fn discover_fields(&mut self) { + let files: Vec<_> = self.src_map.keys().cloned().collect(); + for file in files { + let (root, lines) = self.src_map.get(&file).unwrap(); + let mut writes = Vec::new(); + self.walk_writes(root, lines, &Vec::new(), &file, &mut writes); + self.writes.extend(writes); + } + } + + fn walk_writes(&self, node: &Node, lines: &[String], defstack: &[String], file: &str, out: &mut Vec) { + let mut next_defstack = defstack.to_vec(); + match node.r#type.as_str() { + "CLASS" | "MODULE" | "DEFN" => { + if let Some(Child::Symbol(name)) = node.children.first() { + 
next_defstack.push(name.clone()); + } + } + "DEFS" => { + if let Some(Child::Symbol(name)) = node.children.get(1) { + next_defstack.push(name.clone()); + } + } + "ATTRASGN" => { + if let (Some(recv), Some(Child::Symbol(msg))) = (node.children.get(0).and_then(ast::node), node.children.get(1)) { + if msg != "[]=" { + let attr = msg.trim_end_matches('=').to_string(); + let norm = self.normalize(&attr); + out.push(Write { + attr, + norm, + recv: self.recv_slice(Some(recv), lines), + file: file.to_string(), + defn: next_defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + line: node.first_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + }); + } + } + } + "IASGN" => { + if let Some(Child::String(attr)) = node.children.first() { + let norm = self.normalize(attr); + out.push(Write { + attr: attr.clone(), + norm, + recv: "self".to_string(), + file: file.to_string(), + defn: next_defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + line: node.first_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + }); + } + } + _ => {} + } + + for child in node.children.iter().filter_map(ast::node) { + self.walk_writes(child, lines, &next_defstack, file, out); + } + } + + fn find_reads(&mut self) { + let field_norms = self.known_field_norms(); + let files: Vec<_> = self.src_map.keys().cloned().collect(); + for file in files { + let (root, lines) = self.src_map.get(&file).unwrap(); + let mut reads = Vec::new(); + self.walk_reads(root, lines, &Vec::new(), &file, &field_norms, &mut reads); + self.reads.extend(reads); + } + } + + fn walk_reads(&self, node: &Node, lines: &[String], defstack: &[String], file: &str, field_norms: &BTreeSet, out: &mut Vec) { + let mut next_defstack = defstack.to_vec(); + match node.r#type.as_str() { + "CLASS" | "MODULE" | "DEFN" => { + if let Some(Child::Symbol(name)) = node.children.first() { + next_defstack.push(name.clone()); + } + } + 
"DEFS" => { + if let Some(Child::Symbol(name)) = node.children.get(1) { + next_defstack.push(name.clone()); + } + } + "CALL" | "OPCALL" | "FCALL" | "VCALL" => { + let recv = if node.r#type == "CALL" || node.r#type == "OPCALL" { node.children.get(0).and_then(ast::node) } else { None }; + let mid = if node.r#type == "CALL" || node.r#type == "OPCALL" { node.children.get(1) } else { node.children.get(0) }; + let args = if node.r#type == "CALL" || node.r#type == "OPCALL" { node.children.get(2) } else { node.children.get(1) }; + + if let Some(Child::Symbol(name)) = mid { + if args.is_none() || matches!(args, Some(Child::Nil)) || self.is_empty_list(args) { + if field_norms.contains(name) { + out.push(Read { + attr: name.clone(), + norm: name.clone(), + recv: self.recv_slice(recv, lines), + file: file.to_string(), + defn: next_defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + line: node.first_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + }); + } + } + } + } + "IVAR" => { + if let Some(Child::String(name)) = node.children.first() { + let norm = self.normalize(name); + if field_norms.contains(&norm) { + out.push(Read { + attr: name.clone(), + norm, + recv: "self".to_string(), + file: file.to_string(), + defn: next_defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + line: node.first_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + }); + } + } + } + _ => {} + } + + for child in node.children.iter().filter_map(ast::node) { + self.walk_reads(child, lines, &next_defstack, file, field_norms, out); + } + } + + fn find_re_derivations(&mut self, language: Language) -> Result<()> { + let field_norms = self.known_field_norms(); + if field_norms.is_empty() { + return Ok(()); + } + + let files: Vec<_> = self.src_map.keys().map(PathBuf::from).collect(); + let sa = semantic_alias::scan_files(&files, language)?; + + for m in sa.reification_misses { + let 
loc = m.at.clone(); + let parts: Vec<&str> = loc.split(':').collect(); + if parts.len() < 3 { continue; } + let line = parts.last().unwrap().parse::().unwrap_or(0); + let defn = parts[parts.len() - 2].to_string(); + let file = parts[..parts.len() - 2].join(":"); + + if let Some(matched) = field_norms.iter().find(|fnorm| m.raw.contains(*fnorm) || m.canon.contains(*fnorm)) { + self.re_derivations.push(ReDerivation { + field: matched.clone(), + file, + defn, + line, + raw: m.raw, + predicate: m.predicate, + canon: m.canon, + }); + } + } + Ok(()) + } + + fn metrics(&self) -> Vec { + let field_norms = self.known_field_norms(); + let mut metrics_vec = Vec::new(); + + for fnorm in &field_norms { + let ws: Vec<_> = self.writes.iter().filter(|w| &w.norm == fnorm).collect(); + let rs: Vec<_> = self.reads.iter().filter(|r| &r.norm == fnorm).collect(); + let ds: Vec<_> = self.re_derivations.iter().filter(|d| &d.field == fnorm).collect(); + + let mut all_sites = BTreeSet::new(); + for w in &ws { all_sites.insert((w.file.clone(), w.defn.clone())); } + for r in &rs { all_sites.insert((r.file.clone(), r.defn.clone())); } + for d in &ds { all_sites.insert((d.file.clone(), d.defn.clone())); } + let scatter = all_sites.len(); + + let mut write_sites = BTreeSet::new(); + for w in &ws { write_sites.insert((w.file.clone(), w.defn.clone())); } + let write_scatter = write_sites.len(); + + let mut read_sites = BTreeSet::new(); + for r in &rs { read_sites.insert((r.file.clone(), r.defn.clone())); } + let read_scatter = read_sites.len(); + + let mut receivers = BTreeSet::new(); + for w in &ws { receivers.insert(w.recv.clone()); } + for r in &rs { receivers.insert(r.recv.clone()); } + let receiver_types = receivers.len(); + + let n_writes = ws.len(); + let n_reads = rs.len(); + let n_reder = ds.len(); + let fix_churn = 1.0; + let messiness = (n_writes + n_reads + n_reder) as f64 * scatter as f64 * fix_churn; + let pressure = read_scatter; + + metrics_vec.push(FieldMetrics { + name: 
fnorm.clone(), + writes: n_writes, + reads: n_reads, + re_derivations: n_reder, + scatter, + write_scatter, + read_scatter, + receiver_types, + messiness, + pressure, + percentiles: BTreeMap::new(), + rank: 0, + }); + } + + metrics_vec.sort_by(|a, b| b.messiness.partial_cmp(&a.messiness).unwrap_or(std::cmp::Ordering::Equal).then_with(|| a.name.cmp(&b.name))); + for (i, m) in metrics_vec.iter_mut().enumerate() { + m.rank = i + 1; + } + + let total = metrics_vec.len(); + if total > 1 { + let attrs = ["writes", "reads", "re_derivations", "scatter", "messiness", "pressure"]; + for attr in &attrs { + let mut vals: Vec = metrics_vec.iter().map(|m| match *attr { + "writes" => m.writes as f64, + "reads" => m.reads as f64, + "re_derivations" => m.re_derivations as f64, + "scatter" => m.scatter as f64, + "messiness" => m.messiness, + "pressure" => m.pressure as f64, + _ => 0.0, + }).collect(); + vals.sort_by(|a, b| a.partial_cmp(b).unwrap()); + + for m in metrics_vec.iter_mut() { + let v = match *attr { + "writes" => m.writes as f64, + "reads" => m.reads as f64, + "re_derivations" => m.re_derivations as f64, + "scatter" => m.scatter as f64, + "messiness" => m.messiness, + "pressure" => m.pressure as f64, + _ => 0.0, + }; + let pctl = vals.iter().filter(|&&x| x <= v).count() * 100 / total; + m.percentiles.insert(attr.to_string(), pctl); + } + } + } + + metrics_vec + } + + fn to_json_graph(&self) -> StateMeshReport { + let fm = self.metrics(); + let fm_index: BTreeMap = fm.iter().map(|m| (m.name.clone(), m)).collect(); + let field_norms = self.known_field_norms(); + + let mut fields_obj = BTreeMap::new(); + for fnorm in &field_norms { + let m = fm_index.get(fnorm).unwrap(); + let ws: Vec<_> = self.writes.iter().filter(|w| &w.norm == fnorm).map(|w| SiteInfo { + file: w.file.clone(), defn: w.defn.clone(), line: w.line, recv: w.recv.clone(), span: w.span, + }).collect(); + let rs: Vec<_> = self.reads.iter().filter(|r| &r.norm == fnorm).map(|r| SiteInfo { + file: r.file.clone(), 
defn: r.defn.clone(), line: r.line, recv: r.recv.clone(), span: r.span, + }).collect(); + let ds: Vec<_> = self.re_derivations.iter().filter(|d| &d.field == fnorm).map(|d| ReDerivationInfo { + file: d.file.clone(), defn: d.defn.clone(), line: d.line, raw: d.raw.clone(), predicate: d.predicate.clone(), canon: d.canon.clone(), + }).collect(); + + fields_obj.insert(fnorm.clone(), StateFieldRow { + messiness: m.messiness, + rank: m.rank, + metrics: FieldMetricsRow { + writes: m.writes, + reads: m.reads, + re_derivations: m.re_derivations, + scatter: m.scatter, + write_scatter: m.write_scatter, + read_scatter: m.read_scatter, + receiver_types: m.receiver_types, + fix_churn: 1.0, + pressure: m.pressure, + percentiles: m.percentiles.clone(), + }, + writers: ws, + readers: rs, + re_derivations: ds, + }); + } + + let mut all_unit_sites: BTreeMap<(String, String), (BTreeSet, BTreeSet)> = BTreeMap::new(); + for w in &self.writes { + let entry = all_unit_sites.entry((w.file.clone(), w.defn.clone())).or_default(); + entry.0.insert(w.norm.clone()); + } + for r in &self.reads { + let entry = all_unit_sites.entry((r.file.clone(), r.defn.clone())).or_default(); + entry.1.insert(r.norm.clone()); + } + + let mut dirs: BTreeMap>> = BTreeMap::new(); + for ((file, defn), (ws, rs)) in all_unit_sites { + let path = Path::new(&file); + let dir = path.parent().map(|p| p.to_string_lossy().to_string()).unwrap_or_else(|| ".".to_string()); + let dir = if dir.is_empty() { ".".to_string() } else { dir }; + let base = path.file_name().map(|s| s.to_string_lossy().to_string()).unwrap_or_else(|| file.clone()); + + dirs.entry(dir).or_default() + .entry(base).or_default() + .insert(defn.clone(), DefnObj { + name: defn, + writers: ws.len(), + readers: rs.len(), + fields: DefnFields { + written: ws.into_iter().collect(), + read: rs.into_iter().collect(), + }, + }); + } + + let mut hierarchy = Vec::new(); + for (dname, files_map) in dirs { + let mut dir_writers = 0; + let mut dir_readers = 0; + let mut 
file_objs = Vec::new(); + for (fname, defns_map) in files_map { + let mut file_writers = 0; + let mut file_readers = 0; + let mut defn_objs: Vec = defns_map.into_iter().map(|(_, v)| v).collect(); + defn_objs.sort_by(|a, b| a.name.cmp(&b.name)); + for d in &defn_objs { + file_writers += d.writers; + file_readers += d.readers; + } + dir_writers += file_writers; + dir_readers += file_readers; + file_objs.push(FileObj { + name: fname, + writers: file_writers, + readers: file_readers, + defns: defn_objs, + }); + } + file_objs.sort_by(|a, b| a.name.cmp(&b.name)); + hierarchy.push(DirObj { + name: dname, + writers: dir_writers, + readers: dir_readers, + files: file_objs, + }); + } + hierarchy.sort_by(|a, b| a.name.cmp(&b.name)); + + StateMeshReport { + state_mesh: StateMeshMeta { + total_fields: field_norms.len(), + total_writes: self.writes.len(), + total_reads: self.reads.len(), + total_re_derivations: self.re_derivations.len(), + min_writes: self.min_writes, + custom_fields: self.custom_fields.clone(), + }, + fields: fields_obj, + hierarchy, + } + } + + fn normalize(&self, attr: &str) -> String { + attr.trim_start_matches('@').to_string() + } + + fn known_field_norms(&self) -> BTreeSet { + let mut discovered = BTreeMap::new(); + for w in &self.writes { + *discovered.entry(w.norm.clone()).or_insert(0) += 1; + } + let mut norms: BTreeSet = discovered.into_iter() + .filter(|(_, count)| *count >= self.min_writes) + .map(|(name, _)| name) + .collect(); + if let Some(custom) = &self.custom_fields { + norms.extend(custom.clone()); + } + norms + } + + fn recv_slice(&self, node: Option<&Node>, lines: &[String]) -> String { + let Some(node) = node else { return "?".to_string() }; + ast::slice(node, lines) + } + + fn is_empty_list(&self, args: Option<&Child>) -> bool { + if let Some(Child::Node(node)) = args { + if node.r#type == "LIST" { + return node.children.iter().all(|c| matches!(c, Child::Nil)); + } + } + false + } +} diff --git 
a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs new file mode 100644 index 000000000..90285a1b0 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs @@ -0,0 +1,268 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct TemporalOrderingPressureRow { + pub at: String, + pub file: String, + pub owner: String, + pub public_methods: usize, + pub state_methods: usize, + pub writers: usize, + pub state_fields: Vec, + pub shared_fields: Vec, + pub orderings: String, + pub state_space: String, + pub score: usize, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct MethodState { + name: String, + line: usize, + span: Span, + visibility: String, + reads: Vec, + writes: Vec, +} + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { + let mut rows = Vec::new(); + for file in files { + let (root, lines) = ast::parse(file)?; + let mut detector = TemporalOrderingPressure::new(file.to_string_lossy().to_string(), lines); + rows.extend(detector.scan(&root)); + } + rows.sort_by(|a, b| { + b.score + .cmp(&a.score) + .then_with(|| b.state_methods.cmp(&a.state_methods)) + .then_with(|| a.file.cmp(&b.file)) + .then_with(|| a.owner.cmp(&b.owner)) + }); + Ok(rows) +} + +struct TemporalOrderingPressure { + file: String, + lines: Vec, +} + +impl TemporalOrderingPressure { + fn new(file: String, lines: Vec) -> Self { + Self { file, lines } + } + + fn scan(&mut self, root: &Node) -> Vec { + let mut out = Vec::new(); + self.walk_owners(root, &Vec::new(), &mut out); + out + } + + fn walk_owners(&self, node: &Node, owners: &[String], out: &mut Vec) { + if matches!(node.r#type.as_str(), 
"CLASS" | "MODULE") { + let owner = self.owner_name(node); + let methods = self.owner_methods(node); + if let Some(row) = self.pressure_row(&owner, &methods) { + out.push(row); + } + let mut next_owners = owners.to_vec(); + next_owners.push(owner); + for child in node.children.iter().filter_map(ast::node) { + self.walk_owners(child, &next_owners, out); + } + } else { + for child in node.children.iter().filter_map(ast::node) { + self.walk_owners(child, owners, out); + } + } + } + + fn owner_name(&self, node: &Node) -> String { + let name = ast::slice(node.children.first().and_then(ast::node).unwrap_or(node), &self.lines); + if name.is_empty() { + "(anonymous)".to_string() + } else { + name + } + } + + fn owner_methods(&self, owner_node: &Node) -> Vec { + let Some(body) = self.owner_body(owner_node) else { + return Vec::new(); + }; + + let stmts = if body.r#type == "BLOCK" { + body.children.iter().filter_map(ast::node).collect::>() + } else { + vec![body] + }; + + let mut visibility = "public".to_string(); + let mut methods = Vec::new(); + + for stmt in stmts { + if self.visibility_marker(stmt) { + if let Some(Child::Symbol(name)) = stmt.children.first() { + visibility = name.clone(); + } + } else if matches!(stmt.r#type.as_str(), "DEFN" | "DEFS") { + methods.push(self.method_state(stmt, &visibility)); + } + } + methods + } + + fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { + let scope = owner_node.children.get(2).and_then(ast::node)?; + if scope.r#type != "SCOPE" { + return None; + } + scope.children.get(2).and_then(ast::node) + } + + fn visibility_marker(&self, node: &Node) -> bool { + if node.r#type == "VCALL" { + if let Some(Child::Symbol(name)) = node.children.first() { + return matches!(name.as_str(), "public" | "protected" | "private"); + } + } + false + } + + fn method_state(&self, defn_node: &Node, visibility: &str) -> MethodState { + let mut reads = Vec::new(); + let mut writes = Vec::new(); + self.collect_state_access(defn_node, &mut 
reads, &mut writes); + + let name_index = if defn_node.r#type == "DEFS" { 1 } else { 0 }; + let name = defn_node + .children + .get(name_index) + .and_then(|c| match c { + Child::Symbol(s) => Some(s.clone()), + _ => None, + }) + .unwrap_or_else(|| "(anonymous)".to_string()); + + let mut reads: Vec<_> = reads.into_iter().collect::>().into_iter().collect(); + let mut writes: Vec<_> = writes.into_iter().collect::>().into_iter().collect(); + reads.sort(); + writes.sort(); + + MethodState { + name, + line: defn_node.first_lineno, + span: [ + defn_node.first_lineno, + defn_node.first_column, + defn_node.last_lineno, + defn_node.last_column, + ], + visibility: visibility.to_string(), + reads, + writes, + } + } + + fn collect_state_access(&self, node: &Node, reads: &mut Vec, writes: &mut Vec) { + match node.r#type.as_str() { + "IASGN" => { + if let Some(Child::String(name)) = node.children.first() { + writes.push(name.clone()); + } + } + "IVAR" => { + if let Some(Child::String(name)) = node.children.first() { + reads.push(name.clone()); + } + } + _ => {} + } + for child in node.children.iter().filter_map(ast::node) { + self.collect_state_access(child, reads, writes); + } + } + + fn pressure_row(&self, owner: &str, methods: &[MethodState]) -> Option { + let public_methods: Vec<_> = methods.iter().filter(|m| m.visibility == "public").collect(); + let state_methods: Vec<_> = public_methods + .iter() + .filter(|m| !m.reads.is_empty() || !m.writes.is_empty()) + .collect(); + let writers: Vec<_> = public_methods.iter().filter(|m| !m.writes.is_empty()).collect(); + + if state_methods.len() < 3 || writers.len() < 2 { + return None; + } + + let mut fields_set = BTreeSet::new(); + for m in &state_methods { + for r in &m.reads { + fields_set.insert(r.clone()); + } + for w in &m.writes { + fields_set.insert(w.clone()); + } + } + let fields: Vec<_> = fields_set.into_iter().collect(); + + let shared_fields: Vec<_> = fields + .iter() + .filter(|field| { + state_methods + .iter() + 
.filter(|m| m.reads.contains(*field) || m.writes.contains(*field)) + .count() + >= 2 + }) + .cloned() + .collect(); + + if shared_fields.is_empty() { + return None; + } + + let n = state_methods.len(); + let state_space_exp = fields.len().min(12); + let state_space = 2usize.pow(state_space_exp as u32); + let score = (n * writers.len() * shared_fields.len().max(1)) + state_space; + + let first_line = state_methods.first()?.line; + let at = format!("{}:{}:{}", self.file, owner, first_line); + + let mut sites = Vec::new(); + let mut spans = BTreeMap::new(); + for m in &state_methods { + let loc = format!("{}:{}:{}", self.file, m.name, m.line); + sites.push(loc.clone()); + spans.insert(loc, m.span); + } + + Some(TemporalOrderingPressureRow { + at, + file: self.file.clone(), + owner: owner.to_string(), + public_methods: public_methods.len(), + state_methods: n, + writers: writers.len(), + state_fields: fields, + shared_fields, + orderings: self.factorial_label(n), + state_space: format!("2^{}", state_space_exp), + score, + sites, + spans, + }) + } + + fn factorial_label(&self, n: usize) -> String { + format!("{}!", n) + } +} diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs index 08b081d18..d553cb40b 100644 --- a/gems/decomplex/rust/src/main.rs +++ b/gems/decomplex/rust/src/main.rs @@ -2,7 +2,8 @@ mod decomplex; use anyhow::{bail, Context, Result}; use decomplex::detectors::{ - co_update, decision_pressure, flay_similarity, miner, predicate_alias, semantic_alias, + co_update, decision_pressure, flay_similarity, miner, predicate_alias, redundant_nil_guard, + semantic_alias, state_branch_density, state_mesh, temporal_ordering_pressure, }; use decomplex::parallel; use decomplex::syntax::Language; @@ -48,6 +49,30 @@ fn main() -> Result<()> { .with_context(|| "failed to scan decision-pressure facts")?; println!("{}", serde_json::to_string(&report)?); } + Command::StateBranchDensity { language, files, .. 
} => { + let language = Language::parse(&language)?; + let report = state_branch_density::scan_files(&files, language) + .with_context(|| "failed to scan state-branch-density facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::TemporalOrderingPressure { language, files, .. } => { + let language = Language::parse(&language)?; + let report = temporal_ordering_pressure::scan_files(&files, language) + .with_context(|| "failed to scan temporal-ordering-pressure facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::RedundantNilGuard { language, files, .. } => { + let language = Language::parse(&language)?; + let report = redundant_nil_guard::scan_files(&files, language) + .with_context(|| "failed to scan redundant-nil-guard facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::StateMesh { language, files, .. } => { + let language = Language::parse(&language)?; + let report = state_mesh::scan_files(&files, language) + .with_context(|| "failed to scan state-mesh facts")?; + println!("{}", serde_json::to_string(&report)?); + } Command::FlaySimilarity { language, mass, @@ -95,6 +120,26 @@ enum Command { files: Vec, jobs: Option, }, + StateBranchDensity { + language: String, + files: Vec, + jobs: Option, + }, + TemporalOrderingPressure { + language: String, + files: Vec, + jobs: Option, + }, + RedundantNilGuard { + language: String, + files: Vec, + jobs: Option, + }, + StateMesh { + language: String, + files: Vec, + jobs: Option, + }, FlaySimilarity { language: String, mass: usize, @@ -113,6 +158,10 @@ impl Command { | Self::Miner { jobs, .. } | Self::SemanticAliases { jobs, .. } | Self::DecisionPressure { jobs, .. } + | Self::StateBranchDensity { jobs, .. } + | Self::TemporalOrderingPressure { jobs, .. } + | Self::RedundantNilGuard { jobs, .. } + | Self::StateMesh { jobs, .. } | Self::FlaySimilarity { jobs, .. 
} => *jobs, } } @@ -190,6 +239,50 @@ fn parse_args(args: Vec) -> Result { jobs, }) } + "state-branch-density" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("state-branch-density requires at least one file"); + } + Ok(Command::StateBranchDensity { + language, + files, + jobs, + }) + } + "temporal-ordering-pressure" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("temporal-ordering-pressure requires at least one file"); + } + Ok(Command::TemporalOrderingPressure { + language, + files, + jobs, + }) + } + "redundant-nil-guard" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("redundant-nil-guard requires at least one file"); + } + Ok(Command::RedundantNilGuard { + language, + files, + jobs, + }) + } + "state-mesh" | "state-heatmap" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("state-mesh requires at least one file"); + } + Ok(Command::StateMesh { + language, + files, + jobs, + }) + } "flay-similarity" => { let mut language = String::from("ruby"); let mut mass = 32usize; diff --git a/gems/decomplex/test/detector_runner_test.rb b/gems/decomplex/test/detector_runner_test.rb index 528846802..b87050410 100644 --- a/gems/decomplex/test/detector_runner_test.rb +++ b/gems/decomplex/test/detector_runner_test.rb @@ -117,6 +117,112 @@ def emit_two end end + def test_temporal_ordering_pressure_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? 
+ + Tempfile.create(["decomplex-temporal-ordering", ".rb"]) do |file| + file.write(<<~RUBY) + class Order + def one; @a = 1; end + def two; @a = 2; @b = 3; end + def three; @b = 4; end + def reader; @a; end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("temporal-ordering-pressure", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_state_branch_density_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-state-branch", ".rb"]) do |file| + file.write(<<~RUBY) + class User < T::Struct + const :name, String + const :admin, T::Boolean + end + + class Checker + sig { params(user: User).void } + def check(user) + if user.admin + @checked = true + end + if @checked && user.name == "admin" + puts "Hello" + end + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("state-branch-density", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_redundant_nil_guard_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-redundant-nil", ".rb"]) do |file| + file.write(<<~RUBY) + def check(x) + if x + puts x.nil? + x&.foo + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("redundant-nil-guard", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_state_mesh_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? 
+ + Tempfile.create(["decomplex-state-mesh", ".rb"]) do |file| + file.write(<<~RUBY) + class Mesh + def initialize + @a = 1 + @b = 2 + end + + def writer + @a = 3 + end + + def reader + @a + @b + end + + def a_alias + @a + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("state-mesh", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + def test_decision_pressure_rust_engine_matches_ruby_engine_byte_for_byte skip "cargo is not available" unless cargo_available? From 566575513063cde9fd26bdb0b80238e7740e9d04 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Wed, 17 Jun 2026 00:56:02 +0000 Subject: [PATCH 12/52] Decomplex: Finish Tier 2 metric migration to Rust Migrates all Tier 2 detectors to Rust: - Inconsistent Rename Clones - Derived-State Staleness - Implicit Control Flow - Weighted Inlined Cognitive Complexity - Locality Drag - Operational Discontinuity Strict function-for-function parity maintained with the Ruby source. Refactored existing Tier 2/Tier 3 detectors (Miner, SemanticAlias) to ensure identical discovery order and grouping semantics. All tests pass with byte-for-byte JSON identity. 
Co-authored-by: gemini-cli <218195315+gemini-cli@users.noreply.github.com> --- .../lib/decomplex/detector_runner.rb | 84 +- .../lib/decomplex/native/derived_state.rb | 25 + .../decomplex/native/implicit_control_flow.rb | 25 + .../native/inconsistent_rename_clone.rb | 25 + .../lib/decomplex/native/locality_drag.rb | 25 + .../native/operational_discontinuity.rb | 25 + .../native/weighted_inlined_complexity.rb | 25 + .../lib/decomplex/ordered_protocol_mine.rb | 4 +- gems/decomplex/rust/src/decomplex/ast.rs | 20 +- .../decomplex/detectors/decision_pressure.rs | 2 +- .../src/decomplex/detectors/derived_state.rs | 169 +++++ .../detectors/implicit_control_flow.rs | 718 ++++++++++++++++++ .../detectors/inconsistent_rename_clone.rs | 239 ++++++ .../src/decomplex/detectors/local_flow.rs | 333 ++++++++ .../src/decomplex/detectors/locality_drag.rs | 264 +++++++ .../rust/src/decomplex/detectors/miner.rs | 4 +- .../rust/src/decomplex/detectors/mod.rs | 8 + .../detectors/operational_discontinuity.rs | 183 +++++ .../detectors/structural_topology.rs | 384 ++++++++++ .../weighted_inlined_cognitive_complexity.rs | 474 ++++++++++++ gems/decomplex/rust/src/main.rs | 190 ++++- gems/decomplex/test/detector_runner_test.rb | 148 ++++ 22 files changed, 3348 insertions(+), 26 deletions(-) create mode 100644 gems/decomplex/lib/decomplex/native/derived_state.rb create mode 100644 gems/decomplex/lib/decomplex/native/implicit_control_flow.rb create mode 100644 gems/decomplex/lib/decomplex/native/inconsistent_rename_clone.rb create mode 100644 gems/decomplex/lib/decomplex/native/locality_drag.rb create mode 100644 gems/decomplex/lib/decomplex/native/operational_discontinuity.rb create mode 100644 gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb create mode 100644 gems/decomplex/rust/src/decomplex/detectors/derived_state.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs create mode 100644 
gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/local_flow.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs diff --git a/gems/decomplex/lib/decomplex/detector_runner.rb b/gems/decomplex/lib/decomplex/detector_runner.rb index 640c6e211..c84d0a311 100644 --- a/gems/decomplex/lib/decomplex/detector_runner.rb +++ b/gems/decomplex/lib/decomplex/detector_runner.rb @@ -17,6 +17,12 @@ require_relative "state_branch_density" require_relative "temporal_ordering_pressure" require_relative "redundant_nil_guard" +require_relative "inconsistent_rename_clone" +require_relative "derived_state" +require_relative "ordered_protocol_mine" +require_relative "weighted_inlined_cognitive_complexity" +require_relative "locality_drag" +require_relative "operational_discontinuity" module Decomplex # Runs one detector in isolation and emits deterministic machine output. 
@@ -44,7 +50,13 @@ module DetectorRunner "state-branch-density" => :state_branch_density, "redundant-nil-guard" => :redundant_nil_guard, "state-mesh" => :state_mesh, - "state-heatmap" => :state_mesh + "state-heatmap" => :state_mesh, + "inconsistent-rename-clone" => :inconsistent_rename_clone, + "derived-state" => :derived_state, + "implicit-control-flow" => :implicit_control_flow, + "weighted-inlined-complexity" => :weighted_inlined_complexity, + "locality-drag" => :locality_drag, + "operational-discontinuity" => :operational_discontinuity }.freeze ENGINES = %w[ruby rust].freeze @@ -75,6 +87,18 @@ def run(detector, files, engine: "ruby", mass: FlaySimilarity::DEFAULT_MASS, fuz redundant_nil_guard(files, engine: engine, jobs: jobs) when :state_mesh state_mesh(files, engine: engine, jobs: jobs) + when :inconsistent_rename_clone + inconsistent_rename_clone(files, engine: engine, jobs: jobs) + when :derived_state + derived_state(files, engine: engine, jobs: jobs) + when :implicit_control_flow + implicit_control_flow(files, engine: engine, jobs: jobs) + when :weighted_inlined_complexity + weighted_inlined_complexity(files, engine: engine, jobs: jobs) + when :locality_drag + locality_drag(files, engine: engine, jobs: jobs) + when :operational_discontinuity + operational_discontinuity(files, engine: engine, jobs: jobs) else raise ArgumentError, "unsupported decomplex detector: #{detector}" end @@ -200,6 +224,64 @@ def detector_names StateMesh.scan(files).tap(&:run).to_json_graph end + private_class_method def self.inconsistent_rename_clone(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/inconsistent_rename_clone" + return Native::InconsistentRenameClone.scan(files, jobs: jobs) + end + + InconsistentRenameClone.scan(files) + end + + private_class_method def self.derived_state(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/derived_state" + return Native::DerivedState.scan(files, jobs: jobs) + end + + 
DerivedState.scan(files) + end + + private_class_method def self.implicit_control_flow(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/implicit_control_flow" + return Native::ImplicitControlFlow.scan(files, jobs: jobs) + end + + report = ImplicitControlFlow.scan(files) + { + "ordered_protocols" => report.ordered_protocols, + "order_drift" => report.drift + } + end + + private_class_method def self.weighted_inlined_complexity(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/weighted_inlined_complexity" + return Native::WeightedInlinedComplexity.scan(files, jobs: jobs) + end + + WeightedInlinedCognitiveComplexity.scan(files) + end + + private_class_method def self.locality_drag(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/locality_drag" + return Native::LocalityDrag.scan(files, jobs: jobs) + end + + LocalityDrag.scan(files) + end + + private_class_method def self.operational_discontinuity(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/operational_discontinuity" + return Native::OperationalDiscontinuity.scan(files, jobs: jobs) + end + + OperationalDiscontinuity.scan(files) + end + private_class_method def self.canonicalize(value) case value when Hash diff --git a/gems/decomplex/lib/decomplex/native/derived_state.rb b/gems/decomplex/lib/decomplex/native/derived_state.rb new file mode 100644 index 000000000..412e8c7d5 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/derived_state.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module DerivedState + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("derived-state", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| 
File.extname(path) == ".rb" } + return if bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/implicit_control_flow.rb b/gems/decomplex/lib/decomplex/native/implicit_control_flow.rb new file mode 100644 index 000000000..b1fd3c530 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/implicit_control_flow.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module ImplicitControlFlow + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("implicit-control-flow", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/inconsistent_rename_clone.rb b/gems/decomplex/lib/decomplex/native/inconsistent_rename_clone.rb new file mode 100644 index 000000000..93956d8ce --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/inconsistent_rename_clone.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module InconsistentRenameClone + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("inconsistent-rename-clone", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? 
+ + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/locality_drag.rb b/gems/decomplex/lib/decomplex/native/locality_drag.rb new file mode 100644 index 000000000..cb4bbefb6 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/locality_drag.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module LocalityDrag + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("locality-drag", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/operational_discontinuity.rb b/gems/decomplex/lib/decomplex/native/operational_discontinuity.rb new file mode 100644 index 000000000..a3f6e4959 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/operational_discontinuity.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module OperationalDiscontinuity + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("operational-discontinuity", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? 
+ + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb b/gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb new file mode 100644 index 000000000..8887a6e6c --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module WeightedInlinedComplexity + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("weighted-inlined-complexity", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/ordered_protocol_mine.rb b/gems/decomplex/lib/decomplex/ordered_protocol_mine.rb index f61c16bf8..073fa85a8 100644 --- a/gems/decomplex/lib/decomplex/ordered_protocol_mine.rb +++ b/gems/decomplex/lib/decomplex/ordered_protocol_mine.rb @@ -228,7 +228,9 @@ def scope_body(node) end def owner_name(node) - Ast.slice(node.children[0], @lines).to_s.empty? ? "(anonymous)" : Ast.slice(node.children[0], @lines) + name = node.children[0] + res = Ast.node?(name) ? Ast.slice(name, @lines) : name.to_s + res.empty? ? 
"(anonymous)" : res end def method_name(node) diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs index f567c32d2..87d10a15d 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -233,24 +233,6 @@ pub fn body_stmts(defn_node: &Node) -> Vec<&Node> { } } -pub fn def_push(node: &Node, stack: &[String]) -> Vec { - let mut next = stack.to_vec(); - match node.r#type.as_str() { - "DEFN" => { - if let Some(name) = child_to_string(node.children.first()) { - next.push(name); - } - } - "DEFS" => { - if let Some(name) = child_to_string(node.children.get(1)) { - next.push(name); - } - } - _ => {} - } - next -} - pub fn canon_polarity(text: &str) -> (String, bool) { let trimmed = text.trim(); if let Some(rest) = trimmed.strip_prefix('!') { @@ -2762,7 +2744,7 @@ fn comparison_operator_from_text(text: &str) -> Option { None } -fn child_to_string(child: Option<&Child>) -> Option { +pub fn child_to_string(child: Option<&Child>) -> Option { match child { Some(Child::String(value)) | Some(Child::Symbol(value)) => Some(value.clone()), _ => None, diff --git a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs index 515f11e08..74319a197 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs @@ -248,7 +248,7 @@ impl Report { rows_map.entry(h.contract.clone()).or_default().push(h); } - let mut rows: Vec<_> = rows_map.into_iter().map(|(contract, hs)| { + let rows: Vec<_> = rows_map.into_iter().map(|(contract, hs)| { let mut methods_set = BTreeSet::new(); for h in &hs { methods_set.insert((&h.file, &h.defn)); diff --git a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs new file mode 100644 index 000000000..fe43587bc --- /dev/null +++ 
b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs @@ -0,0 +1,169 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct DerivedStateRow { + pub file: String, + pub defn: String, + pub derived: String, + pub source: String, + pub derived_at: usize, + pub source_reassigned_at: usize, + pub gap: isize, + pub at: String, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Asgn { + name: String, + deps: Vec, + line: usize, + span: Span, +} + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { + let mut out = Vec::new(); + for file in files { + let (root, lines) = ast::parse(file)?; + let detector = DerivedState::new(file.to_string_lossy().to_string(), lines); + detector.each_method(&root, &mut |file, defn, stmts| { + out.extend(analyze(file, defn, stmts)); + }); + } + out.sort_by(|a, b| b.gap.cmp(&a.gap)); + Ok(out) +} + +struct DerivedState { + file: String, + #[allow(dead_code)] + lines: Vec, +} + +impl DerivedState { + fn new(file: String, lines: Vec) -> Self { + Self { file, lines } + } + + fn each_method(&self, node: &Node, blk: &mut dyn FnMut(&str, &str, &[&Node])) { + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + blk(&self.file, name, &ast::body_stmts(node)); + } + } + for child in node.children.iter().filter_map(ast::node) { + self.each_method(child, blk); + } + } +} + +const BRANCH_RHS: &[&str] = &[ + "IF", "CASE", "CASE2", "CASE3", "AND", "OR", "WHILE", "UNTIL", "RESCUE", "ENSURE", +]; + +fn lasgns<'a>(stmts: &'a [&'a Node]) -> Vec<&'a Node> { + let mut acc = Vec::new(); + for s in stmts { + walk_lasgns(s, &mut acc); + } + acc +} + +fn walk_lasgns<'a>(n: &'a 
Node, acc: &mut Vec<&'a Node>) { + if n.r#type == "LASGN" { + acc.push(n); + if let Some(val) = n.children.get(1).and_then(ast::node) { + if BRANCH_RHS.contains(&val.r#type.as_str()) { + // branch-local RHS: do not flatten its inner assignments + } else { + for child in n.children.iter().filter_map(ast::node) { + walk_lasgns(child, acc); + } + } + } + } else { + for child in n.children.iter().filter_map(ast::node) { + walk_lasgns(child, acc); + } + } +} + +fn lvars(node: &Node, acc: &mut Vec) { + if node.r#type == "LVAR" { + if let Some(Child::String(name)) = node.children.first() { + acc.push(name.clone()); + } + } + for child in node.children.iter().filter_map(ast::node) { + lvars(child, acc); + } +} + +fn analyze(file: &str, defn: &str, stmts: &[&Node]) -> Vec { + let asgns: Vec<_> = lasgns(stmts) + .iter() + .map(|n| { + let mut deps = Vec::new(); + if let Some(val) = n.children.get(1).and_then(ast::node) { + lvars(val, &mut deps); + } + let mut deps: Vec<_> = deps.into_iter().collect::>().into_iter().collect(); + deps.sort(); + Asgn { + name: match n.children.first().unwrap() { + Child::String(s) => s.clone(), + _ => panic!("LASGN without name"), + }, + deps, + line: n.first_lineno, + span: [n.first_lineno, n.first_column, n.last_lineno, n.last_column], + } + }) + .collect(); + + let mut out = Vec::new(); + for (i, b) in asgns.iter().enumerate() { + if b.deps.is_empty() { + continue; + } + + for a in &b.deps { + if a == &b.name { + continue; + } + + // a reassigned strictly after b's definition? + let reasn = asgns.iter().skip(i + 1).find(|x| &x.name == a); + let Some(reasn) = reasn else { continue }; + + // b recomputed at or after a's reassignment? 
+ let recomputed = asgns.iter().skip(i + 1).any(|x| &x.name == &b.name && x.line >= reasn.line); + if recomputed { + continue; + } + + let loc = format!("{}:{}:{}", file, defn, b.line); + let mut spans = BTreeMap::new(); + spans.insert(loc.clone(), b.span); + + out.push(DerivedStateRow { + file: file.to_string(), + defn: defn.to_string(), + derived: b.name.clone(), + source: a.clone(), + derived_at: b.line, + source_reassigned_at: reasn.line, + gap: (reasn.line as isize) - (b.line as isize), + at: loc, + spans, + }); + } + } + out +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs new file mode 100644 index 000000000..8797b9c8e --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs @@ -0,0 +1,718 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct ImplicitControlFlowReport { + pub ordered_protocols: Vec, + pub order_drift: Vec, +} + +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct ProtocolFinding { + pub kind: String, + pub protocol: Vec, + pub dependency: Vec, + pub states: Vec, + pub support: usize, + pub confidence: f64, + pub at: String, + pub observed: Vec, + pub missing: Vec, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct MethodEffect { + owner: String, + name: String, + reads: Vec, + writes: Vec, +} + +#[derive(Clone, Debug)] +struct Call { + mid: String, + file: String, + line: usize, + span: Span, + reads: Vec, + writes: Vec, +} + +#[derive(Clone, Debug)] +struct MethodSequence { + file: String, + owner: String, + defn: String, + line: usize, + calls: Vec, +} + +#[derive(Clone, Debug)] +struct Path { + calls: Vec, + terminal: bool, +} + +const PATH_LIMIT: usize 
= 64; + +const IGNORED_MIDS: &[&str] = &[ + "abstract!", "alias_method", "any", "attr_accessor", "attr_reader", "attr_writer", "bind", + "cast", "checked", "enum", "extend", "final", "include", "interface!", "let", "must", "must_because", + "nilable", "override", "overridable", "params", "prepend", "private", "private_class_method", + "protected", "public", "require", "require_relative", "requires_ancestor", "sealed!", "sig", + "type_member", "type_template", "untyped", "unsafe", "void", + "a_kind_of", "after", "around", "before", "be", "be_a", "be_an", "be_empty", "be_falsey", "be_nil", + "be_truthy", "change", "contain_exactly", "context", "describe", "eq", "eql", "equal", "expect", + "have_attributes", "have_key", "have_received", "it", "match", "not_to", "raise_error", + "receive", "subject", "to", +]; + +const OPTIONAL_DIAGNOSTIC_MIDS: &[&str] = &["error!", "fixable!", "read_interpolated_string", "warn!"]; + +const MUTATING_MIDS: &[&str] = &[ + "<<", "[]=", "add", "append", "clear", "collect!", "compact!", "concat", "declare", "delete", "delete_if", + "each_key=", "fill", "filter!", "keep_if", "mark", "merge!", "move", "push", "reject!", "replace", + "resolve", "shift", "stamp", "store", "unshift", "update", "write", +]; + +const NON_MUTATING_OPERATOR_MIDS: &[&str] = &["!", "!=", "!~"]; +const MUTATING_SUFFIXES: &[&str] = &["!"]; + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { + let mut parsed = BTreeMap::new(); + for file in files { + parsed.insert(file.to_string_lossy().to_string(), ast::parse(file)?); + } + + let effect_index = EffectIndex::build(&parsed); + let mut sequences = Vec::new(); + for (file, (root, lines)) in &parsed { + let mut miner = ImplicitControlFlow::new(file.clone(), lines.clone(), &effect_index); + miner.walk(root, &Vec::new()); + sequences.extend(miner.sequences); + } + + let report = Report::new(sequences); + Ok(ImplicitControlFlowReport { + ordered_protocols: report.ordered_protocols(1), + order_drift: 
report.drift(4, 0.75), + }) +} + +struct ImplicitControlFlow<'a> { + file: String, + lines: Vec, + effect_index: &'a EffectIndex, + sequences: Vec, +} + +impl<'a> ImplicitControlFlow<'a> { + fn new(file: String, lines: Vec, effect_index: &'a EffectIndex) -> Self { + Self { file, lines, effect_index, sequences: Vec::new() } + } + + fn walk(&mut self, node: &Node, owners: &[String]) { + if matches!(node.r#type.as_str(), "CLASS" | "MODULE") { + let mut next_owners = owners.to_vec(); + next_owners.push(self.owner_name(node)); + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_owners); + } + } else if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + self.record_method_paths(node, &owners.join("::")); + } else { + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, owners); + } + } + } + + fn record_method_paths(&mut self, node: &Node, owner: &str) { + let defn = self.method_name(node); + for path in self.method_paths(node) { + let calls: Vec<_> = path.calls.iter().map(|c| self.call_for(c, owner, &defn)).collect(); + if calls.iter().filter(|c| self.stateful_call(c)).count() < 2 { continue; } + + self.sequences.push(MethodSequence { + file: self.file.clone(), + owner: owner.to_string(), + defn: defn.clone(), + line: node.first_lineno, + calls, + }); + } + } + + fn method_paths(&self, node: &Node) -> Vec { + self.paths_for_statements(&ast::body_stmts(node)) + } + + fn paths_for_statements(&self, statements: &[&Node]) -> Vec { + let mut paths = vec![self.empty_path()]; + for stmt in statements { + if stmt.r#type == "BEGIN" { continue; } + let stmt_paths = self.paths_for(stmt); + paths = self.append_statement_paths(paths, stmt_paths); + } + paths + } + + fn append_statement_paths(&self, paths: Vec, stmt_paths: Vec) -> Vec { + self.combine_path_lists(paths, stmt_paths) + } + + fn combine_path_lists(&self, left_paths: Vec, right_paths: Vec) -> Vec { + let mut combined = Vec::new(); + for left in left_paths { + if 
left.terminal { + combined.push(left); + } else { + for right in &right_paths { + let mut calls = left.calls.clone(); + calls.extend(right.calls.clone()); + combined.push(Path { calls, terminal: right.terminal }); + } + } + } + combined.into_iter().take(PATH_LIMIT).collect() + } + + fn paths_for(&self, node: &Node) -> Vec { + match node.r#type.as_str() { + "BLOCK" => self.paths_for_statements(&node.children.iter().filter_map(ast::node).collect::>()), + "SCOPE" => self.paths_for(node.children.get(2).and_then(ast::node).unwrap_or(node)), + "IF" | "UNLESS" => self.branch_paths(node), + "CASE" | "CASE2" => self.case_paths(node), + "RETURN" | "BREAK" | "NEXT" | "REDO" | "RETRY" => { + self.generic_paths(node).into_iter().map(|mut p| { p.terminal = true; p }).collect() + } + _ => self.generic_paths(node), + } + } + + fn branch_paths(&self, node: &Node) -> Vec { + let cond = node.children.get(0).and_then(ast::node); + let pos = node.children.get(1).and_then(ast::node); + let neg = node.children.get(2).and_then(ast::node); + + let mut alts = self.paths_for(pos.unwrap_or(node)); + if let Some(n) = neg { alts.extend(self.paths_for(n)); } else { alts.push(self.empty_path()); } + + self.combine_path_lists(self.paths_for(cond.unwrap_or(node)), alts) + } + + fn case_paths(&self, node: &Node) -> Vec { + let (cond, first_when) = if node.r#type == "CASE2" { (None, node.children.get(0).and_then(ast::node)) } else { (node.children.get(0).and_then(ast::node), node.children.get(1).and_then(ast::node)) }; + self.combine_path_lists(cond.map(|c| self.paths_for(c)).unwrap_or(vec![self.empty_path()]), self.when_paths(first_when)) + } + + fn when_paths(&self, node: Option<&Node>) -> Vec { + let Some(n) = node else { return vec![self.empty_path()] }; + if n.r#type != "WHEN" { return self.paths_for(n) } + + let pat = n.children.get(0).and_then(ast::node); + let body = n.children.get(1).and_then(ast::node); + let next = n.children.get(2).and_then(ast::node); + + let current = 
self.combine_path_lists(self.paths_for(pat.unwrap_or(n)), self.paths_for(body.unwrap_or(n))); + let mut out = current; + out.extend(self.when_paths(next)); + out.into_iter().take(PATH_LIMIT).collect() + } + + fn generic_paths(&self, node: &Node) -> Vec { + if matches!(node.r#type.as_str(), "CLASS" | "MODULE" | "DEFN" | "DEFS" | "LAMBDA") { + return vec![self.empty_path()]; + } + + let mut child_paths = vec![self.empty_path()]; + for child in node.children.iter().filter_map(ast::node) { + child_paths = self.combine_path_lists(child_paths, self.paths_for(child)); + } + + if let Some(mid) = self.internal_protocol_call(node) { + self.combine_path_lists(vec![Path { calls: vec![self.raw_call(&mid, node)], terminal: false }], child_paths) + } else { + child_paths + } + } + + fn raw_call(&self, mid: &str, node: &Node) -> Call { + Call { + mid: mid.to_string(), + file: self.file.clone(), + line: node.first_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + reads: Vec::new(), + writes: Vec::new(), + } + } + + fn call_for(&self, call: &Call, owner: &str, _defn: &str) -> Call { + let effect = self.effect_index.effect_for(owner, &call.mid); + Call { + mid: call.mid.clone(), + file: call.file.clone(), + line: call.line, + span: call.span, + reads: effect.map(|e| e.reads.clone()).unwrap_or_default(), + writes: effect.map(|e| e.writes.clone()).unwrap_or_default(), + } + } + + fn stateful_call(&self, call: &Call) -> bool { + !call.reads.is_empty() || !call.writes.is_empty() + } + + fn empty_path(&self) -> Path { Path { calls: Vec::new(), terminal: false } } + + fn owner_name(&self, node: &Node) -> String { + let text = ast::slice(node.children.first().and_then(ast::node).unwrap_or(node), &self.lines); + if text.is_empty() { "(anonymous)".to_string() } else { text } + } + + fn method_name(&self, node: &Node) -> String { + if node.r#type == "DEFS" { + ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string()) + } else { + 
ast::child_to_string(node.children.get(0)).unwrap_or_else(|| "?".to_string()) + } + } + + fn internal_protocol_call(&self, node: &Node) -> Option { + let mid = self.call_mid(node)?; + if IGNORED_MIDS.contains(&mid.as_str()) { return None } + if !self.internal_receiver(node) { return None } + Some(mid) + } + + fn call_mid(&self, node: &Node) -> Option { + match node.r#type.as_str() { + "CALL" | "OPCALL" | "ATTRASGN" => ast::child_to_string(node.children.get(1)), + "FCALL" | "VCALL" => ast::child_to_string(node.children.get(0)), + _ => None, + } + } + + fn internal_receiver(&self, node: &Node) -> bool { + if matches!(node.r#type.as_str(), "FCALL" | "VCALL") { return true } + let receiver = node.children.get(0).and_then(ast::node); + receiver.map(|r| r.r#type == "SELF").unwrap_or(false) + } +} + +struct EffectIndex { + by_owner_name: BTreeMap<(String, String), MethodEffect>, + by_name: BTreeMap>, +} + +impl EffectIndex { + fn build(parsed: &BTreeMap)>) -> Self { + let mut effects = Vec::new(); + for (file, (root, lines)) in parsed { + effects.extend(EffectCollector::new(file.clone(), lines.clone()).scan(root)); + } + let mut by_owner_name = BTreeMap::new(); + let mut by_name = BTreeMap::new(); + for e in effects { + by_owner_name.insert((e.owner.clone(), e.name.clone()), e.clone()); + by_name.entry(e.name.clone()).or_insert_with(Vec::new).push(e); + } + Self { by_owner_name, by_name } + } + + fn effect_for(&self, owner: &str, name: &str) -> Option<&MethodEffect> { + if let Some(e) = self.by_owner_name.get(&(owner.to_string(), name.to_string())) { + return Some(e); + } + let candidates = self.by_name.get(name)?; + let stateful: Vec<_> = candidates.iter().filter(|e| !e.reads.is_empty() || !e.writes.is_empty()).collect(); + if stateful.len() == 1 { Some(stateful[0]) } else { None } + } +} + +struct EffectCollector { + lines: Vec, +} + +impl EffectCollector { + fn new(_file: String, lines: Vec) -> Self { Self { lines } } + + fn scan(&self, root: &Node) -> Vec { + let mut 
out = Vec::new(); + self.walk(root, &Vec::new(), &mut out); + out + } + + fn walk(&self, node: &Node, owners: &[String], out: &mut Vec) { + if matches!(node.r#type.as_str(), "CLASS" | "MODULE") { + let mut next_owners = owners.to_vec(); + next_owners.push(self.owner_name(node)); + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_owners, out); + } + } else if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + out.push(self.method_effect(node, &owners.join("::"))); + } else { + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, owners, out); + } + } + } + + fn method_effect(&self, node: &Node, owner: &str) -> MethodEffect { + let mut reads = BTreeSet::new(); + let mut writes = BTreeSet::new(); + self.collect_state_access(node, &mut reads, &mut writes); + MethodEffect { + owner: owner.to_string(), + name: self.method_name(node), + reads: { let mut v: Vec<_> = reads.into_iter().collect(); v.sort(); v }, + writes: { let mut v: Vec<_> = writes.into_iter().collect(); v.sort(); v }, + } + } + + fn collect_state_access(&self, node: &Node, reads: &mut BTreeSet, writes: &mut BTreeSet) { + if matches!(node.r#type.as_str(), "CLASS" | "MODULE" | "LAMBDA") { return } + + match node.r#type.as_str() { + "IASGN" => { if let Some(s) = ast::child_to_string(node.children.get(0)) { writes.insert(self.normalize_state(&s)); } } + "LASGN" => self.collect_index_write(node, writes), + "IVAR" => { if let Some(s) = ast::child_to_string(node.children.get(0)) { reads.insert(self.normalize_state(&s)); } } + "ATTRASGN" => self.collect_attr_write(node, writes), + "CALL" | "OPCALL" => { + self.collect_bare_reader_comparison(node, reads); + self.collect_receiver_mutation(node, writes); + self.collect_self_reader(node, reads); + } + "VCALL" | "FCALL" => self.collect_self_reader(node, reads), + _ => {} + } + + for child in node.children.iter().filter_map(ast::node) { + self.collect_state_access(child, reads, writes); + } + } + + fn 
collect_attr_write(&self, node: &Node, writes: &mut BTreeSet) { + let receiver = node.children.get(0).and_then(ast::node); + let mid = ast::child_to_string(node.children.get(1)); + let Some(mid) = mid else { return }; + let attr = mid.trim_end_matches('=').to_string(); + + if mid == "[]=" { + if let Some(t) = receiver.and_then(|r| self.state_receiver_token(r)) { writes.insert(t); } + } else if receiver.map(|r| self.self_receiver(r)).unwrap_or(false) { + writes.insert(self.normalize_state(&attr)); + } else if let Some(t) = receiver.and_then(|r| self.state_receiver_token(r)) { + writes.insert(format!("{}.{}", t, attr)); + } + } + + fn collect_index_write(&self, node: &Node, writes: &mut BTreeSet) { + let name = ast::child_to_string(node.children.get(0)).unwrap_or_default(); + if name.contains('[') { + writes.insert(self.normalize_state(name.split('[').next().unwrap())); + } + } + + fn collect_bare_reader_comparison(&self, node: &Node, reads: &mut BTreeSet) { + let receiver = node.children.get(0).and_then(ast::node); + let mid = ast::child_to_string(node.children.get(1)).unwrap_or_default(); + if matches!(mid.as_str(), "==" | "!=" | "===" | "<" | "<=" | ">" | ">=") { + if let Some(r) = receiver { + if r.r#type == "LVAR" { + if let Some(name) = ast::child_to_string(r.children.get(0)) { + reads.insert(self.normalize_state(&name)); + } + } + } + } + } + + fn collect_receiver_mutation(&self, node: &Node, writes: &mut BTreeSet) { + let receiver = node.children.get(0).and_then(ast::node); + let mid = ast::child_to_string(node.children.get(1)).unwrap_or_default(); + if self.mutating_mid(&mid) { + if let Some(r) = receiver { + if let Some(t) = self.state_receiver_token(r) { writes.insert(t); } + } + } + } + + fn collect_self_reader(&self, node: &Node, reads: &mut BTreeSet) { + let mid = self.call_mid(node); + let Some(mid) = mid else { return }; + if self.mutating_mid(&mid) { return } + if IGNORED_MIDS.contains(&mid.as_str()) { return } + if !self.no_args(node) { return } + 
if node.r#type == "CALL" && !self.self_receiver(node.children.get(0).and_then(ast::node).unwrap()) { return } + reads.insert(self.normalize_state(&mid)); + } + + fn mutating_mid(&self, mid: &str) -> bool { + if NON_MUTATING_OPERATOR_MIDS.contains(&mid) { return false } + MUTATING_MIDS.contains(&mid) || MUTATING_SUFFIXES.iter().any(|s| mid.ends_with(s)) + } + + fn no_args(&self, node: &Node) -> bool { + match node.r#type.as_str() { + "CALL" | "OPCALL" => node.children.get(2).map(|c| matches!(c, Child::Nil)).unwrap_or(true), + "VCALL" => true, + "FCALL" => node.children.get(1).map(|c| matches!(c, Child::Nil)).unwrap_or(true), + _ => false, + } + } + + fn state_receiver_token(&self, node: &Node) -> Option { + match node.r#type.as_str() { + "IVAR" => ast::child_to_string(node.children.get(0)).map(|s| self.normalize_state(&s)), + "SELF" => Some("self".to_string()), + "VCALL" | "FCALL" | "LVAR" => ast::child_to_string(node.children.get(0)).map(|s| self.normalize_state(&s)), + "CALL" => { + if self.no_args(node) { ast::child_to_string(node.children.get(1)).map(|s| self.normalize_state(&s)) } else { None } + } + _ => None, + } + } + + fn self_receiver(&self, node: &Node) -> bool { node.r#type == "SELF" } + + fn call_mid(&self, node: &Node) -> Option { + match node.r#type.as_str() { + "CALL" | "OPCALL" | "ATTRASGN" => ast::child_to_string(node.children.get(1)), + "FCALL" | "VCALL" => ast::child_to_string(node.children.get(0)), + _ => None, + } + } + + fn owner_name(&self, node: &Node) -> String { + let text = ast::slice(node.children.first().and_then(ast::node).unwrap_or(node), &self.lines); + if text.is_empty() { "(anonymous)".to_string() } else { text } + } + + fn method_name(&self, node: &Node) -> String { + if node.r#type == "DEFS" { + ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string()) + } else { + ast::child_to_string(node.children.get(0)).unwrap_or_else(|| "?".to_string()) + } + } + + fn normalize_state(&self, name: &str) -> String { + 
name.trim_start_matches('@').trim_end_matches('=').to_string() + } +} + +struct Report { + sequences: Vec, + site_call_sets: BTreeMap<(String, String, String, usize), BTreeMap>, +} + +impl Report { + fn new(sequences: Vec) -> Self { + let mut site_call_sets = BTreeMap::new(); + for seq in &sequences { + let mut calls = BTreeMap::new(); + for c in seq.calls.iter().filter(|c| !c.reads.is_empty() || !c.writes.is_empty()) { + calls.insert(c.mid.clone(), true); + } + site_call_sets.insert((seq.file.clone(), seq.owner.clone(), seq.defn.clone(), seq.line), calls); + } + Self { sequences, site_call_sets } + } + + fn ordered_protocols(&self, min_support: usize) -> Vec { + let mut counts: BTreeMap<(String, String, String, String), BTreeMap<(String, String, String, usize), ProtocolFinding>> = BTreeMap::new(); + for seq in &self.sequences { + let state_calls: Vec<_> = seq.calls.iter().filter(|c| !c.reads.is_empty() || !c.writes.is_empty()).collect(); + let collapsed = self.collapse_consecutive(&state_calls); + for i in 0..collapsed.len().saturating_sub(1) { + let left = collapsed[i]; + let right = collapsed[i+1]; + let edge = self.dependency_edge(left, right); + let Some(edge) = edge else { continue }; + if self.diagnostic_protocol(&[left.mid.clone(), right.mid.clone()]) { continue }; + + let key = (left.mid.clone(), right.mid.clone(), edge.0.join("|"), edge.1.join("|")); + let site_key = (seq.file.clone(), seq.owner.clone(), seq.defn.clone(), seq.line); + counts.entry(key).or_default().insert(site_key, ProtocolFinding { + kind: "protocol_pressure".to_string(), + protocol: vec![left.mid.clone(), right.mid.clone()], + dependency: edge.0, + states: edge.1, + support: 0, + confidence: 1.0, + at: format!("{}:{}:{}", seq.file, seq.defn, seq.line), + observed: vec![left.mid.clone(), right.mid.clone()], + missing: Vec::new(), + sites: Vec::new(), + spans: { let mut s = BTreeMap::new(); s.insert(format!("{}:{}:{}", seq.file, seq.defn, seq.line), left.span); s }, + }); + } + } + + let 
mut out = Vec::new(); + for (_, sites) in counts { + if sites.len() < min_support { continue; } + let mut first = sites.values().next().unwrap().clone(); + first.support = sites.len(); + first.sites = sites.keys().map(|k| format!("{}:{}:{}", k.0, k.2, k.3)).collect(); + out.push(first); + } + out.sort_by(|a, b| b.support.cmp(&a.support).then_with(|| self.dependency_rank(a).cmp(&self.dependency_rank(b))).then_with(|| a.protocol.join("\0").cmp(&b.protocol.join("\0")))); + out + } + + fn drift(&self, min_support: usize, min_confidence: f64) -> Vec { + let protocols = self.ordered_protocols(min_support); + let mut protocol_index: BTreeMap> = BTreeMap::new(); + for p in protocols { + let mut pair = p.protocol.clone(); + pair.sort(); + protocol_index.entry(pair.join("\0")).or_default().push(p); + } + + let mut out = Vec::new(); + for seq in &self.sequences { + let state_calls: Vec<_> = seq.calls.iter().filter(|c| !c.reads.is_empty() || !c.writes.is_empty()).collect(); + let collapsed = self.collapse_consecutive(&state_calls); + let mids: Vec<_> = collapsed.iter().map(|c| c.mid.clone()).collect(); + let positions = self.first_positions(&mids); + + for protocol_row in self.candidate_protocols(&positions.keys().cloned().collect::>(), &protocol_index) { + let present: Vec<_> = protocol_row.protocol.iter().filter(|m| positions.contains_key(*m)).cloned().collect(); + if present.len() < 2 { continue; } + if self.ordered_subsequence(&mids, &protocol_row.protocol) { continue; } + + let confidence = (protocol_row.support as f64) / (self.denominator_for(&present) as f64); + if confidence < min_confidence { continue; } + + out.push(self.finding(seq, &protocol_row, &present, &positions, confidence)); + } + } + + let mut deduped = Vec::new(); + let mut seen = BTreeSet::new(); + for row in out { + let key = (row.kind.clone(), row.at.clone(), row.protocol.clone(), row.observed.clone(), row.states.clone()); + if seen.insert(key) { deduped.push(row); } + } + deduped.sort_by(|a, b| 
b.confidence.partial_cmp(&a.confidence).unwrap().then_with(|| b.support.cmp(&a.support)).then_with(|| a.at.cmp(&b.at))); + deduped + } + + fn dependency_rank(&self, row: &ProtocolFinding) -> usize { + if row.dependency.iter().any(|d| d == "write_read") { 0 } + else if row.dependency.iter().any(|d| d == "write_write") { 1 } + else { 2 } + } + + fn collapse_consecutive<'a>(&self, calls: &'a [&'a Call]) -> Vec<&'a Call> { + let mut out = Vec::new(); + let mut last = None; + for c in calls { + if last.map(|l| l == &c.mid).unwrap_or(false) { continue; } + last = Some(&c.mid); + out.push(*c); + } + out + } + + fn dependency_edge(&self, left: &Call, right: &Call) -> Option<(Vec, Vec)> { + let lw: BTreeSet<_> = left.writes.iter().collect(); + let lr: BTreeSet<_> = left.reads.iter().collect(); + let rw: BTreeSet<_> = right.writes.iter().collect(); + let rr: BTreeSet<_> = right.reads.iter().collect(); + + let mut kinds = Vec::new(); + let mut states = BTreeSet::new(); + + let wr: Vec<_> = lw.intersection(&rr).collect(); + if !wr.is_empty() { kinds.push("write_read".to_string()); for s in wr { states.insert((*s).clone()); } } + let ww: Vec<_> = lw.intersection(&rw).collect(); + if !ww.is_empty() { kinds.push("write_write".to_string()); for s in ww { states.insert((*s).clone()); } } + let rw_int: Vec<_> = lr.intersection(&rw).collect(); + if !rw_int.is_empty() { kinds.push("read_write".to_string()); for s in rw_int { states.insert((*s).clone()); } } + + if kinds.is_empty() { return None } + kinds.sort(); + let mut states_v: Vec<_> = states.into_iter().collect(); + states_v.sort(); + Some((kinds, states_v)) + } + + fn diagnostic_protocol(&self, protocol: &[String]) -> bool { + protocol.iter().any(|m| OPTIONAL_DIAGNOSTIC_MIDS.contains(&m.as_str())) + } + + fn candidate_protocols(&self, mids: &[String], protocol_index: &BTreeMap>) -> Vec { + let mut out = Vec::new(); + let mut seen = BTreeSet::new(); + for i in 0..mids.len() { + for j in i+1..mids.len() { + let mut pair = 
vec![mids[i].clone(), mids[j].clone()]; + pair.sort(); + if let Some(ps) = protocol_index.get(&pair.join("\0")) { + for p in ps { + let key = (p.protocol.clone(), p.dependency.clone(), p.states.clone()); + if seen.insert(key) { out.push(p.clone()); } + } + } + } + } + out + } + + fn first_positions(&self, mids: &[String]) -> BTreeMap { + let mut out = BTreeMap::new(); + for (i, m) in mids.iter().enumerate() { + out.entry(m.clone()).or_insert(i); + } + out + } + + fn ordered_subsequence(&self, mids: &[String], protocol: &[String]) -> bool { + let mut idx = 0; + for m in mids { + if m == &protocol[idx] { idx += 1; } + if idx == protocol.len() { return true; } + } + false + } + + fn denominator_for(&self, present: &[String]) -> usize { + self.site_call_sets.values().filter(|mids| present.iter().all(|m| mids.contains_key(m))).count().max(1) + } + + fn finding(&self, seq: &MethodSequence, protocol_row: &ProtocolFinding, present: &[String], positions: &BTreeMap, confidence: f64) -> ProtocolFinding { + let anchor_mid = present.iter().min_by_key(|m| positions.get(*m).unwrap()).unwrap(); + let anchor = seq.calls.iter().find(|c| &c.mid == anchor_mid).unwrap(); + let loc = format!("{}:{}:{}", seq.file, seq.defn, anchor.line); + let mut observed = present.to_vec(); + observed.sort_by_key(|m| positions.get(m).unwrap()); + + let mut spans = BTreeMap::new(); + spans.insert(loc.clone(), anchor.span); + + ProtocolFinding { + kind: "order_drift".to_string(), + protocol: protocol_row.protocol.clone(), + observed, + missing: Vec::new(), + dependency: protocol_row.dependency.clone(), + states: protocol_row.states.clone(), + support: protocol_row.support, + confidence: (confidence * 100.0).round() / 100.0, + at: loc, + sites: protocol_row.sites.clone(), + spans, + } + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs b/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs new file mode 100644 index 000000000..76912ce31 --- 
/dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs @@ -0,0 +1,239 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct InconsistentRenameCloneRow { + pub file: String, + pub defn: String, + pub line: usize, + pub at: String, + pub ref_at: String, + pub spans: BTreeMap, + pub ref_name: String, + pub divergent: Vec, + pub clone_size: usize, +} + +#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)] +enum Skeleton { + ID, + MID, + CALL, + FCALL, + Node(String), +} + +#[derive(Clone, Debug)] +struct Block { + skeleton: Vec, + names: Vec, + file: String, + defn: String, + line: usize, + span: Span, +} + +const HOLE_TYPES: &[&str] = &["LVAR", "DVAR", "IVAR", "LASGN", "DASGN", "IASGN"]; +const MIN_TOKENS: usize = 8; + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { + let mut blocks = Vec::new(); + for file in files { + let (root, _lines) = ast::parse(file)?; + let detector = InconsistentRenameClone::new(file.to_string_lossy().to_string()); + detector.collect(&root, &Vec::new(), &mut blocks); + } + Ok(Report::new(blocks).inconsistent_renames()) +} + +struct InconsistentRenameClone { + file: String, +} + +impl InconsistentRenameClone { + fn new(file: String) -> Self { + Self { file } + } + + fn collect(&self, node: &Node, defstack: &[String], blocks: &mut Vec) { + let mut next_defstack = defstack.to_vec(); + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + next_defstack.push(name.clone()); + } + } + + if node.r#type == "BLOCK" { + let stmts: Vec<_> = node.children.iter().filter_map(ast::node).collect(); + if stmts.len() >= 3 { + 
self.add_block(&stmts, &next_defstack, blocks); + } + } + + for child in node.children.iter().filter_map(ast::node) { + self.collect(child, &next_defstack, blocks); + } + } + + fn add_block(&self, stmts: &[&Node], defstack: &[String], blocks: &mut Vec) { + let mut skeleton = Vec::new(); + let mut names = Vec::new(); + for stmt in stmts { + self.tokenize(stmt, &mut skeleton, &mut names); + } + if skeleton.len() < MIN_TOKENS { + return; + } + + blocks.push(Block { + skeleton, + names, + file: self.file.clone(), + defn: defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + line: stmts[0].first_lineno, + span: [ + stmts[0].first_lineno, + stmts[0].first_column, + stmts.last().unwrap().last_lineno, + stmts.last().unwrap().last_column, + ], + }); + } + + fn tokenize(&self, node: &Node, skeleton: &mut Vec, names: &mut Vec) { + match node.r#type.as_str() { + t if HOLE_TYPES.contains(&t) => { + skeleton.push(Skeleton::ID); + if let Some(Child::String(name)) = node.children.first() { + names.push(name.clone()); + } + } + "VCALL" => { + skeleton.push(Skeleton::ID); + if let Some(Child::Symbol(name)) = node.children.first() { + names.push(name.clone()); + } + } + "CALL" | "FCALL" => { + skeleton.push(if node.r#type == "CALL" { Skeleton::CALL } else { Skeleton::FCALL }); + let mid_index = if node.r#type == "CALL" { 1 } else { 0 }; + skeleton.push(Skeleton::MID); + if let Some(Child::Symbol(mid)) = node.children.get(mid_index) { + names.push(mid.clone()); + } + } + "LIT" | "STR" | "SYM" | "INTEGER" | "FLOAT" => { + skeleton.push(Skeleton::Node(node.r#type.clone())); + } + _ => { + skeleton.push(Skeleton::Node(node.r#type.clone())); + } + } + for child in node.children.iter().filter_map(ast::node) { + self.tokenize(child, skeleton, names); + } + } +} + +struct Report { + groups: BTreeMap, Vec>, +} + +impl Report { + fn new(blocks: Vec) -> Self { + let mut groups: BTreeMap, Vec> = BTreeMap::new(); + for b in blocks { + 
groups.entry(b.skeleton.clone()).or_default().push(b); + } + groups.retain(|_, v| v.len() >= 2); + Self { groups } + } + + fn inconsistent_renames(&self) -> Vec { + let mut out = Vec::new(); + for members in self.groups.values() { + out.extend(self.findings_for(members)); + } + out.sort_by(|a, b| b.clone_size.cmp(&a.clone_size).then_with(|| a.at.cmp(&b.at))); + out.dedup_by(|a, b| a.at == b.at && a.ref_at == b.ref_at && a.ref_name == b.ref_name); + out + } + + fn findings_for(&self, members: &[Block]) -> Vec { + let mut units = BTreeSet::new(); + for m in members { + units.insert((m.file.clone(), m.defn.clone())); + } + if units.len() < 2 { + return Vec::new(); + } + + let mut out = Vec::new(); + for i in 0..members.len() { + for j in i + 1..members.len() { + let ref_block = &members[i]; + let candidate = &members[j]; + if self.same_unit(ref_block, candidate) { + continue; + } + out.extend(self.inconsistent_pairs(ref_block, candidate)); + out.extend(self.inconsistent_pairs(candidate, ref_block)); + } + } + out + } + + fn inconsistent_pairs(&self, ref_block: &Block, candidate: &Block) -> Vec { + let mut out = Vec::new(); + for (ref_name, positions) in self.ref_classes(ref_block) { + let mut spellings = BTreeSet::new(); + for pos in positions { + if let Some(name) = candidate.names.get(pos) { + spellings.insert(name.clone()); + } + } + if spellings.len() < 2 { + continue; + } + out.push(self.finding(ref_block, candidate, &ref_name, spellings.into_iter().collect())); + } + out + } + + fn ref_classes(&self, ref_block: &Block) -> BTreeMap> { + let mut classes: BTreeMap> = BTreeMap::new(); + for (index, name) in ref_block.names.iter().enumerate() { + classes.entry(name.clone()).or_default().push(index); + } + classes.retain(|_, v| v.len() >= 2); + classes + } + + fn same_unit(&self, left: &Block, right: &Block) -> bool { + left.file == right.file && left.defn == right.defn + } + + fn finding(&self, ref_block: &Block, candidate: &Block, ref_name: &str, divergent: Vec) -> 
InconsistentRenameCloneRow { + let at = format!("{}:{}:{}", candidate.file, candidate.defn, candidate.line); + let ref_at = format!("{}:{}:{}", ref_block.file, ref_block.defn, ref_block.line); + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), candidate.span); + spans.insert(ref_at.clone(), ref_block.span); + InconsistentRenameCloneRow { + file: candidate.file.clone(), + defn: candidate.defn.clone(), + line: candidate.line, + at, + ref_at, + spans, + ref_name: ref_name.to_string(), + divergent, + clone_size: 2, + } + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs new file mode 100644 index 000000000..52bfdc2ca --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -0,0 +1,333 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct LocalFlowRow { + pub summaries: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct MethodSummary { + pub id: String, + pub owner: String, + pub name: String, + pub file: String, + pub line: usize, + pub span: Span, + #[serde(skip_serializing)] + pub node: Node, + pub statements: Vec, + pub boundaries: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct Statement { + pub index: usize, + pub line: usize, + pub end_line: usize, + pub span: Span, + pub source: String, + pub reads: BTreeSet, + pub writes: BTreeSet, + pub dependencies: Vec<(String, String)>, + pub co_uses: Vec<(String, String)>, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct Boundary { + pub before_index: usize, + pub after_index: usize, + pub line: usize, + pub kind: String, + pub text: String, +} + +const OWNER_TYPES: &[&str] = &["CLASS", "MODULE"]; +const METHOD_TYPES: &[&str] = 
&["DEFN", "DEFS"]; +const SKIP_NESTED_TYPES: &[&str] = &["CLASS", "MODULE", "DEFN", "DEFS", "LAMBDA"]; +const LOCAL_READ_TYPES: &[&str] = &["LVAR", "DVAR"]; +const LOCAL_WRITE_TYPES: &[&str] = &["LASGN", "DASGN"]; + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { + let mut out = Vec::new(); + for file in files { + let (root, lines) = ast::parse(file)?; + let mut detector = LocalFlow::new(file.to_string_lossy().to_string(), lines); + out.extend(detector.scan(&root)); + } + Ok(out) +} + +struct LocalFlow { + file: String, + lines: Vec, +} + +impl LocalFlow { + fn new(file: String, lines: Vec) -> Self { + Self { file, lines } + } + + fn scan(&mut self, root: &Node) -> Vec { + let mut out = Vec::new(); + self.collect_methods(root, &Vec::new(), &mut out); + out + } + + fn collect_methods(&self, node: &Node, owners: &[String], out: &mut Vec) { + if OWNER_TYPES.contains(&node.r#type.as_str()) { + let owner = self.full_owner_name(owners, node); + for method in self.owner_methods(node) { + out.push(self.method_summary(method, &owner)); + } + let mut next_owners = owners.to_vec(); + next_owners.push(self.owner_segment(node)); + self.collect_nested_owners(node, &next_owners, out); + } else if METHOD_TYPES.contains(&node.r#type.as_str()) && owners.is_empty() { + out.push(self.method_summary(node, "(top-level)")); + } else { + for child in node.children.iter().filter_map(ast::node) { + self.collect_methods(child, owners, out); + } + } + } + + fn collect_nested_owners(&self, node: &Node, owners: &[String], out: &mut Vec) { + if METHOD_TYPES.contains(&node.r#type.as_str()) { + return; + } + + for child in node.children.iter().filter_map(ast::node) { + if OWNER_TYPES.contains(&child.r#type.as_str()) { + self.collect_methods(child, owners, out); + } else { + self.collect_nested_owners(child, owners, out); + } + } + } + + fn method_summary(&self, node: &Node, owner: &str) -> MethodSummary { + let statements: Vec<_> = ast::body_stmts(node) + .iter() + 
.enumerate() + .map(|(index, stmt)| self.statement_summary(stmt, index)) + .collect(); + MethodSummary { + id: format!("{}#{}", owner, self.method_name(node)), + owner: owner.to_string(), + name: self.method_name(node), + file: self.file.clone(), + line: node.first_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + node: node.clone(), + boundaries: self.structural_boundaries(&statements), + statements, + } + } + + fn statement_summary(&self, node: &Node, index: usize) -> Statement { + Statement { + index, + line: node.first_lineno, + end_line: node.last_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + source: ast::slice(node, &self.lines), + reads: self.local_reads(node), + writes: self.local_writes(node), + dependencies: self.assignment_dependencies(node), + co_uses: self.co_use_edges(node), + } + } + + fn structural_boundaries(&self, statements: &[Statement]) -> Vec { + let mut out = Vec::new(); + for i in 0..statements.len().saturating_sub(1) { + let left = &statements[i]; + let right = &statements[i + 1]; + if let Some(boundary) = self.source_boundary(left.end_line + 1, right.line - 1) { + out.push(Boundary { + before_index: left.index, + after_index: right.index, + line: boundary.line, + kind: boundary.kind, + text: boundary.text, + }); + } + } + out + } + + fn source_boundary(&self, first_line: usize, last_line: usize) -> Option { + if first_line > last_line { + return None; + } + + let mut blank = None; + for line_number in first_line..=last_line { + let text = self.lines.get(line_number - 1).map(|s| s.as_str()).unwrap_or(""); + let stripped = text.trim(); + if stripped.starts_with('#') { + return Some(RawBoundary { + line: line_number, + kind: "comment".to_string(), + text: stripped.to_string(), + }); + } + if stripped.is_empty() && blank.is_none() { + blank = Some(RawBoundary { + line: line_number, + kind: "blank".to_string(), + text: stripped.to_string(), + }); + } + } 
+ blank + } + + fn owner_methods<'a>(&self, owner_node: &'a Node) -> Vec<&'a Node> { + let Some(body) = self.owner_body(owner_node) else { + return Vec::new(); + }; + + let stmts = if body.r#type == "BLOCK" { + body.children.iter().filter_map(ast::node).collect::>() + } else { + vec![body] + }; + + stmts.into_iter().flat_map(|stmt| { + if METHOD_TYPES.contains(&stmt.r#type.as_str()) { + vec![stmt] + } else if self.visibility_call(stmt) { + self.inline_methods(stmt) + } else { + vec![] + } + }).collect() + } + + fn inline_methods<'a>(&self, stmt: &'a Node) -> Vec<&'a Node> { + let Some(args) = stmt.children.get(1).and_then(ast::node) else { + return Vec::new(); + }; + args.children.iter().filter_map(ast::node).filter(|arg| METHOD_TYPES.contains(&arg.r#type.as_str())).collect() + } + + fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { + let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; + let scope = owner_node.children.get(scope_index).and_then(ast::node)?; + if scope.r#type != "SCOPE" { + return None; + } + scope.children.get(2).and_then(ast::node) + } + + fn visibility_call(&self, node: &Node) -> bool { + if node.r#type == "FCALL" { + if let Some(Child::Symbol(name)) = node.children.first() { + return matches!(name.as_str(), "public" | "protected" | "private"); + } + } + false + } + + fn method_name(&self, node: &Node) -> String { + if node.r#type == "DEFS" { + let receiver = node.children.get(0).and_then(ast::node); + let prefix = if let Some(r) = receiver { + if r.r#type == "SELF" { "self".to_string() } else { ast::slice(r, &self.lines) } + } else { + "?".to_string() + }; + format!("{}.{}", prefix, node.children.get(1).and_then(|c| match c { Child::Symbol(s) => Some(s), _ => None }).unwrap_or(&"?".to_string())) + } else { + node.children.first().and_then(|c| match c { Child::Symbol(s) => Some(s.clone()), _ => None }).unwrap_or_else(|| "?".to_string()) + } + } + + fn full_owner_name(&self, owners: &[String], node: &Node) -> 
String { + let mut next = owners.to_vec(); + next.push(self.owner_segment(node)); + next.join("::") + } + + fn owner_segment(&self, node: &Node) -> String { + let text = ast::slice(node.children.first().and_then(ast::node).unwrap_or(node), &self.lines); + if text.is_empty() { "(anonymous)".to_string() } else { text } + } + + fn local_reads(&self, node: &Node) -> BTreeSet { + let mut reads = Vec::new(); + self.walk_local(node, &mut |child| { + if LOCAL_READ_TYPES.contains(&child.r#type.as_str()) { + if let Some(Child::String(name)) = child.children.first() { + reads.push(name.clone()); + } + } + }); + reads.into_iter().collect() + } + + fn local_writes(&self, node: &Node) -> BTreeSet { + let mut writes = Vec::new(); + self.walk_local(node, &mut |child| { + if LOCAL_WRITE_TYPES.contains(&child.r#type.as_str()) { + if let Some(Child::String(name)) = child.children.first() { + writes.push(name.clone()); + } + } + }); + writes.into_iter().collect() + } + + fn assignment_dependencies(&self, node: &Node) -> Vec<(String, String)> { + let mut deps = Vec::new(); + self.walk_local(node, &mut |child| { + if LOCAL_WRITE_TYPES.contains(&child.r#type.as_str()) { + if let Some(Child::String(lhs)) = child.children.first() { + if let Some(rhs) = child.children.get(1).and_then(ast::node) { + for read in self.local_reads(rhs) { + if lhs != &read { + deps.push((lhs.clone(), read)); + } + } + } + } + } + }); + deps.sort(); + deps.dedup(); + deps + } + + fn co_use_edges(&self, node: &Node) -> Vec<(String, String)> { + let reads: Vec<_> = self.local_reads(node).into_iter().collect(); + let mut out = Vec::new(); + for i in 0..reads.len() { + for j in i + 1..reads.len() { + out.push((reads[i].clone(), reads[j].clone())); + } + } + out + } + + fn walk_local(&self, node: &Node, blk: &mut dyn FnMut(&Node)) { + if SKIP_NESTED_TYPES.contains(&node.r#type.as_str()) { + return; + } + blk(node); + for child in node.children.iter().filter_map(ast::node) { + self.walk_local(child, blk); + } + } +} + 
+struct RawBoundary { + line: usize, + kind: String, + text: String, +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs new file mode 100644 index 000000000..69c44cb80 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs @@ -0,0 +1,264 @@ +use crate::decomplex::ast::{Span}; +use crate::decomplex::detectors::{local_flow, weighted_inlined_cognitive_complexity}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct LocalityDragRow { + pub at: String, + pub file: String, + pub owner: String, + pub defn: String, + pub method: String, + pub line: usize, + pub variable: String, + pub defined_at: usize, + pub used_at: usize, + pub gap_lines: usize, + pub gap_statements: usize, + pub unrelated_statements: usize, + pub setup_statements: usize, + pub related_statements: usize, + pub boundary_crossings: usize, + pub local_complexity: f64, + pub score: isize, + pub definition_deps: Vec, + pub use_reads: Vec, + pub examples: Vec, + pub boundaries: Vec, + pub reason: String, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct Example { + pub line: usize, + pub source: String, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct BoundaryInfo { + pub line: usize, + pub marker: String, +} + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { + let summaries = local_flow::scan_files(files, _language)?; + let mut detector = LocalityDrag::new(summaries); + Ok(detector.findings()) +} + +struct LocalityDrag { + summaries: Vec, + min_unrelated_statements: usize, + min_gap_lines: usize, + min_local_complexity: f64, + min_score: isize, + max_findings_per_method: usize, +} + +impl LocalityDrag { + fn new(summaries: Vec) -> Self { + Self { + 
summaries, + min_unrelated_statements: 4, + min_gap_lines: 8, + min_local_complexity: 12.0, + min_score: 60, + max_findings_per_method: 3, + } + } + + fn findings(&mut self) -> Vec { + let mut out: Vec<_> = self.summaries.iter().flat_map(|s| self.findings_for(s)).collect(); + out.sort_by(|a, b| { + b.score.cmp(&a.score) + .then_with(|| b.unrelated_statements.cmp(&a.unrelated_statements)) + .then_with(|| b.gap_lines.cmp(&a.gap_lines)) + .then_with(|| a.file.cmp(&b.file)) + .then_with(|| a.line.cmp(&b.line)) + }); + out + } + + fn findings_for(&self, summary: &local_flow::MethodSummary) -> Vec { + if summary.statements.len() < self.min_unrelated_statements + 2 { return Vec::new() } + + let local_complexity = weighted_inlined_cognitive_complexity::LocalScorer::new().score(&summary.node).score; + if local_complexity < self.min_local_complexity { return Vec::new() } + + let mut findings = Vec::new(); + for (index, statement) in summary.statements.iter().enumerate() { + for name in &statement.writes { + if let Some(f) = self.finding_for_write(summary, local_complexity, statement, index, name) { + findings.push(f); + } + } + } + + findings.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.defined_at.cmp(&b.defined_at)).then_with(|| a.variable.cmp(&b.variable))); + findings.into_iter().take(self.max_findings_per_method).collect() + } + + fn finding_for_write(&self, summary: &local_flow::MethodSummary, local_complexity: f64, statement: &local_flow::Statement, index: usize, name: &str) -> Option { + if self.ignorable_local(name) { return None } + + let use_index = self.first_read_before_rewrite(&summary.statements, index, name)?; + if self.same_prefix_staging_batch(&summary.statements, use_index, name) { return None } + + let gap = &summary.statements[(index + 1)..use_index]; + if gap.is_empty() { return None } + + let gap_refs: Vec<_> = gap.iter().collect(); + let (related, unrelated) = self.classify_gap_statements(name, statement, &gap_refs); + let substantive_unrelated: 
Vec<_> = unrelated.into_iter().filter(|s| !self.trivial_initializer(s)).collect(); + if substantive_unrelated.len() < self.min_unrelated_statements { return None } + + let use_statement = &summary.statements[use_index]; + let gap_lines = use_statement.line - statement.line; + let boundaries = self.boundary_crossings(summary, index, use_index); + if gap_lines < self.min_gap_lines && boundaries.is_empty() { return None } + + let score = self.score_for(name, &substantive_unrelated, &related, gap_lines, &boundaries, local_complexity, self.read_count_after_write(&summary.statements, index, name)); + if score < self.min_score { return None } + + let at = format!("{}:{}:{}", summary.file, summary.name, statement.line); + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), summary.span); + + Some(LocalityDragRow { + at, + file: summary.file.clone(), + owner: summary.owner.clone(), + defn: summary.name.clone(), + method: summary.name.clone(), + line: statement.line, + variable: name.to_string(), + defined_at: statement.line, + used_at: use_statement.line, + gap_lines, + gap_statements: gap.len(), + unrelated_statements: substantive_unrelated.len(), + setup_statements: (gap.len() - related.len()) - substantive_unrelated.len(), + related_statements: related.len(), + boundary_crossings: boundaries.len(), + local_complexity: self.round(local_complexity), + score, + definition_deps: self.definition_deps(statement, name).into_iter().collect(), + use_reads: use_statement.reads.iter().cloned().collect(), + examples: substantive_unrelated.iter().take(3).map(|s| self.example_for(s)).collect(), + boundaries: boundaries.iter().map(|b| self.boundary_for(b)).collect(), + reason: self.reason_for(name, &substantive_unrelated, gap_lines, &boundaries, local_complexity), + spans, + }) + } + + fn first_read_before_rewrite(&self, statements: &[local_flow::Statement], index: usize, name: &str) -> Option { + for (offset, statement) in statements.iter().skip(index + 1).enumerate() { + if 
statement.writes.contains(name) { return None } + if statement.reads.contains(name) { return Some(index + 1 + offset) } + } + None + } + + fn read_count_after_write(&self, statements: &[local_flow::Statement], index: usize, name: &str) -> usize { + statements.iter().skip(index + 1).filter(|s| s.reads.contains(name)).count() + } + + fn classify_gap_statements<'a>(&self, name: &str, definition: &local_flow::Statement, gap: &'a [&local_flow::Statement]) -> (Vec<&'a local_flow::Statement>, Vec<&'a local_flow::Statement>) { + let mut related_names = BTreeSet::new(); + related_names.insert(name.to_string()); + for d in self.definition_deps(definition, name) { related_names.insert(d); } + + let mut related = Vec::new(); + let mut unrelated = Vec::new(); + for s in gap { + let new_related = self.derived_from_related(s, &related_names); + let touched: BTreeSet<_> = s.reads.union(&s.writes).cloned().collect(); + let touches_related = !touched.is_disjoint(&related_names); + if touches_related || !new_related.is_empty() { + related.push(*s); + for n in new_related { related_names.insert(n); } + } else { + unrelated.push(*s); + } + } + (related, unrelated) + } + + fn definition_deps(&self, statement: &local_flow::Statement, name: &str) -> BTreeSet { + statement.dependencies.iter().filter(|(lhs, _)| lhs == name).map(|(_, rhs)| rhs.clone()).collect() + } + + fn derived_from_related(&self, statement: &local_flow::Statement, related_names: &BTreeSet) -> BTreeSet { + statement.dependencies.iter().filter(|(_, rhs)| related_names.contains(rhs)).map(|(lhs, _)| lhs.clone()).collect() + } + + fn boundary_crossings<'a>(&self, summary: &'a local_flow::MethodSummary, definition_index: usize, use_index: usize) -> Vec<&'a local_flow::Boundary> { + summary.boundaries.iter().filter(|b| b.before_index >= definition_index && b.after_index <= use_index).collect() + } + + fn score_for(&self, variable: &str, unrelated: &[&local_flow::Statement], related: &[&local_flow::Statement], gap_lines: usize, 
boundaries: &[&local_flow::Boundary], local_complexity: f64, read_count: usize) -> isize { + let mut score = (unrelated.len() as isize * 5) + (gap_lines.min(30) as isize) + (boundaries.len() as isize * 8) + (local_complexity.min(25.0).round() as isize); + if read_count == 1 { score += 5; } + if self.benign_local(variable) { score -= 8; } + score -= related.len() as isize * 2; + score + } + + fn ignorable_local(&self, name: &str) -> bool { + name.starts_with('_') || self.source_location_local(name) + } + + fn same_prefix_staging_batch(&self, statements: &[local_flow::Statement], use_index: usize, name: &str) -> bool { + let Some(prefix) = self.staging_prefix(name) else { return false }; + let staged_names: BTreeSet<_> = statements.iter().take(use_index).flat_map(|s| s.writes.iter()).filter(|n| n.starts_with(&format!("{}_", prefix))).cloned().collect(); + if staged_names.len() < 4 { return false } + let use_reads = &statements[use_index].reads; + staged_names.intersection(use_reads).count() >= 4 + } + + fn trivial_initializer(&self, statement: &local_flow::Statement) -> bool { + if statement.writes.is_empty() || !statement.reads.is_empty() { return false } + let source = statement.source.trim(); + let re = regex::Regex::new(r"^\w+\s*=\s*(?:\{\}|\[\]|nil|false|true|0|T\.let\((?:nil|false|true|0)\b)").unwrap(); + re.is_match(source) + } + + fn staging_prefix(&self, name: &str) -> Option { + let parts: Vec<_> = name.split('_').collect(); + if parts.len() >= 2 && parts[0].len() >= 3 { Some(parts[0].to_string()) } else { None } + } + + fn benign_local(&self, name: &str) -> bool { self.source_location_local(name) } + + fn source_location_local(&self, name: &str) -> bool { + let re = regex::Regex::new(r"(?i)(?:\A|_)(?:tok|token|span|source|source_code|line|column|col|pos|idx|index|loc|location)(?:\z|_)").unwrap(); + re.is_match(name) + } + + fn example_for(&self, statement: &local_flow::Statement) -> Example { + let source = 
statement.source.lines().next().unwrap_or("").trim(); + let source = if source.len() > 99 { format!("{}...", &source[0..96]) } else { source.to_string() }; + Example { line: statement.line, source } + } + + fn boundary_for(&self, boundary: &local_flow::Boundary) -> BoundaryInfo { + BoundaryInfo { line: boundary.line, marker: if boundary.text.is_empty() { boundary.kind.clone() } else { boundary.text.clone() } } + } + + fn reason_for(&self, variable: &str, unrelated: &[&local_flow::Statement], gap_lines: usize, boundaries: &[&local_flow::Boundary], local_complexity: f64) -> String { + let mut parts = vec![ + format!("`{}` is initialized {} line(s) before first use", variable, gap_lines), + format!("{} unrelated intervening statement(s)", unrelated.len()), + ]; + if !boundaries.is_empty() { parts.push(format!("{} structural boundary crossing(s)", boundaries.len())); } + parts.push(format!("method local complexity {}", self.round(local_complexity))); + parts.join("; ") + } + + fn round(&self, value: f64) -> f64 { (value * 10.0).round() / 10.0 } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/miner.rs b/gems/decomplex/rust/src/decomplex/detectors/miner.rs index 1053533b3..d771dd1a0 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/miner.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/miner.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::Span; -use crate::decomplex::syntax::{self, DecisionSite, Document, Language}; +use crate::decomplex::syntax::{self, DecisionSite, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -37,7 +37,7 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result for doc in documents { sites.extend(doc.decision_sites); } - let mut m = Miner::new(sites); + let m = Miner::new(sites); Ok(MinerReport { missing_abstractions: m.missing_abstractions(2), neglected_conditions: m.neglected_conditions(3), diff --git a/gems/decomplex/rust/src/decomplex/detectors/mod.rs 
b/gems/decomplex/rust/src/decomplex/detectors/mod.rs index 9245a612a..d9c9c153e 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/mod.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/mod.rs @@ -1,10 +1,18 @@ pub mod co_update; pub mod decision_pressure; +pub mod derived_state; pub mod flay_similarity; +pub mod implicit_control_flow; +pub mod inconsistent_rename_clone; +pub mod local_flow; +pub mod locality_drag; pub mod miner; +pub mod operational_discontinuity; pub mod predicate_alias; pub mod redundant_nil_guard; pub mod semantic_alias; pub mod state_branch_density; pub mod state_mesh; +pub mod structural_topology; pub mod temporal_ordering_pressure; +pub mod weighted_inlined_cognitive_complexity; diff --git a/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs b/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs new file mode 100644 index 000000000..a9206c16d --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs @@ -0,0 +1,183 @@ +use crate::decomplex::ast::{Span}; +use crate::decomplex::detectors::local_flow; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct OperationalDiscontinuityRow { + pub file: String, + pub defn: String, + pub owner: String, + pub method: String, + pub line: usize, + pub at: String, + pub score: isize, + pub resets: usize, + pub dead_total: usize, + pub new_total: usize, + pub reset_points: Vec, + pub confidence: String, + pub confidence_reasons: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct ResetPoint { + pub line: usize, + pub kind: String, + pub text: String, + pub before_statement: usize, + pub after_statement: usize, + pub dead: Vec, + pub new: Vec, + pub continuing: Vec, +} + +struct RangeInfo { + first: usize, + last: usize, 
+} + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { + let summaries = local_flow::scan_files(files, _language)?; + let detector = OperationalDiscontinuity::new(summaries); + Ok(detector.findings()) +} + +struct OperationalDiscontinuity { + summaries: Vec, + min_dead: usize, + min_new: usize, + max_continuing: usize, + min_score: isize, +} + +impl OperationalDiscontinuity { + fn new(summaries: Vec) -> Self { + Self { + summaries, + min_dead: 2, + min_new: 2, + max_continuing: 1, + min_score: 12, + } + } + + fn findings(&self) -> Vec { + let mut out: Vec<_> = self.summaries.iter().filter_map(|s| self.finding_for(s)).collect(); + out.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.file.cmp(&b.file)).then_with(|| a.line.cmp(&b.line))); + out + } + + fn finding_for(&self, summary: &local_flow::MethodSummary) -> Option { + if summary.boundaries.is_empty() { return None } + + let ranges = self.variable_ranges(summary); + let resets: Vec<_> = summary.boundaries.iter().filter_map(|b| self.reset_at(b, &ranges)).collect(); + if resets.is_empty() { return None } + + let score = resets.iter().map(|r| (r.dead.len() as isize + r.new.len() as isize - r.continuing.len() as isize)).sum::() + (resets.len() as isize * 8); + if score < self.min_score { return None } + + let confidence_reasons = self.confidence_reasons_for(&summary.name, score, &resets); + let at = format!("{}:{}:{}", summary.file, summary.name, summary.line); + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), summary.span); + + Some(OperationalDiscontinuityRow { + file: summary.file.clone(), + defn: summary.name.clone(), + owner: summary.owner.clone(), + method: summary.name.clone(), + line: summary.line, + at, + score, + resets: resets.len(), + dead_total: resets.iter().map(|r| r.dead.len()).sum(), + new_total: resets.iter().map(|r| r.new.len()).sum(), + reset_points: resets, + confidence: if confidence_reasons.is_empty() { "review".to_string() } else { "high".to_string() }, + 
confidence_reasons, + spans, + }) + } + + fn confidence_reasons_for(&self, method_name: &str, score: isize, resets: &[ResetPoint]) -> Vec { + let explicit_phase = resets.iter().any(|r| self.phase_marker(r)); + let mut reasons = Vec::new(); + if resets.len() >= 2 { reasons.push("repeated_resets".to_string()); } + if explicit_phase { reasons.push("explicit_phase_marker".to_string()); } + if score >= 20 { reasons.push("high_score".to_string()); } + + if self.grammar_method(method_name) && !explicit_phase { + reasons.retain(|r| r != "repeated_resets" && r != "high_score"); + } + reasons + } + + fn phase_marker(&self, reset: &ResetPoint) -> bool { + let re = regex::Regex::new(r"(?i)^\#\s*(?:\d+[a-z]?\s*[.)]|(?:phase|step|stage)\b)").unwrap(); + re.is_match(&reset.text) + } + + fn grammar_method(&self, method_name: &str) -> bool { + let re = regex::Regex::new(r"^parse(?:_|$)").unwrap(); + re.is_match(method_name) + } + + fn reset_at(&self, boundary: &local_flow::Boundary, ranges: &BTreeMap) -> Option { + let before = boundary.before_index; + let after = boundary.after_index; + + let mut dead = Vec::new(); + let mut continuing = Vec::new(); + let mut new_vars = Vec::new(); + + for (name, range) in ranges { + if range.first <= before { + if range.last <= before { + dead.push(name.clone()); + } + if range.last >= after { + continuing.push(name.clone()); + } + } + if range.first >= after { + new_vars.push(name.clone()); + } + } + + if dead.len() < self.min_dead || new_vars.len() < self.min_new || continuing.len() > self.max_continuing { + return None; + } + + dead.sort(); + new_vars.sort(); + continuing.sort(); + + Some(ResetPoint { + line: boundary.line, + kind: boundary.kind.clone(), + text: boundary.text.clone(), + before_statement: before, + after_statement: after, + dead, + new: new_vars, + continuing, + }) + } + + fn variable_ranges(&self, summary: &local_flow::MethodSummary) -> BTreeMap { + let mut ranges = BTreeMap::new(); + for statement in &summary.statements { + 
let touched: BTreeSet<_> = statement.reads.union(&statement.writes).cloned().collect(); + for name in touched { + ranges.entry(name).and_modify(|r: &mut RangeInfo| r.last = statement.index).or_insert(RangeInfo { first: statement.index, last: statement.index }); + } + } + ranges + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs b/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs new file mode 100644 index 000000000..31778dc5a --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs @@ -0,0 +1,384 @@ +use crate::decomplex::ast::{self, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::BTreeMap; +use std::path::PathBuf; + +#[derive(Clone, Debug, Serialize)] +pub struct StructuralTopologyReport { + pub methods: Vec, + pub edges: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct Method { + pub id: String, + pub owner: String, + pub name: String, + pub file: String, + pub line: usize, + pub span: Span, + pub visibility: String, +} + +#[derive(Clone, Debug, Serialize)] +pub struct Edge { + pub caller: String, + pub callee: String, + pub caller_name: String, + pub callee_name: String, + pub file: String, + pub line: usize, + pub span: Span, + pub r#type: String, + pub kind: String, + pub confidence: String, +} + +const VISIBILITY_MIDS: &[&str] = &["public", "protected", "private"]; +const OWNER_TYPES: &[&str] = &["CLASS", "MODULE"]; +const METHOD_TYPES: &[&str] = &["DEFN", "DEFS"]; +const SKIP_NESTED_TYPES: &[&str] = &["CLASS", "MODULE", "DEFN", "DEFS", "LAMBDA"]; +const CONDITIONAL_TYPES: &[&str] = &["IF", "UNLESS", "CASE", "CASE2"]; +const ITERATION_TYPES: &[&str] = &["ITER", "FOR", "WHILE", "UNTIL"]; + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { + let mut methods = Vec::new(); + let mut parsed = Vec::new(); + + for file in files { + let (root, lines) = ast::parse(file)?; + let mut mc 
= MethodCollector::new(file.to_string_lossy().to_string(), lines.clone()); + methods.extend(mc.scan(&root)); + parsed.push((file.to_string_lossy().to_string(), root, lines)); + } + + let mut edges = Vec::new(); + for (file, root, lines) in &parsed { + let mut ec = EdgeCollector::new(file.clone(), lines.clone(), &methods); + edges.extend(ec.scan(root)); + } + + Ok(StructuralTopologyReport { methods, edges }) +} + +pub struct Graph { + pub methods: Vec, + pub edges: Vec, + method_by_id: BTreeMap, + edges_by_caller: BTreeMap>, + edges_by_callee: BTreeMap>, +} + +impl Graph { + pub fn new(methods: Vec, edges: Vec) -> Self { + let mut method_by_id = BTreeMap::new(); + for m in &methods { method_by_id.insert(m.id.clone(), m.clone()); } + + let mut edges_by_caller = BTreeMap::new(); + let mut edges_by_callee = BTreeMap::new(); + for e in &edges { + edges_by_caller.entry(e.caller.clone()).or_insert_with(Vec::new).push(e.clone()); + edges_by_callee.entry(e.callee.clone()).or_insert_with(Vec::new).push(e.clone()); + } + + Self { methods, edges, method_by_id, edges_by_caller, edges_by_callee } + } + + pub fn method(&self, id: &str) -> Option<&Method> { self.method_by_id.get(id) } + + pub fn internal_calls(&self, id: &str) -> Vec { self.edges_by_caller.get(id).cloned().unwrap_or_default() } + + pub fn internal_callers(&self, id: &str) -> Vec { self.edges_by_callee.get(id).cloned().unwrap_or_default() } + + pub fn single_internal_caller(&self, id: &str) -> bool { + let callers = self.internal_callers(id); + let mut unique = BTreeMap::new(); + for c in callers { unique.insert(c.caller, true); } + unique.len() == 1 + } + + pub fn visibility(&self, id: &str) -> Option<&str> { self.method(id).map(|m| m.visibility.as_str()) } +} + +struct MethodCollector { + file: String, + lines: Vec, +} + +impl MethodCollector { + fn new(file: String, lines: Vec) -> Self { Self { file, lines } } + + fn scan(&mut self, root: &Node) -> Vec { + let mut out = Vec::new(); + 
out.extend(self.methods_from_statements(&self.top_level_statements(root), &self.top_level_owner())); + self.walk(root, &Vec::new(), &mut out); + out + } + + fn walk(&self, node: &Node, owners: &[String], out: &mut Vec) { + if OWNER_TYPES.contains(&node.r#type.as_str()) { + let owner = self.full_owner_name(owners, node); + out.extend(self.owner_methods(node, &owner)); + let mut next_owners = owners.to_vec(); + next_owners.push(self.owner_segment(node)); + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_owners, out); + } + } else { + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, owners, out); + } + } + } + + fn owner_methods(&self, owner_node: &Node, owner: &str) -> Vec { + let Some(body) = self.owner_body(owner_node) else { return Vec::new() }; + self.methods_from_statements(&self.owner_statements(body), owner) + } + + fn methods_from_statements(&self, stmts: &[&Node], owner: &str) -> Vec { + let mut methods = Vec::new(); + let mut visibility = "public".to_string(); + for stmt in stmts { + if self.bare_visibility_marker(stmt) { + visibility = ast::child_to_string(stmt.children.get(0)).unwrap_or_default(); + } else if self.visibility_call(stmt) { + visibility = self.handle_visibility_call(stmt, owner, &visibility, &mut methods); + } else if METHOD_TYPES.contains(&stmt.r#type.as_str()) { + methods.push(self.method_record(stmt, owner, &visibility)); + } + } + methods + } + + fn handle_visibility_call(&self, stmt: &Node, owner: &str, current_visibility: &str, methods: &mut Vec) -> String { + let vis = ast::child_to_string(stmt.children.get(0)).unwrap_or_default(); + if let Some(args) = stmt.children.get(1).and_then(ast::node) { + for arg in args.children.iter().filter_map(ast::node) { + if METHOD_TYPES.contains(&arg.r#type.as_str()) { + methods.push(self.method_record(arg, owner, &vis)); + } else if let Some(name) = self.literal_method_name(arg) { + if let Some(m) = methods.iter_mut().rev().find(|m| 
m.name == name) { + m.visibility = vis.clone(); + } + } + } + } + current_visibility.to_string() + } + + fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { + let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; + let scope = owner_node.children.get(scope_index).and_then(ast::node)?; + if scope.r#type != "SCOPE" { return None } + scope.children.get(2).and_then(ast::node) + } + + fn owner_statements<'a>(&self, body: &'a Node) -> Vec<&'a Node> { + if body.r#type == "BLOCK" { body.children.iter().filter_map(ast::node).collect() } else { vec![body] } + } + + fn top_level_statements<'a>(&self, root: &'a Node) -> Vec<&'a Node> { + root.children.iter().filter_map(ast::node).flat_map(|c| if c.r#type == "BLOCK" { c.children.iter().filter_map(ast::node).collect() } else { vec![c] }).collect() + } + + fn bare_visibility_marker(&self, node: &Node) -> bool { + node.r#type == "VCALL" && VISIBILITY_MIDS.contains(&ast::child_to_string(node.children.get(0)).unwrap_or_default().as_str()) + } + + fn visibility_call(&self, node: &Node) -> bool { + node.r#type == "FCALL" && VISIBILITY_MIDS.contains(&ast::child_to_string(node.children.get(0)).unwrap_or_default().as_str()) + } + + fn literal_method_name(&self, node: &Node) -> Option { + match node.r#type.as_str() { + "LIT" | "STR" | "DSTR" => ast::child_to_string(node.children.get(0)), + _ => None + } + } + + fn method_record(&self, node: &Node, owner: &str, visibility: &str) -> Method { + let name = self.method_name(node); + Method { + id: format!("{}#{}", owner, name), + owner: owner.to_string(), + name: name.clone(), + file: self.file.clone(), + line: node.first_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + visibility: if node.r#type == "DEFS" { "public".to_string() } else { visibility.to_string() }, + } + } + + fn method_name(&self, node: &Node) -> String { + if node.r#type == "DEFS" { + let receiver = node.children.get(0).and_then(ast::node); + let 
prefix = if let Some(r) = receiver { + if r.r#type == "SELF" { "self".to_string() } else { ast::slice(r, &self.lines) } + } else { "?".to_string() }; + format!("{}.{}", prefix, ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string())) + } else { + ast::child_to_string(node.children.get(0)).unwrap_or_else(|| "?".to_string()) + } + } + + fn full_owner_name(&self, owners: &[String], node: &Node) -> String { + let mut next = owners.to_vec(); + next.push(self.owner_segment(node)); + next.join("::") + } + + fn owner_segment(&self, node: &Node) -> String { + let text = ast::slice(node.children.first().and_then(ast::node).unwrap_or(node), &self.lines); + if text.is_empty() { "(anonymous)".to_string() } else { text } + } + + fn top_level_owner(&self) -> String { format!("(top-level:{})", self.file) } +} + +struct EdgeCollector { + file: String, + lines: Vec, + method_by_id: BTreeMap, +} + +impl EdgeCollector { + fn new(file: String, lines: Vec, methods: &[Method]) -> Self { + let mut map = BTreeMap::new(); + for m in methods { map.insert(m.id.clone(), m.clone()); } + Self { file, lines, method_by_id: map } + } + + fn scan(&mut self, root: &Node) -> Vec { + let mut out = Vec::new(); + let top_level_methods: Vec<_> = self.top_level_statements(root).into_iter().filter(|s| METHOD_TYPES.contains(&s.r#type.as_str())).collect(); + for m_node in top_level_methods { + let id = format!("(top-level:{})#{}", self.file, self.method_name(m_node)); + if let Some(m) = self.method_by_id.get(&id) { + self.collect_calls(m_node, m, &Vec::new(), &mut out); + } + } + self.walk(root, &Vec::new(), &mut out); + out + } + + fn walk(&self, node: &Node, owners: &[String], out: &mut Vec) { + if OWNER_TYPES.contains(&node.r#type.as_str()) { + let owner = self.full_owner_name(owners, node); + for m_node in self.owner_methods(node) { + let id = format!("{}#{}", owner, self.method_name(m_node)); + if let Some(m) = self.method_by_id.get(&id) { + self.collect_calls(m_node, m, 
&Vec::new(), out); + } + } + let mut next_owners = owners.to_vec(); + next_owners.push(self.owner_segment(node)); + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_owners, out); + } + } else { + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, owners, out); + } + } + } + + fn collect_calls(&self, node: &Node, caller: &Method, context_stack: &[String], out: &mut Vec) { + if SKIP_NESTED_TYPES.contains(&node.r#type.as_str()) && !METHOD_TYPES.contains(&node.r#type.as_str()) { return } + + let mut next_context = context_stack.to_vec(); + if CONDITIONAL_TYPES.contains(&node.r#type.as_str()) { next_context.push("conditional".to_string()) } + if ITERATION_TYPES.contains(&node.r#type.as_str()) { next_context.push("iterates".to_string()) } + + if let Some(edge) = self.internal_edge(node, caller, &next_context) { + if edge.caller != edge.callee { out.push(edge) } + } + + for child in node.children.iter().filter_map(ast::node) { + self.collect_calls(child, caller, &next_context, out); + } + } + + fn internal_edge(&self, node: &Node, caller: &Method, context_stack: &[String]) -> Option { + let call = self.internal_call_name(node, caller)?; + let id = format!("{}#{}", caller.owner, call.name); + let callee = self.method_by_id.get(&id)?; + + Some(Edge { + caller: caller.id.clone(), + callee: callee.id.clone(), + caller_name: caller.name.clone(), + callee_name: callee.name.clone(), + file: self.file.clone(), + line: node.first_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + r#type: context_stack.last().cloned().unwrap_or_else(|| "always".to_string()), + kind: call.kind, + confidence: "high".to_string(), + }) + } + + fn internal_call_name(&self, node: &Node, caller: &Method) -> Option { + match node.r#type.as_str() { + "FCALL" | "VCALL" => { + Some(InternalCallName { name: self.scoped_name(caller, &ast::child_to_string(node.children.get(0)).unwrap_or_default()), kind: 
"bare_internal".to_string() }) + } + "CALL" | "OPCALL" => { + let recv = node.children.get(0).and_then(ast::node)?; + if recv.r#type != "SELF" { return None } + let mid = ast::child_to_string(node.children.get(1))?; + Some(InternalCallName { name: self.scoped_name(caller, &mid), kind: "direct_self".to_string() }) + } + _ => None + } + } + + fn scoped_name(&self, caller: &Method, mid: &str) -> String { + if caller.name.starts_with("self.") { format!("self.{}", mid) } else { mid.to_string() } + } + + // Reuse helpers from MethodCollector + fn top_level_statements<'a>(&self, root: &'a Node) -> Vec<&'a Node> { + root.children.iter().filter_map(ast::node).flat_map(|c| if c.r#type == "BLOCK" { c.children.iter().filter_map(ast::node).collect() } else { vec![c] }).collect() + } + fn method_name(&self, node: &Node) -> String { + if node.r#type == "DEFS" { + let receiver = node.children.get(0).and_then(ast::node); + let prefix = if let Some(r) = receiver { + if r.r#type == "SELF" { "self".to_string() } else { ast::slice(r, &self.lines) } + } else { "?".to_string() }; + format!("{}.{}", prefix, ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string())) + } else { + ast::child_to_string(node.children.get(0)).unwrap_or_else(|| "?".to_string()) + } + } + fn owner_methods<'a>(&self, owner_node: &'a Node) -> Vec<&'a Node> { + let Some(body) = self.owner_body(owner_node) else { return Vec::new() }; + self.owner_statements(body) + } + fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { + let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; + let scope = owner_node.children.get(scope_index).and_then(ast::node)?; + if scope.r#type != "SCOPE" { return None } + scope.children.get(2).and_then(ast::node) + } + fn owner_statements<'a>(&self, body: &'a Node) -> Vec<&'a Node> { + if body.r#type == "BLOCK" { body.children.iter().filter_map(ast::node).collect() } else { vec![body] } + } + fn full_owner_name(&self, owners: &[String], node: 
&Node) -> String { + let mut next = owners.to_vec(); + next.push(self.owner_segment(node)); + next.join("::") + } + fn owner_segment(&self, node: &Node) -> String { + let text = ast::slice(node.children.first().and_then(ast::node).unwrap_or(node), &self.lines); + if text.is_empty() { "(anonymous)".to_string() } else { text } + } +} + +struct InternalCallName { + name: String, + kind: String, +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs new file mode 100644 index 000000000..11fa561dd --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs @@ -0,0 +1,474 @@ +use crate::decomplex::ast::{self, Node, Span}; +use crate::decomplex::detectors::structural_topology; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct WeightedInlinedCognitiveComplexityRow { + pub at: String, + pub owner: String, + pub method: String, + pub local: f64, + pub inlined: f64, + pub hidden: f64, + pub depth: usize, + pub single_caller_callees: Vec, + pub call_chain: Vec, + pub reason: String, + pub signals: BTreeMap, + pub spans: BTreeMap, +} + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { + let mut parsed = BTreeMap::new(); + for file in files { + parsed.insert(file.to_string_lossy().to_string(), ast::parse(file)?); + } + + let topology_report = structural_topology::scan_files(files, _language)?; + let topology = structural_topology::Graph::new(topology_report.methods, topology_report.edges); + + let mut bodies = Vec::new(); + for (file, (root, lines)) in &parsed { + let mut collector = MethodBodyCollector::new(file.clone(), lines.clone()); + bodies.extend(collector.scan(root)); + } + + let mut scores = BTreeMap::new(); + 
for body in bodies { + let score = LocalScorer::new().score(&body.node); + scores.insert(body.id.clone(), LocalScore { + id: body.id, + owner: body.owner, + name: body.name, + file: body.file, + line: body.line, + span: body.span, + score: score.score, + signals: score.signals, + }); + } + + let analyzer = Analyzer::new(topology, scores, 12.0, 15.0, 2); + Ok(analyzer.findings()) +} + +struct MethodBody { + id: String, + owner: String, + name: String, + file: String, + line: usize, + span: Span, + node: Node, +} + +struct LocalScore { + id: String, + owner: String, + name: String, + file: String, + line: usize, + span: Span, + score: f64, + signals: BTreeMap, +} + +struct Contribution { + #[allow(dead_code)] + callee_id: String, + callee_name: String, + score: f64, + #[allow(dead_code)] + weight: f64, + depth: usize, + chain: Vec, +} + +const OWNER_TYPES: &[&str] = &["CLASS", "MODULE"]; +const METHOD_TYPES: &[&str] = &["DEFN", "DEFS"]; +const SKIP_NESTED_TYPES: &[&str] = &["CLASS", "MODULE", "DEFN", "DEFS", "LAMBDA"]; +const BRANCH_TYPES: &[&str] = &["IF", "UNLESS"]; +const LOOP_TYPES: &[&str] = &["WHILE", "UNTIL", "FOR", "ITER"]; +const CASE_TYPES: &[&str] = &["CASE", "CASE2"]; +const RESCUE_TYPES: &[&str] = &["RESCUE", "RESBODY"]; +const EARLY_EXIT_TYPES: &[&str] = &["RETURN", "BREAK", "NEXT", "REDO", "RETRY"]; +const BOOLEAN_TYPES: &[&str] = &["AND", "OR"]; + +struct MethodBodyCollector { + file: String, + lines: Vec, +} + +impl MethodBodyCollector { + fn new(file: String, lines: Vec) -> Self { Self { file, lines } } + + fn scan(&mut self, root: &Node) -> Vec { + let mut out = Vec::new(); + for m_node in self.top_level_methods(root) { + out.push(self.method_body(m_node, &self.top_level_owner())); + } + self.walk(root, &Vec::new(), &mut out); + out + } + + fn top_level_methods<'a>(&self, root: &'a Node) -> Vec<&'a Node> { + self.top_level_statements(root).into_iter().filter(|s| METHOD_TYPES.contains(&s.r#type.as_str())).collect() + } + + fn walk<'a>(&self, node: 
&'a Node, owners: &[String], out: &mut Vec) { + if OWNER_TYPES.contains(&node.r#type.as_str()) { + let owner = self.full_owner_name(owners, node); + for m_node in self.owner_methods(node) { + out.push(self.method_body(m_node, &owner)); + } + let mut next_owners = owners.to_vec(); + next_owners.push(self.owner_segment(node)); + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_owners, out); + } + } else { + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, owners, out); + } + } + } + + fn owner_methods<'a>(&self, owner_node: &'a Node) -> Vec<&'a Node> { + let Some(body) = self.owner_body(owner_node) else { return Vec::new() }; + self.owner_statements(body).into_iter().flat_map(|stmt| { + if METHOD_TYPES.contains(&stmt.r#type.as_str()) { + vec![stmt] + } else if self.visibility_call(stmt) { + self.inline_methods(stmt) + } else { + vec![] + } + }).collect() + } + + fn method_body(&self, node: &Node, owner: &str) -> MethodBody { + let name = self.method_name(node); + MethodBody { + id: format!("{}#{}", owner, name), + owner: owner.to_string(), + name, + file: self.file.clone(), + line: node.first_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + node: node.clone(), + } + } + + fn inline_methods<'a>(&self, stmt: &'a Node) -> Vec<&'a Node> { + let Some(args) = stmt.children.get(1).and_then(ast::node) else { return Vec::new() }; + args.children.iter().filter_map(ast::node).filter(|arg| METHOD_TYPES.contains(&arg.r#type.as_str())).collect() + } + + fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { + let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; + let scope = owner_node.children.get(scope_index).and_then(ast::node)?; + if scope.r#type != "SCOPE" { return None } + scope.children.get(2).and_then(ast::node) + } + + fn owner_statements<'a>(&self, body: &'a Node) -> Vec<&'a Node> { + if body.r#type == "BLOCK" { 
body.children.iter().filter_map(ast::node).collect() } else { vec![body] } + } + + fn top_level_statements<'a>(&self, root: &'a Node) -> Vec<&'a Node> { + root.children.iter().filter_map(ast::node).flat_map(|c| if c.r#type == "BLOCK" { c.children.iter().filter_map(ast::node).collect() } else { vec![c] }).collect() + } + + fn visibility_call(&self, node: &Node) -> bool { + node.r#type == "FCALL" && matches!(ast::child_to_string(node.children.get(0)).unwrap_or_default().as_str(), "public" | "protected" | "private") + } + + fn method_name(&self, node: &Node) -> String { + if node.r#type == "DEFS" { + let receiver = node.children.get(0).and_then(ast::node); + let prefix = if let Some(r) = receiver { + if r.r#type == "SELF" { "self".to_string() } else { ast::slice(r, &self.lines) } + } else { "?".to_string() }; + format!("{}.{}", prefix, ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string())) + } else { + ast::child_to_string(node.children.get(0)).unwrap_or_else(|| "?".to_string()) + } + } + + fn full_owner_name(&self, owners: &[String], node: &Node) -> String { + let mut next = owners.to_vec(); + next.push(self.owner_segment(node)); + next.join("::") + } + + fn owner_segment(&self, node: &Node) -> String { + let text = ast::slice(node.children.first().and_then(ast::node).unwrap_or(node), &self.lines); + if text.is_empty() { "(anonymous)".to_string() } else { text } + } + + fn top_level_owner(&self) -> String { format!("(top-level:{})", self.file) } +} + +pub struct LocalScorer {} + +pub struct ScoreResult { + pub score: f64, + pub signals: BTreeMap, +} + +impl LocalScorer { + pub fn new() -> Self { Self {} } + + pub fn score(&self, method_node: &Node) -> ScoreResult { + let mut signals = BTreeMap::new(); + ScoreResult { + score: self.round(self.score_node(method_node, 0, &mut signals)), + signals, + } + } + + fn score_node(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + if self.skip_nested(node) { return 0.0 } + + match 
node.r#type.as_str() { + t if BRANCH_TYPES.contains(&t) => self.score_branch(node, nesting, signals), + t if LOOP_TYPES.contains(&t) => self.score_loop(node, nesting, signals), + t if CASE_TYPES.contains(&t) => self.score_case(node, nesting, signals), + t if RESCUE_TYPES.contains(&t) => self.score_rescue(node, nesting, signals), + t if EARLY_EXIT_TYPES.contains(&t) => self.score_early_exit(node, nesting, signals), + t if BOOLEAN_TYPES.contains(&t) => self.score_boolean_node(node, nesting, signals), + _ => self.score_children(node, nesting, signals), + } + } + + fn skip_nested(&self, node: &Node) -> bool { + SKIP_NESTED_TYPES.contains(&node.r#type.as_str()) && !METHOD_TYPES.contains(&node.r#type.as_str()) + } + + fn score_branch(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + *signals.entry("branches".to_string()).or_insert(0) += 1; + if nesting > 0 { *signals.entry("nested".to_string()).or_insert(0) += 1; } + let condition = node.children.get(0).and_then(ast::node); + let positive = node.children.get(1).and_then(ast::node); + let negative = node.children.get(2).and_then(ast::node); + + self.branch_cost(nesting) + + self.predicate_cost(condition, signals) + + positive.map(|n| self.score_node(n, nesting + 1, signals)).unwrap_or(0.0) + + negative.map(|n| self.score_node(n, nesting + 1, signals)).unwrap_or(0.0) + } + + fn score_loop(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + *signals.entry("loops".to_string()).or_insert(0) += 1; + if nesting > 0 { *signals.entry("nested".to_string()).or_insert(0) += 1; } + self.branch_cost(nesting) + self.score_children(node, nesting + 1, signals) + } + + fn score_case(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + *signals.entry("cases".to_string()).or_insert(0) += 1; + 0.5 + self.score_case_children(node, nesting, signals) + } + + fn score_case_children(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + 
node.children.iter().filter_map(ast::node).map(|child| { + if child.r#type == "WHEN" { self.score_when(child, nesting, signals) } else { self.score_node(child, nesting, signals) } + }).sum() + } + + fn score_when(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + let body = node.children.get(1).and_then(ast::node); + let next_when = node.children.get(2).and_then(ast::node); + body.map(|n| self.score_node(n, nesting + 1, signals)).unwrap_or(0.0) + + next_when.map(|n| self.score_node(n, nesting, signals)).unwrap_or(0.0) + } + + fn score_rescue(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + *signals.entry("rescues".to_string()).or_insert(0) += 1; + self.branch_cost(nesting) + self.score_children(node, nesting + 1, signals) + } + + fn score_early_exit(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + *signals.entry("early_exits".to_string()).or_insert(0) += 1; + let exit_cost = if nesting > 0 { 0.5 + (nesting as f64 * 0.25) } else { 0.0 }; + exit_cost + self.score_children(node, nesting, signals) + } + + fn score_boolean_node(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + *signals.entry("boolean_ops".to_string()).or_insert(0) += 1; + 0.25 + self.score_children(node, nesting, signals) + } + + fn score_children(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + node.children.iter().filter_map(ast::node).map(|child| self.score_node(child, nesting, signals)).sum() + } + + fn predicate_cost(&self, node: Option<&Node>, signals: &mut BTreeMap) -> f64 { + let Some(node) = node else { return 0.0 }; + let bools = self.boolean_count(node); + *signals.entry("boolean_ops".to_string()).or_insert(0) += bools; + (bools as f64) * 0.5 + } + + fn boolean_count(&self, node: &Node) -> usize { + let own = if BOOLEAN_TYPES.contains(&node.r#type.as_str()) { 1 } else { 0 }; + own + node.children.iter().filter_map(ast::node).map(|child| self.boolean_count(child)).sum::() + } + + fn 
branch_cost(&self, nesting: usize) -> f64 { 1.0 + (nesting as f64) } + + fn round(&self, value: f64) -> f64 { (value * 10.0).round() / 10.0 } +} + +struct Analyzer { + topology: structural_topology::Graph, + scores: BTreeMap, + min_score: f64, + min_hidden: f64, + max_depth: usize, +} + +impl Analyzer { + fn new(topology: structural_topology::Graph, scores: BTreeMap, min_score: f64, min_hidden: f64, max_depth: usize) -> Self { + Self { topology, scores, min_score, min_hidden, max_depth } + } + + fn findings(&self) -> Vec { + let mut out: Vec<_> = self.scores.values().filter_map(|s| self.finding_for(s)).collect(); + out.sort_by(|a, b| b.hidden.partial_cmp(&a.hidden).unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| b.inlined.partial_cmp(&a.inlined).unwrap_or(std::cmp::Ordering::Equal)) + .then_with(|| a.at.cmp(&b.at))); + out + } + + fn finding_for(&self, score: &LocalScore) -> Option { + let mut visited = BTreeSet::new(); + visited.insert(score.id.clone()); + let contributions = self.inlined_contributions(&score.id, 1, &mut visited); + + let hidden = self.round(contributions.iter().map(|c| c.score).sum()); + let total = self.round(score.score + hidden); + if total < self.min_score || hidden < self.min_hidden { return None } + + let direct_single_caller = self.single_caller_callees(&score.id); + let at = format!("{}:{}:{}", score.file, score.name, score.line); + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), score.span); + + Some(WeightedInlinedCognitiveComplexityRow { + at, + owner: score.owner.clone(), + method: score.name.clone(), + local: score.score, + inlined: total, + hidden, + depth: contributions.iter().map(|c| c.depth).max().unwrap_or(0), + single_caller_callees: direct_single_caller.clone(), + call_chain: self.strongest_chain(score, &contributions), + reason: self.reason(hidden, &direct_single_caller), + signals: score.signals.clone(), + spans, + }) + } + + fn inlined_contributions(&self, method_id: &str, depth: usize, visited: &mut 
BTreeSet) -> Vec { + if depth > self.max_depth { return Vec::new() } + + let mut out = Vec::new(); + for edge in self.grouped_edges(method_id) { + if visited.contains(&edge.callee) { continue; } + let Some(callee) = self.scores.get(&edge.callee) else { continue; }; + + let weight = self.contribution_weight(&edge, depth); + let direct = Contribution { + callee_id: edge.callee.clone(), + callee_name: edge.callee_name.clone(), + score: self.round(callee.score * weight), + weight: self.round(weight), + depth, + chain: vec![edge.callee_name.clone()], + }; + + let mut next_visited = visited.clone(); + next_visited.insert(edge.callee.clone()); + let nested = self.inlined_contributions(&edge.callee, depth + 1, &mut next_visited); + let nested: Vec<_> = nested.into_iter().map(|c| Contribution { + callee_id: c.callee_id, + callee_name: c.callee_name, + score: self.round(c.score * weight), + weight: self.round(c.weight * weight), + depth: c.depth, + chain: { + let mut chain = vec![edge.callee_name.clone()]; + chain.extend(c.chain); + chain + }, + }).collect(); + + out.push(direct); + out.extend(nested); + } + out + } + + fn grouped_edges(&self, method_id: &str) -> Vec { + let mut by_callee: BTreeMap> = BTreeMap::new(); + for edge in self.topology.internal_calls(method_id) { + by_callee.entry(edge.callee.clone()).or_default().push(edge); + } + by_callee.into_iter().map(|(_, edges)| { + edges.into_iter().max_by(|a, b| self.edge_weight(&a.r#type).partial_cmp(&self.edge_weight(&b.r#type)).unwrap()).unwrap() + }).collect() + } + + fn contribution_weight(&self, edge: &structural_topology::Edge, depth: usize) -> f64 { + let caller_factor = if self.topology.single_internal_caller(&edge.callee) { 1.0 } else { 0.35 }; + let visibility_factor = if self.shared_public_step(edge) { 0.6 } else { 1.0 }; + let depth_factor = match depth { + 1 => 1.0, + 2 => 0.6, + _ => 0.35, + }; + let edge_factor = self.edge_weight(&edge.r#type); + caller_factor * visibility_factor * depth_factor * 
edge_factor + } + + fn edge_weight(&self, t: &str) -> f64 { + match t { + "always" => 1.0, + "conditional" => 0.75, + "iterates" => 1.15, + _ => 1.0, + } + } + + fn shared_public_step(&self, edge: &structural_topology::Edge) -> bool { + self.topology.visibility(&edge.callee) == Some("public") && !self.topology.single_internal_caller(&edge.callee) + } + + fn single_caller_callees(&self, method_id: &str) -> Vec { + let mut out: Vec<_> = self.grouped_edges(method_id).into_iter().filter(|e| self.topology.single_internal_caller(&e.callee)).map(|e| e.callee_name).collect(); + out.sort(); + out + } + + fn strongest_chain(&self, score: &LocalScore, contributions: &[Contribution]) -> Vec { + let chain = contributions.iter().max_by(|a, b| a.score.partial_cmp(&b.score).unwrap()).map(|c| c.chain.clone()).unwrap_or_default(); + let mut out = vec![score.name.clone()]; + out.extend(chain); + out + } + + fn reason(&self, hidden: f64, single_caller_callees: &[String]) -> String { + if single_caller_callees.is_empty() { + format!("same-owner call chain adds {} weighted cognitive points", hidden) + } else { + format!("{} single-caller helper(s) add {} weighted cognitive points", single_caller_callees.len(), hidden) + } + } + + fn round(&self, value: f64) -> f64 { (value * 10.0).round() / 10.0 } +} diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs index d553cb40b..4cc7ea880 100644 --- a/gems/decomplex/rust/src/main.rs +++ b/gems/decomplex/rust/src/main.rs @@ -2,8 +2,10 @@ mod decomplex; use anyhow::{bail, Context, Result}; use decomplex::detectors::{ - co_update, decision_pressure, flay_similarity, miner, predicate_alias, redundant_nil_guard, - semantic_alias, state_branch_density, state_mesh, temporal_ordering_pressure, + co_update, decision_pressure, derived_state, flay_similarity, implicit_control_flow, + inconsistent_rename_clone, local_flow, locality_drag, miner, operational_discontinuity, + predicate_alias, redundant_nil_guard, semantic_alias, 
state_branch_density, state_mesh, + structural_topology, temporal_ordering_pressure, weighted_inlined_cognitive_complexity, }; use decomplex::parallel; use decomplex::syntax::Language; @@ -73,6 +75,54 @@ fn main() -> Result<()> { .with_context(|| "failed to scan state-mesh facts")?; println!("{}", serde_json::to_string(&report)?); } + Command::InconsistentRenameClone { language, files, .. } => { + let language = Language::parse(&language)?; + let report = inconsistent_rename_clone::scan_files(&files, language) + .with_context(|| "failed to scan inconsistent-rename-clone facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::DerivedState { language, files, .. } => { + let language = Language::parse(&language)?; + let report = derived_state::scan_files(&files, language) + .with_context(|| "failed to scan derived-state facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::ImplicitControlFlow { language, files, .. } => { + let language = Language::parse(&language)?; + let report = implicit_control_flow::scan_files(&files, language) + .with_context(|| "failed to scan implicit-control-flow facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::WeightedInlinedComplexity { language, files, .. } => { + let language = Language::parse(&language)?; + let report = weighted_inlined_cognitive_complexity::scan_files(&files, language) + .with_context(|| "failed to scan weighted-inlined-complexity facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::LocalityDrag { language, files, .. } => { + let language = Language::parse(&language)?; + let report = locality_drag::scan_files(&files, language) + .with_context(|| "failed to scan locality-drag facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::OperationalDiscontinuity { language, files, .. 
} => { + let language = Language::parse(&language)?; + let report = operational_discontinuity::scan_files(&files, language) + .with_context(|| "failed to scan operational-discontinuity facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::StructuralTopology { language, files, .. } => { + let language = Language::parse(&language)?; + let report = structural_topology::scan_files(&files, language) + .with_context(|| "failed to scan structural-topology facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::LocalFlow { language, files, .. } => { + let language = Language::parse(&language)?; + let report = local_flow::scan_files(&files, language) + .with_context(|| "failed to scan local-flow facts")?; + println!("{}", serde_json::to_string(&report)?); + } Command::FlaySimilarity { language, mass, @@ -140,6 +190,46 @@ enum Command { files: Vec, jobs: Option, }, + InconsistentRenameClone { + language: String, + files: Vec, + jobs: Option, + }, + DerivedState { + language: String, + files: Vec, + jobs: Option, + }, + ImplicitControlFlow { + language: String, + files: Vec, + jobs: Option, + }, + WeightedInlinedComplexity { + language: String, + files: Vec, + jobs: Option, + }, + LocalityDrag { + language: String, + files: Vec, + jobs: Option, + }, + OperationalDiscontinuity { + language: String, + files: Vec, + jobs: Option, + }, + StructuralTopology { + language: String, + files: Vec, + jobs: Option, + }, + LocalFlow { + language: String, + files: Vec, + jobs: Option, + }, FlaySimilarity { language: String, mass: usize, @@ -162,6 +252,14 @@ impl Command { | Self::TemporalOrderingPressure { jobs, .. } | Self::RedundantNilGuard { jobs, .. } | Self::StateMesh { jobs, .. } + | Self::InconsistentRenameClone { jobs, .. } + | Self::DerivedState { jobs, .. } + | Self::ImplicitControlFlow { jobs, .. } + | Self::WeightedInlinedComplexity { jobs, .. } + | Self::LocalityDrag { jobs, .. } + | Self::OperationalDiscontinuity { jobs, .. 
} + | Self::StructuralTopology { jobs, .. } + | Self::LocalFlow { jobs, .. } | Self::FlaySimilarity { jobs, .. } => *jobs, } } @@ -283,6 +381,94 @@ fn parse_args(args: Vec) -> Result { jobs, }) } + "inconsistent-rename-clone" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("inconsistent-rename-clone requires at least one file"); + } + Ok(Command::InconsistentRenameClone { + language, + files, + jobs, + }) + } + "derived-state" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("derived-state requires at least one file"); + } + Ok(Command::DerivedState { + language, + files, + jobs, + }) + } + "implicit-control-flow" | "ordered-protocol-mine" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("implicit-control-flow requires at least one file"); + } + Ok(Command::ImplicitControlFlow { + language, + files, + jobs, + }) + } + "weighted-inlined-complexity" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("weighted-inlined-complexity requires at least one file"); + } + Ok(Command::WeightedInlinedComplexity { + language, + files, + jobs, + }) + } + "locality-drag" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("locality-drag requires at least one file"); + } + Ok(Command::LocalityDrag { + language, + files, + jobs, + }) + } + "operational-discontinuity" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("operational-discontinuity requires at least one file"); + } + Ok(Command::OperationalDiscontinuity { + language, + files, + jobs, + }) + } + "structural-topology" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if 
files.is_empty() { + bail!("structural-topology requires at least one file"); + } + Ok(Command::StructuralTopology { + language, + files, + jobs, + }) + } + "local-flow" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("local-flow requires at least one file"); + } + Ok(Command::LocalFlow { + language, + files, + jobs, + }) + } "flay-similarity" => { let mut language = String::from("ruby"); let mut mass = 32usize; diff --git a/gems/decomplex/test/detector_runner_test.rb b/gems/decomplex/test/detector_runner_test.rb index b87050410..1f2469c78 100644 --- a/gems/decomplex/test/detector_runner_test.rb +++ b/gems/decomplex/test/detector_runner_test.rb @@ -223,6 +223,154 @@ def a_alias end end + def test_inconsistent_rename_clone_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-rename", ".rb"]) do |file| + file.write(<<~RUBY) + def one(a, b) + res = a + b + puts res + res * 2 + end + + def two(x, b) + res = x + b + puts res + res * 2 + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("inconsistent-rename-clone", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_derived_state_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-derived", ".rb"]) do |file| + file.write(<<~RUBY) + def check(a) + b = a + 1 + a = 2 + puts b + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("derived-state", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_implicit_control_flow_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? 
+ + Tempfile.create(["decomplex-implicit", ".rb"]) do |file| + file.write(<<~RUBY) + class Flow + def prepare; @a = 1; end + def validate; @b = @a; end + def run + prepare + validate + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("implicit-control-flow", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_weighted_inlined_complexity_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-weighted", ".rb"]) do |file| + file.write(<<~RUBY) + class Complex + def entry + helper_one + helper_two if condition? + end + + private + def helper_one + if a; b; else; c; end + end + + def helper_two + while x; y; end + end + + def condition?; true; end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("weighted-inlined-complexity", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_locality_drag_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-locality", ".rb"]) do |file| + file.write(<<~RUBY) + def heavy(x) + y = x + 1 + # Unrelated work + a = 1; b = 2; c = 3; d = 4; e = 5 + puts a, b, c, d, e + # Finally use y + puts y + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("locality-drag", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_operational_discontinuity_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? 
+ + Tempfile.create(["decomplex-discontinuity", ".rb"]) do |file| + file.write(<<~RUBY) + def phase_shift + a = 1 + b = 2 + + # Phase 2 + x = 3 + y = 4 + puts x, y + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("operational-discontinuity", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + def test_decision_pressure_rust_engine_matches_ruby_engine_byte_for_byte skip "cargo is not available" unless cargo_available? From 99249158122d40bdd2e808d9a8560c1fb7456e64 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Wed, 17 Jun 2026 01:39:51 +0000 Subject: [PATCH 13/52] Decomplex: Finish Tier 3 metric migration to Rust Migrates the final Tier 3 detectors to Rust: - False Simplicity - Fat Unions - Function LCOM - Oversized Predicate - Path Condition - Sequence Mine (Broken Protocols) Maintains strict function-for-function parity with the Ruby source. All 25 comparison tests pass with byte-for-byte JSON identity. 
Co-authored-by: gemini-cli <218195315+gemini-cli@users.noreply.github.com> --- .../lib/decomplex/detector_runner.rb | 83 ++++++- .../lib/decomplex/native/false_simplicity.rb | 25 ++ .../lib/decomplex/native/fat_union.rb | 25 ++ .../lib/decomplex/native/function_lcom.rb | 25 ++ .../decomplex/native/oversized_predicate.rb | 25 ++ .../lib/decomplex/native/path_condition.rb | 25 ++ .../lib/decomplex/native/sequence_mine.rb | 25 ++ .../decomplex/detectors/false_simplicity.rs | 202 +++++++++++++++ .../rust/src/decomplex/detectors/fat_union.rs | 173 +++++++++++++ .../src/decomplex/detectors/function_lcom.rs | 146 +++++++++++ .../rust/src/decomplex/detectors/mod.rs | 6 + .../detectors/oversized_predicate.rs | 132 ++++++++++ .../src/decomplex/detectors/path_condition.rs | 230 ++++++++++++++++++ .../src/decomplex/detectors/sequence_mine.rs | 193 +++++++++++++++ gems/decomplex/rust/src/main.rs | 147 ++++++++++- gems/decomplex/test/detector_runner_test.rb | 162 ++++++++++++ 16 files changed, 1619 insertions(+), 5 deletions(-) create mode 100644 gems/decomplex/lib/decomplex/native/false_simplicity.rb create mode 100644 gems/decomplex/lib/decomplex/native/fat_union.rb create mode 100644 gems/decomplex/lib/decomplex/native/function_lcom.rb create mode 100644 gems/decomplex/lib/decomplex/native/oversized_predicate.rb create mode 100644 gems/decomplex/lib/decomplex/native/path_condition.rb create mode 100644 gems/decomplex/lib/decomplex/native/sequence_mine.rb create mode 100644 gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/fat_union.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/path_condition.rs create mode 100644 gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs diff --git 
a/gems/decomplex/lib/decomplex/detector_runner.rb b/gems/decomplex/lib/decomplex/detector_runner.rb index c84d0a311..ae634f69a 100644 --- a/gems/decomplex/lib/decomplex/detector_runner.rb +++ b/gems/decomplex/lib/decomplex/detector_runner.rb @@ -23,6 +23,12 @@ require_relative "weighted_inlined_cognitive_complexity" require_relative "locality_drag" require_relative "operational_discontinuity" +require_relative "oversized_predicate" +require_relative "path_condition" +require_relative "sequence_mine" +require_relative "function_lcom" +require_relative "false_simplicity" +require_relative "fat_union" module Decomplex # Runs one detector in isolation and emits deterministic machine output. @@ -56,7 +62,14 @@ module DetectorRunner "implicit-control-flow" => :implicit_control_flow, "weighted-inlined-complexity" => :weighted_inlined_complexity, "locality-drag" => :locality_drag, - "operational-discontinuity" => :operational_discontinuity + "operational-discontinuity" => :operational_discontinuity, + "oversized-predicate" => :oversized_predicate, + "path-condition" => :path_condition, + "broken-protocol" => :sequence_mine, + "sequence-mine" => :sequence_mine, + "function-lcom" => :function_lcom, + "false-simplicity" => :false_simplicity, + "fat-union" => :fat_union }.freeze ENGINES = %w[ruby rust].freeze @@ -99,6 +112,18 @@ def run(detector, files, engine: "ruby", mass: FlaySimilarity::DEFAULT_MASS, fuz locality_drag(files, engine: engine, jobs: jobs) when :operational_discontinuity operational_discontinuity(files, engine: engine, jobs: jobs) + when :oversized_predicate + oversized_predicate(files, engine: engine, jobs: jobs) + when :path_condition + path_condition(files, engine: engine, jobs: jobs) + when :sequence_mine + sequence_mine(files, engine: engine, jobs: jobs) + when :function_lcom + function_lcom(files, engine: engine, jobs: jobs) + when :false_simplicity + false_simplicity(files, engine: engine, jobs: jobs) + when :fat_union + fat_union(files, engine: engine, 
jobs: jobs) else raise ArgumentError, "unsupported decomplex detector: #{detector}" end @@ -282,6 +307,62 @@ def detector_names OperationalDiscontinuity.scan(files) end + private_class_method def self.oversized_predicate(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/oversized_predicate" + return Native::OversizedPredicate.scan(files, jobs: jobs) + end + + { "findings" => OversizedPredicate.scan(files).findings } + end + + private_class_method def self.path_condition(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/path_condition" + return Native::PathCondition.scan(files, jobs: jobs) + end + + report = PathCondition.scan(files) + { "neglected" => report.neglected } + end + + private_class_method def self.sequence_mine(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/sequence_mine" + return Native::SequenceMine.scan(files, jobs: jobs) + end + + report = SequenceMine.scan(files) + { "broken" => report.broken_protocol } + end + + private_class_method def self.function_lcom(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/function_lcom" + return Native::FunctionLcom.scan(files, jobs: jobs) + end + + FunctionLCOM.scan(files) + end + + private_class_method def self.false_simplicity(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/false_simplicity" + return Native::FalseSimplicity.scan(files, jobs: jobs) + end + + FalseSimplicity.scan(files).findings + end + + private_class_method def self.fat_union(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/fat_union" + return Native::FatUnion.scan(files, jobs: jobs) + end + + { "fat_unions" => FatUnion.scan(files).fat_unions } + end + private_class_method def self.canonicalize(value) case value when Hash diff --git a/gems/decomplex/lib/decomplex/native/false_simplicity.rb b/gems/decomplex/lib/decomplex/native/false_simplicity.rb new file mode 100644 index 
000000000..c8afcc290 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/false_simplicity.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module FalseSimplicity + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("false-simplicity", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/fat_union.rb b/gems/decomplex/lib/decomplex/native/fat_union.rb new file mode 100644 index 000000000..f3df23a3e --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/fat_union.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module FatUnion + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("fat-union", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? 
+ + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/function_lcom.rb b/gems/decomplex/lib/decomplex/native/function_lcom.rb new file mode 100644 index 000000000..108ca95b2 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/function_lcom.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module FunctionLcom + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("function-lcom", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/oversized_predicate.rb b/gems/decomplex/lib/decomplex/native/oversized_predicate.rb new file mode 100644 index 000000000..a2f295430 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/oversized_predicate.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module OversizedPredicate + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("oversized-predicate", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? 
+ + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/path_condition.rb b/gems/decomplex/lib/decomplex/native/path_condition.rb new file mode 100644 index 000000000..eec47d915 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/path_condition.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module PathCondition + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("path-condition", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/sequence_mine.rb b/gems/decomplex/lib/decomplex/native/sequence_mine.rb new file mode 100644 index 000000000..ea80d5bce --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/sequence_mine.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module SequenceMine + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("sequence-mine", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? 
+ + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end diff --git a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs new file mode 100644 index 000000000..788f7d8f1 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs @@ -0,0 +1,202 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct FalseSimplicityRow { + pub kind: String, + pub detail: String, + pub support: usize, + pub scatter: usize, + pub at: String, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Site { + kind: String, + detail: String, + file: String, + defn: String, + line: usize, + span: Span, +} + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { + let mut sites = Vec::new(); + for file in files { + let (root, lines) = ast::parse(file)?; + let mut detector = FalseSimplicity::new(file.to_string_lossy().to_string(), lines); + detector.walk(&root, &Vec::new()); + sites.extend(detector.sites); + } + Ok(Report::new(sites).findings()) +} + +const DISPATCH_MIDS: &[&str] = &["send", "public_send", "method", "public_method", "__send__"]; +const IO_MIDS: &[&str] = &[ + "puts", "print", "p", "open", "read", "write", "sysread", "syswrite", + "recv", "send", "gets", "read_nonblock", "write_nonblock", +]; +const REFLECTION_MIDS: &[&str] = &[ + "instance_eval", "class_eval", "module_eval", + "instance_exec", "class_exec", "module_exec", + "define_method", "define_singleton_method", + "const_get", "const_set", "const_missing", + "method_missing", "respond_to_missing?", +]; + +struct FalseSimplicity { + file: String, + lines: Vec, + sites: Vec, +} + +impl FalseSimplicity { + 
fn new(file: String, lines: Vec) -> Self { + Self { file, lines, sites: Vec::new() } + } + + fn walk(&mut self, node: &Node, defstack: &[String]) { + let mut next_defstack = defstack.to_vec(); + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + next_defstack.push(name.clone()); + } + } + + self.inspect_node(node, &next_defstack); + + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_defstack); + } + } + + fn inspect_node(&mut self, node: &Node, defstack: &[String]) { + match node.r#type.as_str() { + "CALL" | "OPCALL" | "FCALL" | "VCALL" => { + let mid = self.call_mid(node); + if let Some(mid) = mid { + if DISPATCH_MIDS.contains(&mid.as_str()) { + self.add_site("dynamic_dispatch", &mid, node, defstack); + } else if IO_MIDS.contains(&mid.as_str()) && !self.receiver_is_explicit(node) { + self.add_site("hidden_io", &mid, node, defstack); + } else if REFLECTION_MIDS.contains(&mid.as_str()) { + self.add_site("runtime_reflection", &mid, node, defstack); + } + } + } + "ATTRASGN" => { + let mid = self.call_mid(node).unwrap_or_default(); + if mid.ends_with("eval=") { + self.add_site("runtime_reflection", &mid, node, defstack); + } + } + "SUPER" | "ZSUPER" => { + self.add_site("context_dependency", "super", node, defstack); + } + "GVAR" | "GASGN" => { + if let Some(name) = ast::child_to_string(node.children.get(0)) { + if !name.starts_with("$PREMATCH") && !name.starts_with("$POSTMATCH") && !name.starts_with("$MATCH") && !name.starts_with("$&") && !name.starts_with("$'") && !name.starts_with("$`") { + self.add_site("context_dependency", &name, node, defstack); + } + } + } + "CVAR" | "CVDASGN" | "CVDECL" => { + self.add_site("hidden_mutation", "class_var", node, defstack); + } + "CLASS" | "MODULE" => { + if !defstack.is_empty() { + self.add_site("monkeypatch", "nested_reopen", node, defstack); + } + } + "ALIAS" 
=> { + self.add_site("runtime_reflection", "alias", node, defstack); + } + "UNDEF" => { + self.add_site("runtime_reflection", "undef", node, defstack); + } + _ => {} + } + } + + fn call_mid(&self, node: &Node) -> Option { + match node.r#type.as_str() { + "CALL" | "OPCALL" | "ATTRASGN" => ast::child_to_string(node.children.get(1)), + "FCALL" | "VCALL" => ast::child_to_string(node.children.get(0)), + _ => None, + } + } + + fn receiver_is_explicit(&self, node: &Node) -> bool { + if matches!(node.r#type.as_str(), "FCALL" | "VCALL") { return false; } + if let Some(recv) = node.children.get(0).and_then(ast::node) { + recv.r#type != "SELF" + } else { + false + } + } + + fn add_site(&mut self, kind: &str, detail: &str, node: &Node, defstack: &[String]) { + self.sites.push(Site { + kind: kind.to_string(), + detail: detail.to_string(), + file: self.file.clone(), + defn: defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + line: node.first_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + }); + } +} + +struct Report { + sites: Vec, +} + +impl Report { + fn new(sites: Vec) -> Self { Self { sites } } + + fn findings(&self) -> Vec { + let mut groups: BTreeMap<(String, String), Vec<&Site>> = BTreeMap::new(); + for s in &self.sites { + groups.entry((s.kind.clone(), s.detail.clone())).or_default().push(s); + } + + let mut out = Vec::new(); + for ((kind, detail), sts) in groups { + let mut defns = BTreeSet::new(); + for s in &sts { defns.insert((s.file.clone(), s.defn.clone())); } + let scatter = defns.len(); + + let mut sites = Vec::new(); + let mut spans = BTreeMap::new(); + for s in &sts { + let loc = format!("{}:{}:{}", s.file, s.defn, s.line); + sites.push(loc.clone()); + spans.insert(loc, s.span); + } + + out.push(FalseSimplicityRow { + kind, + detail, + support: sts.len(), + scatter, + at: sites.first().cloned().unwrap_or_default(), + sites, + spans, + }); + } + out.sort_by(|a, b| { + b.scatter.cmp(&a.scatter) + 
.then_with(|| b.support.cmp(&a.support)) + .then_with(|| a.kind.cmp(&b.kind)) + .then_with(|| a.detail.cmp(&b.detail)) + }); + out + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs new file mode 100644 index 000000000..7da7e4969 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs @@ -0,0 +1,173 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct FatUnionReport { + pub fat_unions: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct FatUnionRow { + pub name: String, + pub common: Vec, + pub variant_set: Vec, + pub at: String, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Read { + name: String, + span: Span, +} + +#[derive(Clone, Debug)] +struct VariantReads { + reads: Vec, +} + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { + let mut out = Vec::new(); + for file in files { + let (root, lines) = ast::parse(file)?; + let mut detector = FatUnion::new(file.to_string_lossy().to_string(), lines); + detector.walk(&root, &Vec::new()); + out.extend(detector.findings()); + } + out.sort_by(|a, b| b.common.len().cmp(&a.common.len()).then_with(|| a.at.cmp(&b.at))); + Ok(FatUnionReport { fat_unions: out }) +} + +struct FatUnion { + file: String, + lines: Vec, + reports: Vec, +} + +impl FatUnion { + fn new(file: String, lines: Vec) -> Self { + Self { file, lines, reports: Vec::new() } + } + + fn walk(&mut self, node: &Node, defstack: &[String]) { + let mut next_defstack = defstack.to_vec(); + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + 
next_defstack.push(name.clone()); + } + } + + if matches!(node.r#type.as_str(), "CASE" | "CASE2") { + self.analyze_case(node, &next_defstack); + } + + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_defstack); + } + } + + fn analyze_case(&mut self, node: &Node, defstack: &[String]) { + let (cond, first_when) = if node.r#type == "CASE2" { + (None, node.children.get(0).and_then(ast::node)) + } else { + (node.children.get(0).and_then(ast::node), node.children.get(1).and_then(ast::node)) + }; + + let mut variants = BTreeMap::new(); + let mut current_when = first_when; + while let Some(when_node) = current_when { + if when_node.r#type != "WHEN" { break; } + if let Some(pat) = when_node.children.get(0).and_then(ast::node) { + if let Some(variant_name) = self.variant_name(pat) { + let reads = self.collect_reads(when_node.children.get(1).and_then(ast::node).unwrap_or(when_node)); + variants.insert(variant_name, VariantReads { reads }); + } + } + current_when = when_node.children.get(2).and_then(ast::node); + } + + if variants.len() < 2 { return; } + + let mut common = None; + for v in variants.values() { + let names: BTreeSet<_> = v.reads.iter().map(|r| r.name.clone()).collect(); + match common { + None => common = Some(names), + Some(ref mut c) => { + *c = c.intersection(&names).cloned().collect(); + } + } + } + + let common = common.unwrap_or_default(); + if common.is_empty() { return; } + + let subject_name = self.subject_name(cond); + let defn = defstack.last().map(|s| s.as_str()).unwrap_or(""); + let at = format!("{}:{}:{}", self.file, defn, node.first_lineno); + + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), [node.first_lineno, node.first_column, node.last_lineno, node.last_column]); + + let mut variant_set: Vec<_> = variants.keys().cloned().collect(); + variant_set.sort(); + let mut common_vec: Vec<_> = common.into_iter().collect(); + common_vec.sort(); + + self.reports.push(FatUnionRow { + name: subject_name, + 
common: common_vec, + variant_set, + at, + spans, + }); + } + + fn variant_name(&self, node: &Node) -> Option { + let n = if node.r#type == "LIST" { node.children.iter().filter_map(ast::node).next()? } else { node }; + match n.r#type.as_str() { + "CONSTANT" | "SCOPE_RESOLUTION" => Some(ast::slice(n, &self.lines)), + _ => None + } + } + + fn collect_reads(&self, node: &Node) -> Vec { + let mut out = Vec::new(); + self.walk_reads(node, &mut out); + out + } + + fn walk_reads(&self, node: &Node, out: &mut Vec) { + if matches!(node.r#type.as_str(), "CALL" | "OPCALL") { + if let Some(Child::Symbol(mid)) = node.children.get(1) { + out.push(Read { + name: mid.clone(), + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + }); + } + } else if matches!(node.r#type.as_str(), "FCALL" | "VCALL") { + if let Some(Child::Symbol(mid)) = node.children.get(0) { + out.push(Read { + name: mid.clone(), + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + }); + } + } + for child in node.children.iter().filter_map(ast::node) { + self.walk_reads(child, out); + } + } + + fn subject_name(&self, cond: Option<&Node>) -> String { + cond.map(|c| ast::slice(c, &self.lines)).unwrap_or_else(|| "implicit".to_string()) + } + + fn findings(&self) -> Vec { + self.reports.clone() + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs b/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs new file mode 100644 index 000000000..912d050df --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs @@ -0,0 +1,146 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::detectors::local_flow; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct FunctionLcomRow { + pub at: String, + pub owner: String, + 
pub defn: String, + pub score: usize, + pub components: usize, + pub mode: String, + pub locals: usize, + pub statements: usize, + pub spans: BTreeMap, +} + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { + let summaries = local_flow::scan_files(files, _language)?; + let mut detector = FunctionLcom::new(summaries); + Ok(detector.findings()) +} + +struct FunctionLcom { + summaries: Vec, + min_components: usize, + min_locals: usize, + min_statements: usize, + min_score: usize, +} + +impl FunctionLcom { + fn new(summaries: Vec) -> Self { + Self { + summaries, + min_components: 2, + min_locals: 5, + min_statements: 5, + min_score: 40, + } + } + + fn findings(&mut self) -> Vec { + let mut out: Vec<_> = self.summaries.iter().filter_map(|s| self.finding_for(s)).collect(); + out.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.at.cmp(&b.at))); + out + } + + fn finding_for(&self, summary: &local_flow::MethodSummary) -> Option { + let all_locals = self.all_locals(summary); + if all_locals.len() < self.min_locals { return None } + if summary.statements.len() < self.min_statements { return None } + + let components = self.connected_components(summary, &all_locals); + if components.len() < self.min_components { return None } + + let score = (components.len() * 10) + all_locals.len() + summary.statements.len(); + if score < self.min_score { return None } + let mode = if self.late_join(summary, &components) { "late_join".to_string() } else { "disjoint".to_string() }; + + let at = format!("{}:{}:{}", summary.file, summary.name, summary.line); + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), summary.span); + + Some(FunctionLcomRow { + at, + owner: summary.owner.clone(), + defn: summary.name.clone(), + score, + components: components.len(), + mode, + locals: all_locals.len(), + statements: summary.statements.len(), + spans, + }) + } + + fn all_locals(&self, summary: &local_flow::MethodSummary) -> BTreeSet { + let mut locals = BTreeSet::new(); 
+ for s in &summary.statements { + locals.extend(s.reads.clone()); + locals.extend(s.writes.clone()); + } + locals + } + + fn connected_components(&self, summary: &local_flow::MethodSummary, locals: &BTreeSet) -> Vec> { + let mut adj: BTreeMap> = BTreeMap::new(); + for s in &summary.statements { + let mut touched: Vec<_> = s.reads.union(&s.writes).cloned().collect(); + for (lhs, rhs) in &s.dependencies { + touched.push(lhs.clone()); + touched.push(rhs.clone()); + } + for i in 0..touched.len() { + for j in i + 1..touched.len() { + adj.entry(touched[i].clone()).or_default().insert(touched[j].clone()); + adj.entry(touched[j].clone()).or_default().insert(touched[i].clone()); + } + } + } + + let mut components = Vec::new(); + let mut unvisited = locals.clone(); + + while let Some(start) = unvisited.iter().next().cloned() { + let mut component = BTreeSet::new(); + let mut queue = vec![start]; + while let Some(node) = queue.pop() { + if !unvisited.contains(&node) { continue; } + unvisited.remove(&node); + component.insert(node.clone()); + if let Some(neighbors) = adj.get(&node) { + for n in neighbors { + if unvisited.contains(n) { queue.push(n.clone()); } + } + } + } + if component.len() > 0 { components.push(component); } + } + + components.retain(|c| c.len() > 1 || self.standalone_state_usage(summary, c.iter().next().unwrap())); + components + } + + fn standalone_state_usage(&self, summary: &local_flow::MethodSummary, local: &str) -> bool { + let reads: usize = summary.statements.iter().map(|s| s.reads.contains(local) as usize).sum(); + let writes: usize = summary.statements.iter().map(|s| s.writes.contains(local) as usize).sum(); + reads + writes > 1 + } + + fn late_join(&self, summary: &local_flow::MethodSummary, components: &[BTreeSet]) -> bool { + let Some(last) = summary.statements.last() else { return false }; + let mut joined = 0; + for c in components { + if last.reads.intersection(c).next().is_some() || last.writes.intersection(c).next().is_some() { + joined += 
1; + } + } + joined >= 2 + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/mod.rs b/gems/decomplex/rust/src/decomplex/detectors/mod.rs index d9c9c153e..c7cb3b359 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/mod.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/mod.rs @@ -1,16 +1,22 @@ pub mod co_update; pub mod decision_pressure; pub mod derived_state; +pub mod false_simplicity; +pub mod fat_union; pub mod flay_similarity; +pub mod function_lcom; pub mod implicit_control_flow; pub mod inconsistent_rename_clone; pub mod local_flow; pub mod locality_drag; pub mod miner; pub mod operational_discontinuity; +pub mod oversized_predicate; +pub mod path_condition; pub mod predicate_alias; pub mod redundant_nil_guard; pub mod semantic_alias; +pub mod sequence_mine; pub mod state_branch_density; pub mod state_mesh; pub mod structural_topology; diff --git a/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs b/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs new file mode 100644 index 000000000..c0c939b6e --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs @@ -0,0 +1,132 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct OversizedPredicateRow { + pub at: String, + pub count: usize, + pub predicate: String, + pub atoms: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug, Serialize)] +pub struct ResultReport { + pub findings: Vec, +} + +const LIMIT: usize = 3; +const PREDICATE_NODES: &[&str] = &["IF", "WHILE", "UNTIL"]; + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { + let mut findings = Vec::new(); + for file in files { + let (root, lines) = ast::parse(file)?; + let mut scanner = 
OversizedPredicate::new(file.to_string_lossy().to_string(), lines, LIMIT); + scanner.walk(&root, &Vec::new()); + findings.extend(scanner.findings); + } + Ok(ResultReport { findings }) +} + +struct OversizedPredicate { + file: String, + lines: Vec, + limit: usize, + findings: Vec, +} + +impl OversizedPredicate { + fn new(file: String, lines: Vec, limit: usize) -> Self { + Self { + file, + lines, + limit, + findings: Vec::new(), + } + } + + fn walk(&mut self, node: &Node, defstack: &[String]) { + let mut next_defstack = defstack.to_vec(); + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + next_defstack.push(name.clone()); + } + } + + self.record_predicate(node, &next_defstack); + + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_defstack); + } + } + + fn record_predicate(&mut self, node: &Node, defstack: &[String]) { + if !PREDICATE_NODES.contains(&node.r#type.as_str()) { + return; + } + + let defn = defstack.last().map(|s| s.as_str()).unwrap_or(""); + if self.predicate_helper(defn) { + return; + } + + let cond = node.children.get(0).and_then(ast::node); + let Some(cond) = cond else { return }; + + let atoms = self.condition_atoms(cond); + if atoms.len() <= self.limit { + return; + } + + let at = format!("{}:{}:{}", self.file, defn, node.first_lineno); + let mut spans = BTreeMap::new(); + spans.insert( + at.clone(), + [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + ); + + let atoms_text: Vec = atoms.into_iter().map(|a| ast::slice(a, &self.lines)).collect(); + + self.findings.push(OversizedPredicateRow { + at, + count: atoms_text.len(), + predicate: ast::slice(cond, &self.lines), + atoms: atoms_text, + spans, + }); + } + + fn condition_atoms<'a>(&self, node: &'a Node) -> Vec<&'a Node> { + match node.r#type.as_str() { + "AND" | "OR" => node + .children + 
.iter() + .filter_map(ast::node) + .flat_map(|child| self.condition_atoms(child)) + .collect(), + "NOT" => { + if let Some(child) = node.children.get(0).and_then(ast::node) { + self.condition_atoms(child) + } else { + vec![node] + } + } + _ => vec![node], + } + } + + fn predicate_helper(&self, name: &str) -> bool { + name.ends_with('?') + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs new file mode 100644 index 000000000..b66a17a67 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs @@ -0,0 +1,230 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct PathConditionReport { + pub neglected: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct NeglectedPathCondition { + pub pattern: Vec, + pub support: usize, + pub missing: String, + pub at: String, + pub spans: BTreeMap, + pub action: String, +} + +#[derive(Clone, Debug)] +struct Site { + guards: Vec, + action: String, + file: String, + defn: String, + line: usize, + span: Span, +} + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { + let mut sites = Vec::new(); + for file in files { + let (root, lines) = ast::parse(file)?; + let mut pc = PathCondition::new(file.to_string_lossy().to_string(), lines); + pc.walk(&root, &Vec::new(), &Vec::new()); + sites.extend(pc.sites); + } + Ok(Report::new(sites).findings()) +} + +struct PathCondition { + file: String, + lines: Vec, + sites: Vec, +} + +impl PathCondition { + fn new(file: String, lines: Vec) -> Self { + Self { + file, + lines, + sites: Vec::new(), + } + } + + fn walk(&mut self, node: &Node, defstack: &[String], guards: &[Vec]) { + let mut next_defstack = defstack.to_vec(); + if 
matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + next_defstack.push(name.clone()); + } + } + + match node.r#type.as_str() { + "IF" | "UNLESS" => { + let cond = node.children.get(0).and_then(ast::node); + let a = node.children.get(1).and_then(ast::node); + let b = node.children.get(2).and_then(ast::node); + + let atoms = self.cond_atoms(cond); + let then_g = if node.r#type == "IF" { atoms.clone() } else { self.negate(&atoms) }; + let else_g = if node.r#type == "IF" { self.negate(&atoms) } else { atoms.clone() }; + + if let Some(a_node) = a { + let mut next_guards = guards.to_vec(); + next_guards.extend(then_g); + self.walk(a_node, &next_defstack, &next_guards); + } + if let Some(b_node) = b { + let mut next_guards = guards.to_vec(); + next_guards.extend(else_g); + self.walk(b_node, &next_defstack, &next_guards); + } + + if let Some(cond_node) = cond { + self.walk(cond_node, &next_defstack, guards); + } + return; + } + "CALL" | "FCALL" | "VCALL" | "ATTRASGN" | "LASGN" | "IASGN" | "OPCALL" => { + if guards.len() >= 2 { + self.record(node, &next_defstack, guards); + } + } + _ => {} + } + + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_defstack, guards); + } + } + + fn cond_atoms(&self, cond: Option<&Node>) -> Vec> { + let Some(cond) = cond else { return Vec::new() }; + ast::flatten_and(cond).into_iter().map(|a| { + let t = ast::slice(a, &self.lines); + let (text, neg) = ast::canon_polarity(&t); + vec![text, if neg { "true".to_string() } else { "false".to_string() }] + }).collect() + } + + fn negate(&self, atoms: &[Vec]) -> Vec> { + atoms.iter().map(|a| { + let t = &a[0]; + let n = a[1] == "true"; + vec![t.clone(), if !n { "true".to_string() } else { "false".to_string() }] + }).collect() + } + + fn record(&mut self, node: &Node, defstack: &[String], guards: &[Vec]) { + let mut members_set = 
BTreeSet::new(); + for g in guards { + let prefix = if g[1] == "true" { "!" } else { "" }; + members_set.insert(format!("{}{}", prefix, g[0])); + } + let members: Vec<_> = members_set.into_iter().collect(); + + if members.len() < 2 { + return; + } + + let slice = ast::slice(node, &self.lines); + let action = if slice.len() > 80 { slice[..80].to_string() } else { slice }; + + self.sites.push(Site { + guards: members, + action, + file: self.file.clone(), + defn: defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + line: node.first_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + }); + } +} + +struct Report { + sites: Vec, + groups: BTreeMap, Vec>, +} + +impl Report { + fn new(sites: Vec) -> Self { + let mut keys = Vec::new(); + let mut groups: BTreeMap, Vec> = BTreeMap::new(); + for s in &sites { + if !groups.contains_key(&s.guards) { + keys.push(s.guards.clone()); + } + groups.entry(s.guards.clone()).or_default().push(s.clone()); + } + + let ordered_groups = keys.into_iter().map(|k| { + let v = groups.remove(&k).unwrap(); + (k, v) + }).collect(); + + Self { + sites, + groups: ordered_groups, + } + } + + fn findings(&self) -> PathConditionReport { + PathConditionReport { + neglected: self.neglected(3), + } + } + + fn neglected(&self, min_support: usize) -> Vec { + let popular: Vec<_> = self.groups.iter() + .filter(|(_, s)| s.len() >= min_support) + .map(|(g, s)| (g.clone(), s.len())) + .collect(); + + let mut out = Vec::new(); + let mut seen = BTreeSet::new(); + + for s in &self.sites { + for (gs, sup) in &popular { + let gs_set: BTreeSet<_> = gs.iter().cloned().collect(); + let s_guards_set: BTreeSet<_> = s.guards.iter().cloned().collect(); + + let diff_gs_s: BTreeSet<_> = gs_set.difference(&s_guards_set).cloned().collect(); + let diff_s_gs: BTreeSet<_> = s_guards_set.difference(&gs_set).cloned().collect(); + + if diff_gs_s.len() == 1 && diff_s_gs.is_empty() { + if s.guards == *gs { + continue; + } + + 
let at = format!("{}:{}:{}", s.file, s.defn, s.line); + let missing = diff_gs_s.into_iter().next().unwrap(); + + // dedupe manually + let key = (gs.clone(), sup.clone(), missing.clone(), at.clone()); + if seen.insert(key) { + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), s.span); + + out.push(NeglectedPathCondition { + pattern: gs.clone(), + support: *sup, + missing, + at, + spans, + action: s.action.clone(), + }); + } + } + } + } + + out.sort_by(|a, b| b.support.cmp(&a.support).then_with(|| a.at.cmp(&b.at))); + out + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs b/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs new file mode 100644 index 000000000..df7d97054 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs @@ -0,0 +1,193 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::Language; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct BrokenProtocolReport { + pub broken: Vec, +} + +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct BrokenProtocol { + pub has: String, + pub missing: String, + pub support: usize, + pub confidence: f64, + pub at: String, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Site { + calls: Vec, + file: String, + defn: String, + line: usize, + span: Span, +} + +pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { + let mut sites = Vec::new(); + for file in files { + let (root, lines) = ast::parse(file)?; + let mut sm = SequenceMine::new(file.to_string_lossy().to_string(), lines); + sm.walk(&root, &Vec::new()); + sites.extend(sm.sites); + } + Ok(Report::new(sites).findings()) +} + +struct SequenceMine { + file: String, + lines: Vec, + sites: Vec, +} + +impl SequenceMine { + fn new(file: String, lines: Vec) -> Self { + Self { + file, + lines, + sites: 
Vec::new(), + } + } + + fn walk(&mut self, node: &Node, defstack: &[String]) { + let mut next_defstack = defstack.to_vec(); + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + next_defstack.push(name.clone()); + } + } + + if node.r#type == "BLOCK" { + let calls = self.collect_calls(node); + if calls.len() >= 2 { + self.sites.push(Site { + calls, + file: self.file.clone(), + defn: next_defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + line: node.first_lineno, + span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + }); + } + } + + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_defstack); + } + } + + fn collect_calls(&self, block_node: &Node) -> Vec { + let mut calls = Vec::new(); + for stmt in block_node.children.iter().filter_map(ast::node) { + if let Some(mid) = self.call_mid(stmt) { + calls.push(mid); + } + } + calls + } + + fn call_mid(&self, node: &Node) -> Option { + match node.r#type.as_str() { + "CALL" | "OPCALL" | "ATTRASGN" => ast::child_to_string(node.children.get(1)), + "FCALL" | "VCALL" => ast::child_to_string(node.children.get(0)), + _ => None, + } + } +} + +struct Report { + sites: Vec, + counts: BTreeMap, + co_counts: BTreeMap<(String, String), usize>, +} + +impl Report { + fn new(sites: Vec) -> Self { + let mut counts = BTreeMap::new(); + let mut co_counts = BTreeMap::new(); + + for s in &sites { + let unique_calls: BTreeSet<_> = s.calls.iter().cloned().collect(); + let unique_calls: Vec<_> = unique_calls.into_iter().collect(); + + for c in &unique_calls { + *counts.entry(c.clone()).or_insert(0) += 1; + } + + for i in 0..unique_calls.len() { + for j in i + 1..unique_calls.len() { + let mut pair = vec![unique_calls[i].clone(), unique_calls[j].clone()]; + pair.sort(); + *co_counts.entry((pair[0].clone(), 
pair[1].clone())).or_insert(0) += 1; + } + } + } + + Self { + sites, + counts, + co_counts, + } + } + + fn findings(&self) -> BrokenProtocolReport { + BrokenProtocolReport { + broken: self.broken_protocols(4, 0.75), + } + } + + fn broken_protocols(&self, min_support: usize, min_confidence: f64) -> Vec { + let mut rules = Vec::new(); + for ((a, b), &co_count) in &self.co_counts { + let count_a = *self.counts.get(a).unwrap_or(&0); + let count_b = *self.counts.get(b).unwrap_or(&0); + + let conf_a = co_count as f64 / count_a as f64; + let conf_b = co_count as f64 / count_b as f64; + + if conf_a >= min_confidence && co_count >= min_support && count_a > co_count { + rules.push((a.clone(), b.clone(), co_count, conf_a)); + } + if conf_b >= min_confidence && co_count >= min_support && count_b > co_count { + rules.push((b.clone(), a.clone(), co_count, conf_b)); + } + } + + let mut out = Vec::new(); + let mut seen = BTreeSet::new(); + + for s in &self.sites { + let unique_calls: BTreeSet<_> = s.calls.iter().cloned().collect(); + + for (has, missing, sup, conf) in &rules { + if unique_calls.contains(has) && !unique_calls.contains(missing) { + let at = format!("{}:{}:{}", s.file, s.defn, s.line); + + let key = (has.clone(), missing.clone(), at.clone()); + if seen.insert(key) { + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), s.span); + + out.push(BrokenProtocol { + has: has.clone(), + missing: missing.clone(), + support: *sup, + confidence: (conf * 100.0).round() / 100.0, + at, + spans, + }); + } + } + } + } + + out.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap().then_with(|| b.support.cmp(&a.support)).then_with(|| a.at.cmp(&b.at))); + out + } +} diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs index 4cc7ea880..61ae41a0f 100644 --- a/gems/decomplex/rust/src/main.rs +++ b/gems/decomplex/rust/src/main.rs @@ -2,9 +2,10 @@ mod decomplex; use anyhow::{bail, Context, Result}; use decomplex::detectors::{ - co_update, 
decision_pressure, derived_state, flay_similarity, implicit_control_flow, - inconsistent_rename_clone, local_flow, locality_drag, miner, operational_discontinuity, - predicate_alias, redundant_nil_guard, semantic_alias, state_branch_density, state_mesh, + co_update, decision_pressure, derived_state, false_simplicity, fat_union, flay_similarity, + function_lcom, implicit_control_flow, inconsistent_rename_clone, local_flow, locality_drag, + miner, operational_discontinuity, oversized_predicate, path_condition, predicate_alias, + redundant_nil_guard, semantic_alias, sequence_mine, state_branch_density, state_mesh, structural_topology, temporal_ordering_pressure, weighted_inlined_cognitive_complexity, }; use decomplex::parallel; @@ -135,6 +136,42 @@ fn main() -> Result<()> { .with_context(|| "failed to scan structural similarity")?; println!("{}", serde_json::to_string(&findings)?); } + Command::OversizedPredicate { language, files, .. } => { + let language = Language::parse(&language)?; + let findings = oversized_predicate::scan_files(&files, language) + .with_context(|| "failed to scan oversized-predicate facts")?; + println!("{}", serde_json::to_string(&findings)?); + } + Command::PathCondition { language, files, .. } => { + let language = Language::parse(&language)?; + let findings = path_condition::scan_files(&files, language) + .with_context(|| "failed to scan path-condition facts")?; + println!("{}", serde_json::to_string(&findings)?); + } + Command::SequenceMine { language, files, .. } => { + let language = Language::parse(&language)?; + let findings = sequence_mine::scan_files(&files, language) + .with_context(|| "failed to scan sequence-mine facts")?; + println!("{}", serde_json::to_string(&findings)?); + } + Command::FunctionLcom { language, files, .. 
} => { + let language = Language::parse(&language)?; + let findings = function_lcom::scan_files(&files, language) + .with_context(|| "failed to scan function-lcom facts")?; + println!("{}", serde_json::to_string(&findings)?); + } + Command::FalseSimplicity { language, files, .. } => { + let language = Language::parse(&language)?; + let findings = false_simplicity::scan_files(&files, language) + .with_context(|| "failed to scan false-simplicity facts")?; + println!("{}", serde_json::to_string(&findings)?); + } + Command::FatUnion { language, files, .. } => { + let language = Language::parse(&language)?; + let findings = fat_union::scan_files(&files, language) + .with_context(|| "failed to scan fat-union facts")?; + println!("{}", serde_json::to_string(&findings)?); + } } Ok(()) } @@ -237,6 +274,36 @@ enum Command { files: Vec, jobs: Option, }, + OversizedPredicate { + language: String, + files: Vec, + jobs: Option, + }, + PathCondition { + language: String, + files: Vec, + jobs: Option, + }, + SequenceMine { + language: String, + files: Vec, + jobs: Option, + }, + FunctionLcom { + language: String, + files: Vec, + jobs: Option, + }, + FalseSimplicity { + language: String, + files: Vec, + jobs: Option, + }, + FatUnion { + language: String, + files: Vec, + jobs: Option, + }, } impl Command { @@ -260,7 +327,13 @@ impl Command { | Self::OperationalDiscontinuity { jobs, .. } | Self::StructuralTopology { jobs, .. } | Self::LocalFlow { jobs, .. } - | Self::FlaySimilarity { jobs, .. } => *jobs, + | Self::FlaySimilarity { jobs, .. } + | Self::OversizedPredicate { jobs, .. } + | Self::PathCondition { jobs, .. } + | Self::SequenceMine { jobs, .. } + | Self::FunctionLcom { jobs, .. } + | Self::FalseSimplicity { jobs, .. } + | Self::FatUnion { jobs, .. 
} => *jobs, } } } @@ -469,6 +542,72 @@ fn parse_args(args: Vec) -> Result { jobs, }) } + "oversized-predicate" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("oversized-predicate requires at least one file"); + } + Ok(Command::OversizedPredicate { + language, + files, + jobs, + }) + } + "path-condition" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("path-condition requires at least one file"); + } + Ok(Command::PathCondition { + language, + files, + jobs, + }) + } + "sequence-mine" | "broken-protocol" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("sequence-mine requires at least one file"); + } + Ok(Command::SequenceMine { + language, + files, + jobs, + }) + } + "function-lcom" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("function-lcom requires at least one file"); + } + Ok(Command::FunctionLcom { + language, + files, + jobs, + }) + } + "false-simplicity" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("false-simplicity requires at least one file"); + } + Ok(Command::FalseSimplicity { + language, + files, + jobs, + }) + } + "fat-union" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("fat-union requires at least one file"); + } + Ok(Command::FatUnion { + language, + files, + jobs, + }) + } "flay-similarity" => { let mut language = String::from("ruby"); let mut mass = 32usize; diff --git a/gems/decomplex/test/detector_runner_test.rb b/gems/decomplex/test/detector_runner_test.rb index 1f2469c78..b9e2095dc 100644 --- a/gems/decomplex/test/detector_runner_test.rb +++ b/gems/decomplex/test/detector_runner_test.rb @@ -371,6 +371,168 @@ def 
phase_shift end end + def test_oversized_predicate_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-oversized", ".rb"]) do |file| + file.write(<<~RUBY) + def complex_check + if a && b && c && d + puts "Too big" + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("oversized-predicate", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_path_condition_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-path", ".rb"]) do |file| + file.write(<<~RUBY) + def one + if a && b + puts "Here" + end + end + + def two + if a + if b + puts "Also here" + end + end + end + + def three + if a + puts "Neglected" + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("path-condition", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_sequence_mine_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-sequence", ".rb"]) do |file| + file.write(<<~RUBY) + def one + prepare + validate + execute + end + + def two + prepare + validate + execute + end + + def three + prepare + validate + execute + end + + def broken + prepare + execute + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("sequence-mine", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_function_lcom_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? 
+ + Tempfile.create(["decomplex-lcom", ".rb"]) do |file| + file.write(<<~RUBY) + def disjoint_concerns + a = 1 + b = a + 1 + puts b + + x = 2 + y = x + 2 + puts y + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("function-lcom", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_false_simplicity_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-false", ".rb"]) do |file| + file.write(<<~RUBY) + class Meta + def hack + send(:foo) + puts "Hidden IO" + $GLOBAL_STATE = 1 + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("false-simplicity", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_fat_union_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-fat", ".rb"]) do |file| + file.write(<<~RUBY) + def handle(node) + case node + when CallNode + node.name + node.args + when LocalVarNode + node.name + node.type + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("fat-union", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + def test_decision_pressure_rust_engine_matches_ruby_engine_byte_for_byte skip "cargo is not available" unless cargo_available? From 8f107a1d183980d97ee3af740fbd5557320700c7 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Wed, 17 Jun 2026 02:32:17 +0000 Subject: [PATCH 14/52] Decomplex: Fix O(N^2) performance and recursion limits in Rust detectors - Refactored co_update.rs and predicate_alias.rs grouping mechanisms from O(N^2) array scans to O(N log N) utilizing a hybrid BTreeMap/Vec approach, dropping execution time from ~10s to near-zero. 
- Capped recursion depth in implicit_control_flow.rs AST traversal to prevent stack overflow during nested path evaluations in large files. Co-authored-by: gemini-cli <218195315+gemini-cli@users.noreply.github.com> --- .../rust/src/decomplex/detectors/co_update.rs | 27 ++++++---- .../detectors/implicit_control_flow.rs | 50 +++++++++++-------- .../decomplex/detectors/predicate_alias.rs | 13 ++--- 3 files changed, 51 insertions(+), 39 deletions(-) diff --git a/gems/decomplex/rust/src/decomplex/detectors/co_update.rs b/gems/decomplex/rust/src/decomplex/detectors/co_update.rs index e7db72483..384355888 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/co_update.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/co_update.rs @@ -78,20 +78,25 @@ struct Report { impl Report { fn new(writes: Vec) -> Self { - let mut by_unit: Vec<((String, String), Vec)> = Vec::new(); + let mut keys = Vec::new(); + let mut map: BTreeMap<(String, String), Vec> = BTreeMap::new(); for w in &writes { let key = (w.file.clone(), w.defn.clone()); - if let Some(entry) = by_unit.iter_mut().find(|(k, _)| k == &key) { - entry.1.push(w.clone()); - } else { - by_unit.push((key, vec![w.clone()])); + if !map.contains_key(&key) { + keys.push(key.clone()); } + map.entry(key).or_default().push(w.clone()); } + let by_unit = keys.into_iter().map(|k| { + let v = map.remove(&k).unwrap(); + (k, v) + }).collect(); Self { writes, by_unit } } fn co_written_pairs(&self, min_support: usize) -> Vec { - let mut counts: Vec<(Vec, Vec<(String, String)>)> = Vec::new(); + let mut keys = Vec::new(); + let mut counts: BTreeMap, Vec<(String, String)>> = BTreeMap::new(); for (unit, ws) in &self.by_unit { let mut attrs: Vec<_> = ws.iter().map(|w| w.attr.clone()).collect::>().into_iter().collect(); attrs.sort(); @@ -99,17 +104,17 @@ impl Report { for i in 0..attrs.len() { for j in i+1..attrs.len() { let pair = vec![attrs[i].clone(), attrs[j].clone()]; - if let Some(entry) = counts.iter_mut().find(|(p, _)| p == &pair) { - 
entry.1.push(unit.clone()); - } else { - counts.push((pair, vec![unit.clone()])); + if !counts.contains_key(&pair) { + keys.push(pair.clone()); } + counts.entry(pair).or_default().push(unit.clone()); } } } let mut out = Vec::new(); - for (pair, units) in counts { + for pair in keys { + let units = counts.remove(&pair).unwrap(); if units.len() < min_support { continue; } out.push(CoWrittenPair { pair, diff --git a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs index 8797b9c8e..a37461265 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs @@ -150,14 +150,15 @@ impl<'a> ImplicitControlFlow<'a> { } fn method_paths(&self, node: &Node) -> Vec { - self.paths_for_statements(&ast::body_stmts(node)) + self.paths_for_statements(&ast::body_stmts(node), 0) } - fn paths_for_statements(&self, statements: &[&Node]) -> Vec { + fn paths_for_statements(&self, statements: &[&Node], depth: usize) -> Vec { + if depth > 10 { return vec![self.empty_path()]; } let mut paths = vec![self.empty_path()]; for stmt in statements { if stmt.r#type == "BEGIN" { continue; } - let stmt_paths = self.paths_for(stmt); + let stmt_paths = self.paths_for(stmt, depth + 1); paths = self.append_statement_paths(paths, stmt_paths); } paths @@ -183,57 +184,62 @@ impl<'a> ImplicitControlFlow<'a> { combined.into_iter().take(PATH_LIMIT).collect() } - fn paths_for(&self, node: &Node) -> Vec { + fn paths_for(&self, node: &Node, depth: usize) -> Vec { + if depth > 10 { return vec![self.empty_path()]; } match node.r#type.as_str() { - "BLOCK" => self.paths_for_statements(&node.children.iter().filter_map(ast::node).collect::>()), - "SCOPE" => self.paths_for(node.children.get(2).and_then(ast::node).unwrap_or(node)), - "IF" | "UNLESS" => self.branch_paths(node), - "CASE" | "CASE2" => self.case_paths(node), + "BLOCK" => 
self.paths_for_statements(&node.children.iter().filter_map(ast::node).collect::>(), depth), + "SCOPE" => self.paths_for(node.children.get(2).and_then(ast::node).unwrap_or(node), depth), + "IF" | "UNLESS" => self.branch_paths(node, depth), + "CASE" | "CASE2" => self.case_paths(node, depth), "RETURN" | "BREAK" | "NEXT" | "REDO" | "RETRY" => { - self.generic_paths(node).into_iter().map(|mut p| { p.terminal = true; p }).collect() + self.generic_paths(node, depth).into_iter().map(|mut p| { p.terminal = true; p }).collect() } - _ => self.generic_paths(node), + _ => self.generic_paths(node, depth), } } - fn branch_paths(&self, node: &Node) -> Vec { + fn branch_paths(&self, node: &Node, depth: usize) -> Vec { + if depth > 10 { return vec![self.empty_path()]; } let cond = node.children.get(0).and_then(ast::node); let pos = node.children.get(1).and_then(ast::node); let neg = node.children.get(2).and_then(ast::node); - let mut alts = self.paths_for(pos.unwrap_or(node)); - if let Some(n) = neg { alts.extend(self.paths_for(n)); } else { alts.push(self.empty_path()); } + let mut alts = self.paths_for(pos.unwrap_or(node), depth + 1); + if let Some(n) = neg { alts.extend(self.paths_for(n, depth + 1)); } else { alts.push(self.empty_path()); } - self.combine_path_lists(self.paths_for(cond.unwrap_or(node)), alts) + self.combine_path_lists(self.paths_for(cond.unwrap_or(node), depth + 1), alts) } - fn case_paths(&self, node: &Node) -> Vec { + fn case_paths(&self, node: &Node, depth: usize) -> Vec { + if depth > 10 { return vec![self.empty_path()]; } let (cond, first_when) = if node.r#type == "CASE2" { (None, node.children.get(0).and_then(ast::node)) } else { (node.children.get(0).and_then(ast::node), node.children.get(1).and_then(ast::node)) }; - self.combine_path_lists(cond.map(|c| self.paths_for(c)).unwrap_or(vec![self.empty_path()]), self.when_paths(first_when)) + self.combine_path_lists(cond.map(|c| self.paths_for(c, depth + 1)).unwrap_or(vec![self.empty_path()]), 
self.when_paths(first_when, depth + 1)) } - fn when_paths(&self, node: Option<&Node>) -> Vec { + fn when_paths(&self, node: Option<&Node>, depth: usize) -> Vec { + if depth > 10 { return vec![self.empty_path()]; } let Some(n) = node else { return vec![self.empty_path()] }; - if n.r#type != "WHEN" { return self.paths_for(n) } + if n.r#type != "WHEN" { return self.paths_for(n, depth + 1) } let pat = n.children.get(0).and_then(ast::node); let body = n.children.get(1).and_then(ast::node); let next = n.children.get(2).and_then(ast::node); - let current = self.combine_path_lists(self.paths_for(pat.unwrap_or(n)), self.paths_for(body.unwrap_or(n))); + let current = self.combine_path_lists(self.paths_for(pat.unwrap_or(n), depth + 1), self.paths_for(body.unwrap_or(n), depth + 1)); let mut out = current; - out.extend(self.when_paths(next)); + out.extend(self.when_paths(next, depth + 1)); out.into_iter().take(PATH_LIMIT).collect() } - fn generic_paths(&self, node: &Node) -> Vec { + fn generic_paths(&self, node: &Node, depth: usize) -> Vec { + if depth > 10 { return vec![self.empty_path()]; } if matches!(node.r#type.as_str(), "CLASS" | "MODULE" | "DEFN" | "DEFS" | "LAMBDA") { return vec![self.empty_path()]; } let mut child_paths = vec![self.empty_path()]; for child in node.children.iter().filter_map(ast::node) { - child_paths = self.combine_path_lists(child_paths, self.paths_for(child)); + child_paths = self.combine_path_lists(child_paths, self.paths_for(child, depth + 1)); } if let Some(mid) = self.internal_protocol_call(node) { diff --git a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs index fe0845248..0d92a09e8 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs @@ -106,17 +106,18 @@ impl Report { } fn alias_clusters(&self) -> Vec { - let mut by_body: Vec<(String, Vec<&Pred>)> = Vec::new(); + let mut 
keys = Vec::new(); + let mut by_body: BTreeMap> = BTreeMap::new(); for p in &self.preds { - if let Some(entry) = by_body.iter_mut().find(|(b, _)| b == &p.body) { - entry.1.push(p); - } else { - by_body.push((p.body.clone(), vec![p])); + if !by_body.contains_key(&p.body) { + keys.push(p.body.clone()); } + by_body.entry(p.body.clone()).or_default().push(p); } let mut out = Vec::new(); - for (body, ps) in by_body { + for body in keys { + let ps = by_body.remove(&body).unwrap(); let mut names_set = BTreeSet::new(); for p in &ps { names_set.insert(p.name.clone()); } let names: Vec<_> = names_set.into_iter().collect(); From 9aa7f711c9bdd20e0bc5be648a89b1c3eee918f5 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Thu, 18 Jun 2026 11:00:10 +0000 Subject: [PATCH 15/52] WIP decomplex rust migration --- gems/decomplex/benchmark.rb | 16 + gems/decomplex/benchmark_dir.rb | 18 + .../agents/aliasing-complexity-metrics.md | 44 +++ .../docs/agents/superfluous-state.md | 361 ++++++++++++++++++ .../lib/decomplex/detector_runner.rb | 92 ++++- .../lib/decomplex/false_simplicity.rb | 2 +- .../lib/decomplex/mutability_pressure.rb | 181 +++++++++ .../lib/decomplex/native/co_update.rb | 26 +- .../lib/decomplex/native/decision_pressure.rb | 26 +- .../lib/decomplex/native/derived_state.rb | 26 +- .../lib/decomplex/native/false_simplicity.rb | 26 +- .../lib/decomplex/native/fat_union.rb | 26 +- .../lib/decomplex/native/flay_similarity.rb | 28 +- .../lib/decomplex/native/function_lcom.rb | 26 +- .../decomplex/native/implicit_control_flow.rb | 26 +- .../native/inconsistent_rename_clone.rb | 26 +- .../lib/decomplex/native/local_flow.rb | 35 ++ .../lib/decomplex/native/locality_drag.rb | 26 +- gems/decomplex/lib/decomplex/native/miner.rb | 26 +- .../native/operational_discontinuity.rb | 26 +- .../decomplex/native/oversized_predicate.rb | 26 +- .../lib/decomplex/native/path_condition.rb | 26 +- .../lib/decomplex/native/predicate_aliases.rb | 26 +- .../decomplex/native/redundant_nil_guard.rb | 26 +- 
.../lib/decomplex/native/semantic_aliases.rb | 26 +- .../lib/decomplex/native/sequence_mine.rb | 26 +- .../decomplex/native/state_branch_density.rb | 26 +- .../lib/decomplex/native/state_mesh.rb | 26 +- .../lib/decomplex/native/state_writes.rb | 28 +- .../decomplex/native/structural_topology.rb | 35 ++ .../native/temporal_ordering_pressure.rb | 26 +- .../native/weighted_inlined_complexity.rb | 26 +- gems/decomplex/lib/decomplex/state_mesh.rb | 3 +- .../lib/decomplex/superfluous_state.rb | 210 ++++++++++ gems/decomplex/ruby_core.json | 0 gems/decomplex/rust/Cargo.lock | 110 ++++++ gems/decomplex/rust/Cargo.toml | 10 + gems/decomplex/rust/rust_core.json | 1 + gems/decomplex/rust/rust_writes.json | 1 + gems/decomplex/rust/src/decomplex/ast.rs | 59 ++- .../decomplex/detectors/decision_pressure.rs | 4 +- .../src/decomplex/detectors/derived_state.rs | 4 +- .../decomplex/detectors/false_simplicity.rs | 4 +- .../rust/src/decomplex/detectors/fat_union.rs | 4 +- .../src/decomplex/detectors/function_lcom.rs | 4 +- .../detectors/implicit_control_flow.rs | 4 +- .../detectors/inconsistent_rename_clone.rs | 4 +- .../src/decomplex/detectors/local_flow.rs | 41 +- .../src/decomplex/detectors/locality_drag.rs | 4 +- .../detectors/operational_discontinuity.rs | 4 +- .../detectors/oversized_predicate.rs | 4 +- .../src/decomplex/detectors/path_condition.rs | 4 +- .../decomplex/detectors/predicate_alias.rs | 4 +- .../detectors/redundant_nil_guard.rs | 4 +- .../src/decomplex/detectors/semantic_alias.rs | 4 +- .../src/decomplex/detectors/sequence_mine.rs | 4 +- .../detectors/state_branch_density.rs | 4 +- .../src/decomplex/detectors/state_mesh.rs | 2 +- .../detectors/structural_topology.rs | 4 +- .../detectors/temporal_ordering_pressure.rs | 4 +- .../weighted_inlined_cognitive_complexity.rs | 6 +- gems/decomplex/rust/src/decomplex/syntax.rs | 26 +- .../{ruby.rs => tree_sitter_adapter.rs} | 248 ++++++++++-- gems/decomplex/test/detector_runner_test.rb | 60 +++ 64 files changed, 1966 
insertions(+), 269 deletions(-) create mode 100644 gems/decomplex/benchmark.rb create mode 100644 gems/decomplex/benchmark_dir.rb create mode 100644 gems/decomplex/docs/agents/aliasing-complexity-metrics.md create mode 100644 gems/decomplex/docs/agents/superfluous-state.md create mode 100644 gems/decomplex/lib/decomplex/mutability_pressure.rb create mode 100644 gems/decomplex/lib/decomplex/native/local_flow.rb create mode 100644 gems/decomplex/lib/decomplex/native/structural_topology.rb create mode 100644 gems/decomplex/lib/decomplex/superfluous_state.rb create mode 100644 gems/decomplex/ruby_core.json create mode 100644 gems/decomplex/rust/rust_core.json create mode 100644 gems/decomplex/rust/rust_writes.json rename gems/decomplex/rust/src/decomplex/syntax/{ruby.rs => tree_sitter_adapter.rs} (76%) diff --git a/gems/decomplex/benchmark.rb b/gems/decomplex/benchmark.rb new file mode 100644 index 000000000..499e6df07 --- /dev/null +++ b/gems/decomplex/benchmark.rb @@ -0,0 +1,16 @@ +require_relative "lib/decomplex" +require "benchmark" + +files = ["lib/decomplex/ast.rb"] +detectors = Decomplex::DetectorRunner::DETECTORS.keys + +Benchmark.bm(40) do |x| + detectors.each do |det| + x.report("#{det} (ruby)") do + Decomplex::DetectorRunner.run(det, files, engine: "ruby") + end + x.report("#{det} (rust)") do + Decomplex::DetectorRunner.run(det, files, engine: "rust") + end + end +end diff --git a/gems/decomplex/benchmark_dir.rb b/gems/decomplex/benchmark_dir.rb new file mode 100644 index 000000000..fb50c25c9 --- /dev/null +++ b/gems/decomplex/benchmark_dir.rb @@ -0,0 +1,18 @@ +require_relative "lib/decomplex" +require "benchmark" + +files = Dir.glob("lib/decomplex/**/*.rb") +detectors = Decomplex::DetectorRunner::DETECTORS.keys + +puts "Benchmarking across #{files.size} files in lib/decomplex/" + +Benchmark.bm(40) do |x| + detectors.each do |det| + x.report("#{det} (ruby)") do + Decomplex::DetectorRunner.run(det, files, engine: "ruby") + end + x.report("#{det} (rust)") do + 
Decomplex::DetectorRunner.run(det, files, engine: "rust") + end + end +end diff --git a/gems/decomplex/docs/agents/aliasing-complexity-metrics.md b/gems/decomplex/docs/agents/aliasing-complexity-metrics.md new file mode 100644 index 000000000..cb421bd54 --- /dev/null +++ b/gems/decomplex/docs/agents/aliasing-complexity-metrics.md @@ -0,0 +1,44 @@ +# Aliasing and Ownership Complexity Metrics + +This document outlines the expansion of Decomplex to include pointer-aliasing and ownership detection. These metrics transform Decomplex from a heuristic structural analyzer into a semantic fact-engine capable of driving high-accuracy transpilation to CLEAR's affine ownership model. + +## Metric Tiers + +### Tier 1: High Confidence / Structural Hazards +**Metric: Encapsulation Breach** +- **Description**: Detects when a class returns a mutable reference to an internal state field (`@ivar`) without a copy (`.dup` / `.clone`) or conversion. +- **Architectural Risk**: Violates "Fortress Architecture" principles. It allows external callers to bypass class invariants and validation by mutating state "from the outside." +- **CLEAR Impact**: Identifies sites where CLEAR must either enforce a `COPY` or wrap the field in a read-only borrow. + +### Tier 2: Design Pressure / Structural Risk +**Metric: Aliasing Tangle (Action-at-a-Distance)** +- **Description**: Identifies single objects that are aliased across three or more disparate modules/subsystems. +- **Architectural Risk**: Creates "tangled webs" where mutation in one module causes unpredictable behavior in another. This is the primary driver of "locality drag"—where a developer must understand 5 files to change 1. +- **CLEAR Impact**: Signals that a resource requires a Group 1 capability (e.g., `@shared:locked` or `@shared:writeLocked`) rather than simple affine move semantics. 
+ +### Tier 3: Strategic / Project Context +**Metric: Entanglement Density** +- **Description**: An aggregate heatmap quantifying the ratio of aliased references to total references within a file or directory. +- **Architectural Risk**: High-density files are objectively harder to refactor, test, and reason about. They represent the "dark matter" of the codebase where most regressions occur. +- **CLEAR Impact**: Prioritization metric. Files with low entanglement density are "low-hanging fruit" for 98% automated transpilation. High-density files require manual architectural review before migration. + +## Implementation Strategy + +The implementation leverages a two-pass semantic analyzer within `gems/decomplex/lib/decomplex/`: + +1. **Escape & Reachability Pass**: + - Extends `LocalFlow` to track def-use chains across method boundaries. + - Builds a **Reachability Graph** to determine if an object allocated in Scope A can reach Scope B via return, argument passing, or field assignment. + +2. **Ownership Synthesis**: + - Aggregates escape facts to classify bindings as `Unique`, `Borrowed`, or `Shared`. + - Detects "Reification Misses" where a developer intended for an object to be private but allowed it to escape via a getter. + +## Transpiler Bridge + +When the CLEAR transpiler processes Ruby code, it queries the Decomplex fact-graph: + +- **Fact: Unique** -> Transpile to `GIVE` (Move). +- **Fact: Borrowed** -> Transpile to `WITH ... AS alias` (Borrow). +- **Fact: Shared** -> Transpile to `@shared:locked` (Arc/RwLock). +- **Fact: Escaped Field** -> Transpile to `RETURN COPY field` or `@indirect` wrapper. 
diff --git a/gems/decomplex/docs/agents/superfluous-state.md b/gems/decomplex/docs/agents/superfluous-state.md new file mode 100644 index 000000000..671479e75 --- /dev/null +++ b/gems/decomplex/docs/agents/superfluous-state.md @@ -0,0 +1,361 @@ +# Superfluous State (Tier 1) -- state that can be eliminated entirely + +## Why this exists + +StateMesh answers "where is state and how messy is it?" TemporalOrderingPressure +answers "does this owner expose an implicit state machine?" Superfluous State +answers the natural next question: **"could this field simply be removed?"** + +Most codebases accumulate fields that are not really state at all. They are +transit data that happens to be stored in an ivar because the developer +needed to pass a value between two methods and an ivar was the path of least +resistance. The field looks like state -- it lives on the object, it persists +between calls -- but it's actually a local variable that escaped its method +body. + +This detector finds those fields and ranks them by eliminability. + +## What it detects + +### Pattern 1: Intra-method pass-through (eliminable with near-certainty) + +A field that is **written and read within the same method body**. The value +never escapes the stack frame. The ivar is purely a local variable that was +promoted for no reason. + +```ruby +def checkout(user, cart) + @total = cart.items.sum(&:price) # <-- written + charge(user, @total) # <-- read + @total # <-- read again +end +``` + +`@total` is written once, read twice, all inside `checkout`. No other method +ever touches it. It should be a local variable `total`. Detection requires +zero opinion -- the writer span and reader spans are all within the same +DEFN boundary. + +### Pattern 2: Adjacent-call pass-through (eliminable with high confidence) + +A field with **exactly one writer method and exactly one reader method**, +where every observed call site places the writer immediately before the +reader. 
+ +```ruby +class BillingService + def set_user(user) + @user = user # <-- only writer + end + + def validate + return unless @user # <-- only reader + end +end + +# Every observed call site: +# service.set_user(u) +# service.validate +``` + +`@user` is transit data: `set_user` produces it, `validate` consumes it. It +can be eliminated by converting `set_user` to return the value and `validate` +to accept it as a parameter: `user = acquire_user(...); validate(user)`. + +False positives can occur when the writer genuinely mutates object state that +other methods depend on. This is guarded by the "exactly one reader" and +"adjacent calls" constraints. If `@user` is read in three other methods, or +if calls are not consistently adjacent, the score drops below the report +threshold. + +### Pattern 3: Derived cache (eliminable with medium confidence, user-gated) + +A field that is computed from other fields and never independently mutated. +Its value is always derivable from the source fields. + +```ruby +def initialize(cart) + @cart = cart + @total = @cart.total # <-- derived, never written elsewhere +end +``` + +`@total` is a cache of `@cart.total`. It can be eliminated by recomputing on +read. The tradeoff depends on recomputation cost: `@cart.total` with a +10,000-item collection is different from `@user.name`. This detector flags +derived caches and leaves the recomputation decision to the human. 
+ +## Score formula + +For each field, compute: + +``` +eliminability_score = + (1.0 / max(1, reader_method_count)) × # fewer reader methods = easier to eliminate + (1.0 / max(1, writer_method_count)) × # fewer writer methods = fewer refactor sites + intra_method_bonus × # × 10 if all reads and writes are in the same method + adjacent_call_bonus × # × 5 if writer-reader is adjacent at every callsite + (1.0 - rederivation_penalty) # penalize if this field gates other re-derivations +``` + +### Terms + +| Term | Range | Definition | +|---|---|---| +| `reader_method_count` | ≥ 1 | Number of distinct (file, defn) pairs that **read** this field | +| `writer_method_count` | ≥ 1 | Number of distinct (file, defn) pairs that **write** this field | +| `intra_method_bonus` | 1.0 or 10.0 | 10.0 if all reads AND writes are in the same method body; 1.0 otherwise | +| `adjacent_call_bonus` | 1.0 or 5.0 | 5.0 if writer_method_count == 1 AND reader_method_count == 1 AND every callsite sequence is writer-then-reader adjacent; 1.0 otherwise | +| `rederivation_penalty` | 0.0 -- 1.0 | Fraction of re-derivation sites that depend on this field. If this field is an input to N re-derivations out of total T tracked re-derivations, penalty = N / T (capped at 1.0). Gives a weight penalty for "this field's value is used to derive other computed state." | + +### Thresholds + +| Score range | Classification | Action | +|---|---|---| +| > 0.5 | Almost certainly eliminable | Remove the field; convert to local variable or parameter | +| 0.1 -- 0.5 | Probably eliminable with moderate refactor | Adjust call signatures, inline the write | +| < 0.1 | Genuinely stateful or gating complex re-derivations | Do not report (below noise floor) | + +**Only scores > 0.1 are reported.** This avoids surfacing fields that are +legitimate persistent state. + +## Relationship to other metrics + +| Metric | Question | Superfluous State adds | +|---|---|---| +| StateMesh | "Where is state and how messy?" 
| "Which fields don't need to exist at all?" | +| TemporalOrderingPressure | "Does this owner expose an implicit state machine?" | "Can we eliminate the fields that create the machine?" | +| DecisionPressure | "Which contracts drive defensive code?" | "Can removing a field reduce contracts that need defending?" | + +StateMesh and TemporalOrderingPressure show the *problem*. Superfluous State +shows the *fix*. + +## Implementation + +### Input facts + +All required facts already exist in Decomplex. Superfluous State is a +**post-analyzer** -- it reads StateMesh and ImplicitControlFlow output, +scores each field, and emits a ranked list. No new AST walks. + +| Fact | Source | Needed for | +|---|---|---| +| Field read sites (per file, defn, line) | `StateMesh#reads` | `reader_method_count` | +| Field write sites (per file, defn, line) | `StateMesh#writes` | `writer_method_count` | +| Method boundaries (DEFN/DEFS spans) | `StateMesh` AST root | `intra_method_bonus` | +| Re-derivation chains | `StateMesh#re_derivations` | `rederivation_penalty` | +| Call adjacency per field pair | `ImplicitControlFlow` sequences | `adjacent_call_bonus` | +| Field names (normalized) | `StateMesh` known fields | Identity | + +### Phases + +**Phase 1: Group reads and writes by field.** + +For each normalized field name in StateMesh: +- Collect all `Write` sites into `writers = Map<(file, defn) → [Write]>` +- Collect all `Read` sites into `readers = Map<(file, defn) → [Read]>` +- Compute `writer_method_count = writers.keys.uniq.size` +- Compute `reader_method_count = readers.keys.uniq.size` + +**Phase 2: Detect intra-method pass-through.** + +A field is intra-method if `writer_method_count == 1` AND +`reader_method_count == 1` AND the single writer and single reader +(file, defn, line) spans are both within the same DEFN/DEFS body. + +Implementation: StateMesh already tracks file and defn per site. +Compare the `defn` field of the writer and reader. 
If they match and +the total read count within that defn >= 1, the field is intra-method +pass-through. + +**Phase 3: Detect adjacent-call pass-through.** + +A field is adjacent-call pass-through if: +- `writer_method_count == 1` AND `reader_method_count == 1` +- NOT intra-method (distinct methods) +- For every `ImplicitControlFlow::MethodSequence` that contains the + writer method: the reader method immediately follows it in the + observed call order. + +Adjacency is directional: `set_user → validate` is adjacent; `validate +→ set_user` is not. If at least one callsite reverses the order +(reader before writer), the field does NOT qualify for the +adjacent-call bonus. + +If no callsites are found for the pair (the writer is tested alone or +called from unknown sites), the bonus is NOT applied -- adjacency +cannot be proven. This is the conservative default. + +**Phase 4: Compute re-derivation penalty.** + +A field gates re-derivations if it appears as an input in StateMesh +re-derivation chains. For each re-derivation: +- If `re_derivation.field == this_field`, count it. +- `rederivation_penalty = this_field_rederivations / max(1, total_rederivations)` + +This prevents flagging a field like `@storage` that feeds 12 other +derived fields as "eliminable." + +**Phase 5: Score and rank.** + +Apply the formula above for each known field. Sort descending by score. +Emit only fields with score > 0.1. 
+ +### Output schema + +```ruby +{ + field: "@total", # ivar name + normalized: "total", # without @ prefix + score: 0.92, # eliminability score + classification: "intra_method", # "intra_method" | "adjacent_call" | "derived_cache" + writer_method_count: 1, + reader_method_count: 1, + write_sites: [ # all write locations + { file: "app/services/billing.rb", defn: "checkout", line: 4 } + ], + read_sites: [ # all read locations + { file: "app/services/billing.rb", defn: "checkout", line: 5 }, + { file: "app/services/billing.rb", defn: "checkout", line: 6 } + ], + rederivations_gated: 0, # how many re-derivations depend on this field + adjacent_callsites: nil, # for adjacent_call patterns: [caller, callee, file, line] + recommendation: "Replace @total with a local variable in checkout." +} +``` + +### Test fixtures + +**Fixture A: Intra-method pass-through** + +```ruby +class Example + def checkout(cart) + @total = cart.total # write + format(@total) # read + end +end +``` + +Expected: `@total` score > 0.5, classification `intra_method`. + +**Fixture B: Adjacent-call pass-through** + +```ruby +class Billing + def set_user(user) + @user = user + end + + def validate + return unless @user + charge(@user) + end + + def process + set_user(find_user) + validate + end +end +``` + +Expected: `@user` score > 0.5, classification `adjacent_call`, +`adjacent_callsites` includes `process` line. + +**Fixture C: Adjacent-call with reversed order (NOT eliminable)** + +```ruby +class Billing + def set_user(user) + @user = user + end + + def validate + return unless @user + end + + def process + validate # reader BEFORE writer -- order is reversed + set_user(find_user) + end +end +``` + +Expected: `@user` score < 0.1 (no adjacent-call bonus, reader not +adjacent after writer), **NOT reported**. 
+ +**Fixture D: Derived cache** + +```ruby +class Cart + def initialize(items) + @items = items + @total = items.sum(&:price) # derived from @items + end +end +``` + +Expected: `@total` score 0.1--0.5, classification `derived_cache`. + +**Fixture E: Genuine state (NOT reported)** + +```ruby +class Cart + def initialize + @items = [] + end + + def add(item) + @items << item # multiple writers + @total = @items.sum(&:price) + end + + def remove(item) + @items.delete(item) # multiple writers + @total = @items.sum(&:price) + end + + def empty? + @items.empty? # multiple readers + end + + def total + @total # multiple readers + end +end +``` + +Expected: `@items` and `@total` both score < 0.1 (`writer_method_count` is > 1 for +both, and `@items` also has `reader_method_count` > 1), **NOT reported**. + +## Language portability + +This metric is fully language-agnostic. Every language has fields/properties/ +members/ivars with write sites and read sites. The facts are: + +- **Field identity**: name within owner +- **Writer locations**: (file, function, line) +- **Reader locations**: (file, function, line) +- **Call adjacency**: (caller, callee, file, line) +- **Re-derivation dependency**: field X is an input to derived field Y + +None of these are Ruby-specific. The TreeSitter fact extraction layer +(`syntax/<lang>.rs`) needs to emit `StateWrite`, `StateRead`, and +`CallSite` facts per language. Superfluous State, like StateMesh and +TemporalOrderingPressure, consumes those language-agnostic facts. + +## Non-goals + +- **Do not** recommend refactoring actions. The detector says "this field + is probably eliminable." The human decides whether to inline it, convert + it to a return value, or keep it. +- **Do not** compute recomputation cost for derived caches. Flag them, + let the user decide. +- **Do not** attempt cross-file call adjacency. Adjacent-call detection + is intra-file only (same as ImplicitControlFlow's current scope). +- **Do not** analyze initialization-only fields differently.
A field + written in `initialize` and read once can be pass-through if the read + is in a single-adjacent-call method. But `initialize` → caller is + implicit; the adjacent-call bonus does not apply across an + initialization boundary unless the caller method directly follows + construction in observed sequences. diff --git a/gems/decomplex/lib/decomplex/detector_runner.rb b/gems/decomplex/lib/decomplex/detector_runner.rb index ae634f69a..5070c92e0 100644 --- a/gems/decomplex/lib/decomplex/detector_runner.rb +++ b/gems/decomplex/lib/decomplex/detector_runner.rb @@ -3,12 +3,16 @@ require "json" require_relative "co_update" require_relative "flay_similarity" +require_relative "local_flow" +require_relative "structural_topology" require_relative "native/co_update" require_relative "native/decision_pressure" require_relative "native/predicate_aliases" require_relative "native/flay_similarity" require_relative "native/miner" require_relative "native/semantic_aliases" +require_relative "native/local_flow" +require_relative "native/structural_topology" require_relative "miner" require_relative "decision_pressure" require_relative "predicate_alias" @@ -69,7 +73,9 @@ module DetectorRunner "sequence-mine" => :sequence_mine, "function-lcom" => :function_lcom, "false-simplicity" => :false_simplicity, - "fat-union" => :fat_union + "fat-union" => :fat_union, + "local-flow" => :local_flow, + "structural-topology" => :structural_topology }.freeze ENGINES = %w[ruby rust].freeze @@ -124,6 +130,10 @@ def run(detector, files, engine: "ruby", mass: FlaySimilarity::DEFAULT_MASS, fuz false_simplicity(files, engine: engine, jobs: jobs) when :fat_union fat_union(files, engine: engine, jobs: jobs) + when :local_flow + local_flow(files, engine: engine, jobs: jobs) + when :structural_topology + structural_topology(files, engine: engine, jobs: jobs) else raise ArgumentError, "unsupported decomplex detector: #{detector}" end @@ -363,6 +373,86 @@ def detector_names { "fat_unions" => 
FatUnion.scan(files).fat_unions } end + private_class_method def self.local_flow(files, engine:, jobs:) + return Native::LocalFlow.scan(files, jobs: jobs) if engine.to_s == "rust" + + LocalFlow.scan(files).map { |summary| local_flow_summary(summary) } + end + + private_class_method def self.structural_topology(files, engine:, jobs:) + return Native::StructuralTopology.scan(files, jobs: jobs) if engine.to_s == "rust" + + graph = StructuralTopology.scan(files) + { + "methods" => graph.methods.map { |method| structural_method(method) }, + "edges" => graph.edges.map { |edge| structural_edge(edge) } + } + end + + private_class_method def self.local_flow_summary(summary) + { + "id" => summary.id, + "owner" => summary.owner, + "name" => summary.name, + "file" => summary.file, + "line" => summary.line, + "span" => summary.span, + "statements" => summary.statements.map { |statement| local_flow_statement(statement) }, + "boundaries" => summary.boundaries.map { |boundary| local_flow_boundary(boundary) } + } + end + + private_class_method def self.local_flow_statement(statement) + { + "index" => statement.index, + "line" => statement.line, + "end_line" => statement.end_line, + "span" => statement.span, + "source" => statement.source, + "reads" => statement.reads.to_a.sort, + "writes" => statement.writes.to_a.sort, + "dependencies" => statement.dependencies.map { |edge| Array(edge).map(&:to_s) }.sort, + "co_uses" => statement.co_uses.map { |edge| Array(edge).map(&:to_s).sort }.sort + } + end + + private_class_method def self.local_flow_boundary(boundary) + { + "before_index" => boundary.before_index, + "after_index" => boundary.after_index, + "line" => boundary.line, + "kind" => boundary.kind.to_s, + "text" => boundary.text + } + end + + private_class_method def self.structural_method(method) + { + "id" => method.id, + "owner" => method.owner, + "name" => method.name, + "file" => method.file, + "line" => method.line, + "span" => method.span, + "visibility" => 
method.visibility.to_s + } + end + + private_class_method def self.structural_edge(edge) + { + "caller" => edge.caller, + "callee" => edge.callee, + "caller_name" => edge.caller_name, + "callee_name" => edge.callee_name, + "file" => edge.file, + "line" => edge.line, + "span" => edge.span, + "type" => edge.type.to_s, + "kind" => edge.kind.to_s, + "confidence" => edge.confidence.to_s + } + end + private_class_method def self.canonicalize(value) case value when Hash diff --git a/gems/decomplex/lib/decomplex/false_simplicity.rb b/gems/decomplex/lib/decomplex/false_simplicity.rb index 39d1a4bad..a4b8f21c8 100644 --- a/gems/decomplex/lib/decomplex/false_simplicity.rb +++ b/gems/decomplex/lib/decomplex/false_simplicity.rb @@ -134,7 +134,7 @@ class FalseSimplicity ).freeze ZIG_LEXICON = Lexicon.new( dispatch_mids: %w[field fieldParentPtr ptrCast alignCast call].freeze, - meta_mids: %w[typeInfo TypeOf ptrCast intFromPtr ptrFromInt].freeze, + meta_mids: %w[typeInfo TypeOf ptrCast intFromPtr ptrFromInt eval].freeze, method_obj_mids: %i[method].freeze, io_consts: %w[std os fs process net Thread Mutex Atomic].freeze, io_bare: %w[panic unreachable].freeze, diff --git a/gems/decomplex/lib/decomplex/mutability_pressure.rb b/gems/decomplex/lib/decomplex/mutability_pressure.rb new file mode 100644 index 000000000..394e2d50d --- /dev/null +++ b/gems/decomplex/lib/decomplex/mutability_pressure.rb @@ -0,0 +1,181 @@ +# frozen_string_literal: true + +require_relative "ast" +require_relative "state_mesh" + +module Decomplex + # MutabilityPressure -- rank fields by how many methods participate in + # writing them, and classify each field by lifecycle pattern. + # + # Post-analyzer over StateMesh. No new AST walks. + # + # Classifications: + # immutable_convention -- written once in initialize, never mutated. + # Also catches memos (same-method write+read with read-first pattern). + # pass_through -- written and read in a SINGLE method body. 
+ # shadow_state -- always written in strict subset of another + # field's write methods. Zero operational autonomy. Coupled state. + # one_way_state -- written in >=2 methods, read in <=1. + # mutable_entity -- written in >=2 methods, read in >=2. + # dead_state -- written but never read. + class MutabilityPressure + Finding = Struct.new(:field, :classification, :write_spread, + :read_spread, :total_writes, :total_reads, + :write_sites, :read_sites, :shadowed_by, + keyword_init: true) do + def to_h + h = { + field: field, + classification: classification, + write_spread: write_spread, + read_spread: read_spread, + total_writes: total_writes, + total_reads: total_reads, + write_sites: write_sites, + read_sites: read_sites + } + h[:shadowed_by] = shadowed_by if shadowed_by + h + end + end + + def self.scan(files) + sm = StateMesh.scan(files, min_writes: 1) + sm.run + new(sm).scan + end + + def initialize(state_mesh) + @sm = state_mesh + end + + def scan + group_by_field + classify_and_rank + end + + private + + def group_by_field + @writes_by = Hash.new { |h, k| h[k] = [] } + @reads_by = Hash.new { |h, k| h[k] = [] } + + @sm.writes.each do |w| + next unless w.recv == "self" + @writes_by[w.norm] << w + end + @sm.reads.each do |r| + next unless r.recv == "self" + @reads_by[r.norm] << r + end + end + + def classify_and_rank + results = [] + all_norms = (@writes_by.keys + @reads_by.keys).uniq + + # Build write-method signatures: field -> Set of (file, defn) + field_write_sigs = {} + all_norms.each do |norm| + ws = @writes_by[norm] || [] + field_write_sigs[norm] = ws.map { |w| [w.file, w.defn] }.uniq.sort + end + + # Shadow detection: field Y always written in strict subset of X's methods + field_shadows = Hash.new { |h, k| h[k] = [] } + field_write_sigs.each do |y_norm, y_methods| + next if y_methods.size <= 1 + field_write_sigs.each do |x_norm, x_methods| + next if x_norm == y_norm + next if x_methods.size <= y_methods.size + next if x_methods.size < 2 + next unless 
y_methods.all? { |ym| x_methods.include?(ym) } + field_shadows[y_norm] << x_norm + end + end + + all_norms.each do |norm| + writers = @writes_by[norm] || [] + readers = @reads_by[norm] || [] + + wmethods = writers.map { |w| [w.file, w.defn] }.uniq + rmethods = readers.map { |r| [r.file, r.defn] }.uniq + + ws = wmethods.size + rs = rmethods.size + tw = writers.size + tr = readers.size + + next if tw == 0 && tr == 0 + + # ---- dead state: written but never read ---- + if tw > 0 && tr == 0 + next if @sm.reads.any? { |r| r.norm == norm } + results << Finding.new( + field: norm, classification: "dead_state", + write_spread: ws, read_spread: 0, + total_writes: tw, total_reads: 0, + write_sites: writers.map { |w| "#{w.file}:#{w.defn}:#{w.line}" }.uniq, + read_sites: [] + ) + next + end + + next if tw == 0 + + # ---- classify ---- + all_one_method = (wmethods + rmethods).uniq.size == 1 + init_only = ws == 1 && wmethods[0][1] == "initialize" + + is_memo = false + if all_one_method + first_read = readers.map(&:line).min + first_write = writers.map(&:line).min + is_memo = first_read && first_write && first_read <= first_write + end + + shadow = field_shadows[norm].first + + classification = if init_only + "immutable_convention" + elsif is_memo + "immutable_convention" + elsif all_one_method + "pass_through" + elsif shadow + "shadow_state" + elsif ws >= 2 && rs <= 1 + "one_way_state" + elsif ws >= 2 && rs >= 2 + "mutable_entity" + else + "immutable_convention" + end + + results << Finding.new( + field: norm, + classification: classification, + write_spread: ws, + read_spread: rs, + total_writes: tw, + total_reads: tr, + write_sites: writers.map { |w| "#{w.file}:#{w.defn}:#{w.line}" }.uniq, + read_sites: readers.map { |r| "#{r.file}:#{r.defn}:#{r.line}" }.uniq, + shadowed_by: shadow + ) + end + + results.sort_by do |r| + c = case r.classification + when "dead_state" then 0 + when "shadow_state" then 1 + when "one_way_state" then 2 + when "mutable_entity" then 3 + when 
"pass_through" then 4 + when "immutable_convention" then 5 + else 6 end + [-r.write_spread, -r.read_spread, c, r.field] + end + end + end +end \ No newline at end of file diff --git a/gems/decomplex/lib/decomplex/native/co_update.rb b/gems/decomplex/lib/decomplex/native/co_update.rb index c63b54f25..42cb5ceeb 100644 --- a/gems/decomplex/lib/decomplex/native/co_update.rb +++ b/gems/decomplex/lib/decomplex/native/co_update.rb @@ -10,16 +10,26 @@ module CoUpdate def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("co-update", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("co-update", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? - - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/decision_pressure.rb b/gems/decomplex/lib/decomplex/native/decision_pressure.rb index 5bb1a4854..e3f1eeb6c 100644 --- a/gems/decomplex/lib/decomplex/native/decision_pressure.rb +++ b/gems/decomplex/lib/decomplex/native/decision_pressure.rb @@ -10,16 +10,26 @@ module DecisionPressure def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("decision-pressure", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = 
language_for(paths.first) + JSON.parse(Command.run("decision-pressure", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? - - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/derived_state.rb b/gems/decomplex/lib/decomplex/native/derived_state.rb index 412e8c7d5..a6c34624d 100644 --- a/gems/decomplex/lib/decomplex/native/derived_state.rb +++ b/gems/decomplex/lib/decomplex/native/derived_state.rb @@ -10,16 +10,26 @@ module DerivedState def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("derived-state", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("derived-state", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/false_simplicity.rb b/gems/decomplex/lib/decomplex/native/false_simplicity.rb index c8afcc290..a4700b370 100644 --- a/gems/decomplex/lib/decomplex/native/false_simplicity.rb +++ b/gems/decomplex/lib/decomplex/native/false_simplicity.rb @@ -10,16 +10,26 @@ module FalseSimplicity def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("false-simplicity", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("false-simplicity", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/fat_union.rb b/gems/decomplex/lib/decomplex/native/fat_union.rb index f3df23a3e..477bcfa0f 100644 --- a/gems/decomplex/lib/decomplex/native/fat_union.rb +++ b/gems/decomplex/lib/decomplex/native/fat_union.rb @@ -10,16 +10,26 @@ module FatUnion def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("fat-union", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("fat-union", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/flay_similarity.rb b/gems/decomplex/lib/decomplex/native/flay_similarity.rb index 88fd1b6d0..1282d2f74 100644 --- a/gems/decomplex/lib/decomplex/native/flay_similarity.rb +++ b/gems/decomplex/lib/decomplex/native/flay_similarity.rb @@ -10,11 +10,11 @@ module FlaySimilarity def scan(files, mass:, fuzzy:, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) + language = language_for(paths.first) JSON.parse( Command.run( "flay-similarity", - "--language", "ruby", + "--language", language, *Command.jobs_args(jobs), "--mass", mass.to_i.to_s, "--fuzzy", fuzzy.to_i.to_s, @@ -24,19 +24,29 @@ def scan(files, mass:, fuzzy:, jobs: nil) ).map { |finding| normalize_finding(finding) } end - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" - end - private_class_method def self.normalize_finding(finding) finding.merge( clone_type: finding.fetch(:clone_type).to_sym, spans: finding.fetch(:spans).transform_values { |span| Array(span).map(&:to_i) } ) end + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end + end + end end end diff --git a/gems/decomplex/lib/decomplex/native/function_lcom.rb b/gems/decomplex/lib/decomplex/native/function_lcom.rb index 108ca95b2..38cce2f2b 100644 --- a/gems/decomplex/lib/decomplex/native/function_lcom.rb +++ b/gems/decomplex/lib/decomplex/native/function_lcom.rb @@ -10,16 +10,26 @@ module FunctionLcom def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("function-lcom", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("function-lcom", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/implicit_control_flow.rb b/gems/decomplex/lib/decomplex/native/implicit_control_flow.rb index b1fd3c530..32ecab9b0 100644 --- a/gems/decomplex/lib/decomplex/native/implicit_control_flow.rb +++ b/gems/decomplex/lib/decomplex/native/implicit_control_flow.rb @@ -10,16 +10,26 @@ module ImplicitControlFlow def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("implicit-control-flow", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("implicit-control-flow", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/inconsistent_rename_clone.rb b/gems/decomplex/lib/decomplex/native/inconsistent_rename_clone.rb index 93956d8ce..d3c812b2d 100644 --- a/gems/decomplex/lib/decomplex/native/inconsistent_rename_clone.rb +++ b/gems/decomplex/lib/decomplex/native/inconsistent_rename_clone.rb @@ -10,16 +10,26 @@ module InconsistentRenameClone def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("inconsistent-rename-clone", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("inconsistent-rename-clone", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/local_flow.rb b/gems/decomplex/lib/decomplex/native/local_flow.rb new file mode 100644 index 000000000..f255f59c0 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/local_flow.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module LocalFlow + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + language = language_for(paths.first) + JSON.parse(Command.run("local-flow", "--language", language, *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/locality_drag.rb b/gems/decomplex/lib/decomplex/native/locality_drag.rb index cb4bbefb6..74e936da5 100644 --- a/gems/decomplex/lib/decomplex/native/locality_drag.rb +++ b/gems/decomplex/lib/decomplex/native/locality_drag.rb @@ -10,16 +10,26 @@ module LocalityDrag def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - 
JSON.parse(Command.run("locality-drag", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("locality-drag", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? - - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/miner.rb b/gems/decomplex/lib/decomplex/native/miner.rb index e1fec456c..2f5425477 100644 --- a/gems/decomplex/lib/decomplex/native/miner.rb +++ b/gems/decomplex/lib/decomplex/native/miner.rb @@ -10,16 +10,26 @@ module Miner def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("miner", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("miner", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/operational_discontinuity.rb b/gems/decomplex/lib/decomplex/native/operational_discontinuity.rb index a3f6e4959..42bd4a7f5 100644 --- a/gems/decomplex/lib/decomplex/native/operational_discontinuity.rb +++ b/gems/decomplex/lib/decomplex/native/operational_discontinuity.rb @@ -10,16 +10,26 @@ module OperationalDiscontinuity def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("operational-discontinuity", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("operational-discontinuity", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/oversized_predicate.rb b/gems/decomplex/lib/decomplex/native/oversized_predicate.rb index a2f295430..cc563074c 100644 --- a/gems/decomplex/lib/decomplex/native/oversized_predicate.rb +++ b/gems/decomplex/lib/decomplex/native/oversized_predicate.rb @@ -10,16 +10,26 @@ module OversizedPredicate def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("oversized-predicate", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("oversized-predicate", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/path_condition.rb b/gems/decomplex/lib/decomplex/native/path_condition.rb index eec47d915..aa44a2fe2 100644 --- a/gems/decomplex/lib/decomplex/native/path_condition.rb +++ b/gems/decomplex/lib/decomplex/native/path_condition.rb @@ -10,16 +10,26 @@ module PathCondition def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("path-condition", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("path-condition", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/predicate_aliases.rb b/gems/decomplex/lib/decomplex/native/predicate_aliases.rb index 78b93c2b6..584b201ee 100644 --- a/gems/decomplex/lib/decomplex/native/predicate_aliases.rb +++ b/gems/decomplex/lib/decomplex/native/predicate_aliases.rb @@ -10,16 +10,26 @@ module PredicateAliases def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("predicate-aliases", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("predicate-aliases", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/redundant_nil_guard.rb b/gems/decomplex/lib/decomplex/native/redundant_nil_guard.rb index 237f1ad70..55d5ac922 100644 --- a/gems/decomplex/lib/decomplex/native/redundant_nil_guard.rb +++ b/gems/decomplex/lib/decomplex/native/redundant_nil_guard.rb @@ -10,16 +10,26 @@ module RedundantNilGuard def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("redundant-nil-guard", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("redundant-nil-guard", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/semantic_aliases.rb b/gems/decomplex/lib/decomplex/native/semantic_aliases.rb index 541be0cd2..e2c174559 100644 --- a/gems/decomplex/lib/decomplex/native/semantic_aliases.rb +++ b/gems/decomplex/lib/decomplex/native/semantic_aliases.rb @@ -10,16 +10,26 @@ module SemanticAliases def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("semantic-aliases", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("semantic-aliases", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/sequence_mine.rb b/gems/decomplex/lib/decomplex/native/sequence_mine.rb index ea80d5bce..fc5b38d65 100644 --- a/gems/decomplex/lib/decomplex/native/sequence_mine.rb +++ b/gems/decomplex/lib/decomplex/native/sequence_mine.rb @@ -10,16 +10,26 @@ module SequenceMine def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("sequence-mine", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("sequence-mine", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/state_branch_density.rb b/gems/decomplex/lib/decomplex/native/state_branch_density.rb index eebc752bd..d4dbb37a1 100644 --- a/gems/decomplex/lib/decomplex/native/state_branch_density.rb +++ b/gems/decomplex/lib/decomplex/native/state_branch_density.rb @@ -10,16 +10,26 @@ module StateBranchDensity def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("state-branch-density", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("state-branch-density", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/state_mesh.rb b/gems/decomplex/lib/decomplex/native/state_mesh.rb index 102229954..486b5fe65 100644 --- a/gems/decomplex/lib/decomplex/native/state_mesh.rb +++ b/gems/decomplex/lib/decomplex/native/state_mesh.rb @@ -10,16 +10,26 @@ module StateMesh def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("state-mesh", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("state-mesh", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/state_writes.rb b/gems/decomplex/lib/decomplex/native/state_writes.rb index a36b1fb83..5d5155c1e 100644 --- a/gems/decomplex/lib/decomplex/native/state_writes.rb +++ b/gems/decomplex/lib/decomplex/native/state_writes.rb @@ -14,7 +14,6 @@ module StateWrites def extract(files) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) payload = run_native(paths) JSON.parse(payload).map do |row| CoUpdate::Write.new( @@ -28,16 +27,27 @@ def extract(files) end end - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? 
- - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" - end - private_class_method def self.run_native(paths) - Command.run("state-writes", "--language", "ruby", *paths) + language = language_for(paths.first) + Command.run("state-writes", "--language", language, *paths) end + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end + end + end end end diff --git a/gems/decomplex/lib/decomplex/native/structural_topology.rb b/gems/decomplex/lib/decomplex/native/structural_topology.rb new file mode 100644 index 000000000..889e2db88 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/structural_topology.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module StructuralTopology + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + language = language_for(paths.first) + JSON.parse(Command.run("structural-topology", "--language", language, *Command.jobs_args(jobs), *paths)) + end + + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/native/temporal_ordering_pressure.rb b/gems/decomplex/lib/decomplex/native/temporal_ordering_pressure.rb index 611491fc1..be9208568 
100644 --- a/gems/decomplex/lib/decomplex/native/temporal_ordering_pressure.rb +++ b/gems/decomplex/lib/decomplex/native/temporal_ordering_pressure.rb @@ -10,16 +10,26 @@ module TemporalOrderingPressure def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("temporal-ordering-pressure", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("temporal-ordering-pressure", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? - - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb b/gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb index 8887a6e6c..84c39494f 100644 --- a/gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb +++ b/gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb @@ -10,16 +10,26 @@ module WeightedInlinedComplexity def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - validate_ruby_files!(paths) - JSON.parse(Command.run("weighted-inlined-complexity", "--language", "ruby", *Command.jobs_args(jobs), *paths)) + language = language_for(paths.first) + JSON.parse(Command.run("weighted-inlined-complexity", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def 
self.validate_ruby_files!(paths) - bad = paths.reject { |path| File.extname(path) == ".rb" } - return if bad.empty? - - raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + private_class_method def self.language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c" then "c" + when ".cpp", ".cc", ".cxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end end + end end end diff --git a/gems/decomplex/lib/decomplex/state_mesh.rb b/gems/decomplex/lib/decomplex/state_mesh.rb index 668a8d2ab..ddf10e0ef 100644 --- a/gems/decomplex/lib/decomplex/state_mesh.rb +++ b/gems/decomplex/lib/decomplex/state_mesh.rb @@ -113,10 +113,9 @@ def walk_reads(node, lines, defstack, file, field_norms) defstack = defstack + [node.children[0].to_s] when :DEFN then defstack = defstack + [node.children[0].to_s] when :DEFS then defstack = defstack + [node.children[1].to_s] - when :CALL, :OPCALL, :FCALL, :VCALL + when :CALL, :OPCALL, :FCALL # CALL(recv, :method, args) - attribute reads have no args # FCALL(:method, args) - attribute reads have no args - # VCALL(:method) - attribute reads have no args recv = node.type == :CALL || node.type == :OPCALL ? node.children[0] : nil mid = node.type == :CALL || node.type == :OPCALL ? node.children[1] : node.children[0] args = node.type == :CALL || node.type == :OPCALL ? 
node.children[2] : node.children[1] diff --git a/gems/decomplex/lib/decomplex/superfluous_state.rb b/gems/decomplex/lib/decomplex/superfluous_state.rb new file mode 100644 index 000000000..49abcd3e7 --- /dev/null +++ b/gems/decomplex/lib/decomplex/superfluous_state.rb @@ -0,0 +1,210 @@ +# frozen_string_literal: true + +require "set" +require_relative "ast" + +module Decomplex + # SuperfluousState -- fields that could be eliminated entirely. + # + # Post-analyzer over StateMesh + ImplicitControlFlow. Does no new AST + # walks. Detects four eliminability patterns: + # + # 1. Dead state -- written but never read. The field captures a + # value that is never used. Provably removable. + # + # 2. Intra-method pass-through -- field written and read within the + # same method body. The value never escapes the stack frame. + # Memoized cache accessors (read-before-write pattern) are + # disqualified. + # + # 3. Adjacent-call pass-through -- single-writer single-reader where + # every observed callsite has writer immediately preceding reader. + # + # 4. Derived cache -- computed from other fields, never independently + # mutated. Includes memoized accessors and constructor-set config. + # + # Noise gating: + # - Only self-state (@ivar and self.attr); ignores other.attr. + # - Read-before-write within same method disqualifies intra-method + # (memo pattern, not pass-through). + # - Constructor-set fields get a 0.33x penalty. + # - Fields read only via hash/eql?/to_s/inspect are flagged as + # identity-only (may be eligible for structural replacement). 
+ class SuperfluousState + Finding = Struct.new(:field, :score, :classification, + :writer_method_count, :reader_method_count, + :write_sites, :read_sites, + :writer_methods, :reader_methods, + :ctorset, :adjacent_sites, + keyword_init: true) do + def to_h + { + field: field, + score: score.round(3), + classification: classification, + writer_method_count: writer_method_count, + reader_method_count: reader_method_count, + write_sites: write_sites, + read_sites: read_sites, + writer_methods: writer_methods, + reader_methods: reader_methods, + ctorset: ctorset, + adjacent_sites: adjacent_sites + } + end + end + + def self.scan(files) + sm = StateMesh.scan(files, min_writes: 1) + sm.run + + adjacent_pairs = build_adjacent_pairs(files) + new(sm, adjacent_pairs).scan + end + + def initialize(state_mesh, adjacent_pairs = {}) + @sm = state_mesh + @adjacent_pairs = adjacent_pairs + end + + def scan + group_by_field + score_and_rank + end + + private + + def group_by_field + @writes_by = Hash.new { |h, k| h[k] = [] } + @reads_by = Hash.new { |h, k| h[k] = [] } + + @sm.writes.each do |w| + next unless w.recv == "self" # ignore other.attr + @writes_by[w.norm] << w + end + @sm.reads.each do |r| + next unless r.recv == "self" + @reads_by[r.norm] << r + end + end + + def score_and_rank + results = [] + + all_norms = (@writes_by.keys + @reads_by.keys).uniq + + # ---- Pattern 1: dead state (written, never read) ---- + all_norms.each do |norm| + next unless @writes_by.key?(norm) && !@reads_by.key?(norm) + # Reject if StateMesh has ANY read (including non-self reads + # like metaprogramming access), not just self-filtered reads. + next if @sm.reads.any? 
{ |r| r.norm == norm } + + writers = @writes_by[norm] + results << Finding.new( + field: norm, score: 0.85, classification: "dead_state", + writer_method_count: writers.map { |w| [w.file, w.defn] }.uniq.size, + reader_method_count: 0, + write_sites: writers.map { |w| "#{w.file}:#{w.defn}:#{w.line}" }.uniq, + read_sites: [], + write_sites: writers.map { |w| "#{w.file}:#{w.defn}:#{w.line}" }.uniq, + read_sites: [], + writer_methods: writers.map(&:defn).uniq, + reader_methods: [], + ctorset: writers.all? { |w| w.defn == "initialize" }, + adjacent_sites: nil + ) + end + + # ---- Pattern 2-4: eliminability scoring ---- + all_norms.each do |norm| + writers = @writes_by[norm] || [] + readers = @reads_by[norm] || [] + next if writers.empty? || readers.empty? + + writer_methods = writers.map { |w| [w.file, w.defn] }.uniq + reader_methods = readers.map { |r| [r.file, r.defn] }.uniq + all_sites = (writer_methods + reader_methods).uniq + + wc = writer_methods.size + rc = reader_methods.size + + # ---- base dampened score ---- + base = 1.0 / (wc * rc + 1) + + # ---- intra-method pass-through ---- + intra = (all_sites.size == 1) + if intra + # Disqualify if any read precedes the earliest write (the field + # carries state from outside this method -- e.g. read-modify-write + # or a method that reads prior-call state before writing). + first_write_line = writers.map(&:line).min + intra = false if readers.any? { |r| r.line < first_write_line } + end + intra_bonus = intra ? 10.0 : 1.0 + + # ---- constructor-set penalty ---- + ctorset = wc == 1 && writer_methods[0][1] == "initialize" + ctor_penalty = ctorset ? 
0.33 : 1.0 + + # ---- adjacent-call bonus ---- + adj_bonus = 1.0 + adj_sites = nil + if wc == 1 && rc == 1 && !intra + wm_name = writer_methods[0][1] + rm_name = reader_methods[0][1] + pair_key = [wm_name, rm_name] + fields = @adjacent_pairs[pair_key] + if fields.include?(norm) + adj_bonus = 5.0 + adj_sites = fields.to_a # would be the sites list from ICF + end + end + + score = base * intra_bonus * adj_bonus * ctor_penalty + next if score < 0.1 + + classification = if intra + "intra_method" + elsif adj_bonus > 1.0 + "adjacent_call" + else + "derived_cache" + end + + results << Finding.new( + field: norm, + score: score, + classification: classification, + writer_method_count: wc, + reader_method_count: rc, + write_sites: writers.map { |w| "#{w.file}:#{w.defn}:#{w.line}" }.uniq, + read_sites: readers.map { |r| "#{r.file}:#{r.defn}:#{r.line}" }.uniq, + writer_methods: writer_methods.map { |_f, d| d }.uniq, + reader_methods: reader_methods.map { |_f, d| d }.uniq, + ctorset: ctorset, + adjacent_sites: adj_sites + ) + end + + results.sort_by { |r| -r.score } + end + + # Build a lookup: (writer_method, reader_method) -> Set[field_norm] + # from ImplicitControlFlow's ordered protocol facts. 
+ def self.build_adjacent_pairs(files) + pairs = Hash.new { |h, k| h[k] = Set.new } + report = ImplicitControlFlow.scan(files) + report.ordered_protocols.each do |proto| + next unless proto[:dependency] == "write_read" + writer, reader = proto[:protocol] + fields = proto[:states] + fields.each { |f| pairs[[writer, reader]].add(f) } + end + pairs + rescue StandardError => e + warn "SuperfluousState: ImplicitControlFlow unavailable: #{e.message}" + {} + end + end +end diff --git a/gems/decomplex/ruby_core.json b/gems/decomplex/ruby_core.json new file mode 100644 index 000000000..e69de29bb diff --git a/gems/decomplex/rust/Cargo.lock b/gems/decomplex/rust/Cargo.lock index 78002f238..999cc42b3 100644 --- a/gems/decomplex/rust/Cargo.lock +++ b/gems/decomplex/rust/Cargo.lock @@ -49,8 +49,18 @@ dependencies = [ "serde_json", "tempfile", "tree-sitter", + "tree-sitter-c", + "tree-sitter-c-sharp", + "tree-sitter-cpp", + "tree-sitter-go", + "tree-sitter-javascript", "tree-sitter-language", + "tree-sitter-lua", + "tree-sitter-python", "tree-sitter-ruby", + "tree-sitter-rust", + "tree-sitter-typescript", + "tree-sitter-zig", ] [[package]] @@ -250,12 +260,82 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-c" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afd2b1bf1585dc2ef6d69e87d01db8adb059006649dd5f96f31aa789ee6e9c71" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c-sharp" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8997ad04502208449025114e434c9024a33a74e700513c702a9d2cac6522a771" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] 
+name = "tree-sitter-go" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b13d476345220dbe600147dd444165c5791bf85ef53e28acbedd46112ee18431" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf40bf599e0416c16c125c3cec10ee5ddc7d1bb8b0c60fa5c4de249ad34dc1b1" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-language" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c199356c799a8945965bb5f2c55b2ad9d9aa7c4b4f6e587fe9dea0bc715e5f9c" +[[package]] +name = "tree-sitter-lua" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb9adf0965fec58e7660cbb3a059dbb12ebeec9459e6dcbae3db004739641e" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-python" +version = "0.23.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-ruby" version = "0.23.1" @@ -266,6 +346,36 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-rust" +version = "0.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8ccb3e3a3495c8a943f6c3fd24c3804c471fd7f4f16087623c7fa4c0068e8a" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-zig" +version = "1.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab11fc124851b0db4dd5e55983bbd9631192e93238389dcd44521715e5d53e28" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "unicode-ident" version = "1.0.24" diff --git a/gems/decomplex/rust/Cargo.toml b/gems/decomplex/rust/Cargo.toml index 24a40eecd..039da4f74 100644 --- a/gems/decomplex/rust/Cargo.toml +++ b/gems/decomplex/rust/Cargo.toml @@ -17,6 +17,16 @@ regex = "1.10" tree-sitter = "=0.24.7" tree-sitter-language = "=0.1.3" tree-sitter-ruby = "=0.23.1" +tree-sitter-python = "0.23.6" +tree-sitter-javascript = "0.23.1" +tree-sitter-typescript = "0.23.2" +tree-sitter-go = "0.23.4" +tree-sitter-rust = "0.23.2" +tree-sitter-zig = "1.0.2" +tree-sitter-lua = "0.2.0" +tree-sitter-c = "0.23.4" +tree-sitter-cpp = "0.23.4" +tree-sitter-c-sharp = "0.21.3" [dev-dependencies] tempfile = "=3.10.1" diff --git a/gems/decomplex/rust/rust_core.json b/gems/decomplex/rust/rust_core.json new file mode 100644 index 000000000..fb50f4462 --- /dev/null +++ b/gems/decomplex/rust/rust_core.json @@ -0,0 +1 @@ 
+[{"field":"tv_sec","receiver":"ts","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_clock_gettime","line":147,"span":[147,2,147,23],"owner":"core"},{"field":"tv_nsec","receiver":"ts","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_clock_gettime","line":148,"span":[148,2,148,25],"owner":"core"},{"field":"flags","receiver":"handle","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_close","line":162,"span":[162,2,162,36],"owner":"core"},{"field":"close_cb","receiver":"handle","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_close","line":163,"span":[163,2,163,29],"owner":"core"},{"field":"next_closing","receiver":"handle","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__make_close_pending","line":271,"span":[271,2,271,54],"owner":"core"},{"field":"closing_handles","receiver":"handle->loop","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__make_close_pending","line":272,"span":[272,2,272,40],"owner":"core"},{"field":"flags","receiver":"handle","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__finish_close","line":316,"span":[316,2,316,35],"owner":"core"},{"field":"flags","receiver":"handle","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__finish_close","line":338,"span":[338,8,338,41],"owner":"core"},{"field":"closing_handles","receiver":"loop","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__run_closing_handles","line":373,"span":[373,2,373,30],"owner":"core"},{"field":"stop_flag","receiver":"loop","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_run","line":489,"span":[489,4,489,23],"owner":"core"},{"field":"watchers","receiver":"loop","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"maybe_resize","line":900,"span":[900,2,900,27],"owner":"core"},{"field":"nwatchers","receiver":"loop","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"maybe_resize","line":901,"span":[901,2,901,29],"owner":"core"},{"field":"fd","receiver":"w","file"
:"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_init","line":945,"span":[945,2,945,12],"owner":"uv__io_t"},{"field":"bits","receiver":"w","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_init","line":946,"span":[946,2,946,13],"owner":"uv__io_t"},{"field":"events","receiver":"w","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_init","line":947,"span":[947,2,947,15],"owner":"uv__io_t"},{"field":"pevents","receiver":"w","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_init","line":948,"span":[948,2,948,16],"owner":"uv__io_t"},{"field":"pevents","receiver":"w","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_start","line":963,"span":[963,2,963,22],"owner":"core"},{"field":"pevents","receiver":"w","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_stop","line":1018,"span":[1018,2,1018,23],"owner":"core"},{"field":"events","receiver":"w","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_stop","line":1023,"span":[1023,4,1023,17],"owner":"core"},{"field":"current_timeout","receiver":"lfields","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_poll_prepare","line":1073,"span":[1073,2,1073,36],"owner":"core"},{"field":"tv_sec","receiver":"rusage->ru_utime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1103,"span":[1103,2,1103,49],"owner":"core"},{"field":"tv_usec","receiver":"rusage->ru_utime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1104,"span":[1104,2,1104,51],"owner":"core"},{"field":"tv_sec","receiver":"rusage->ru_stime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1106,"span":[1106,2,1106,49],"owner":"core"},{"field":"tv_usec","receiver":"rusage->ru_stime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1107,"span":[1107,2,1107,51],"owner":"core"},{"field":"ru_maxrss","receiver":"rusage","file":"/tmp/lineage-c-libuv/s
rc/unix/core.c","function":"uv__getrusage","line":1110,"span":[1110,2,1110,37],"owner":"core"},{"field":"ru_ixrss","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1111,"span":[1111,2,1111,35],"owner":"core"},{"field":"ru_idrss","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1112,"span":[1112,2,1112,35],"owner":"core"},{"field":"ru_isrss","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1113,"span":[1113,2,1113,35],"owner":"core"},{"field":"ru_minflt","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1114,"span":[1114,2,1114,37],"owner":"core"},{"field":"ru_majflt","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1115,"span":[1115,2,1115,37],"owner":"core"},{"field":"ru_nswap","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1116,"span":[1116,2,1116,35],"owner":"core"},{"field":"ru_inblock","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1117,"span":[1117,2,1117,39],"owner":"core"},{"field":"ru_oublock","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1118,"span":[1118,2,1118,39],"owner":"core"},{"field":"ru_msgsnd","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1119,"span":[1119,2,1119,37],"owner":"core"},{"field":"ru_msgrcv","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1120,"span":[1120,2,1120,37],"owner":"core"},{"field":"ru_nsignals","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1121,"span":[1121,2,1121,41],"owner":"core"},{"field":"ru_nvcsw","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/c
ore.c","function":"uv__getrusage","line":1122,"span":[1122,2,1122,35],"owner":"core"},{"field":"ru_nivcsw","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1123,"span":[1123,2,1123,37],"owner":"core"},{"field":"ru_maxrss","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1130,"span":[1130,2,1130,27],"owner":"core"},{"field":"ru_maxrss","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1132,"span":[1132,2,1132,43],"owner":"core"},{"field":"tv_sec","receiver":"rusage->ru_utime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_getrusage_thread","line":1165,"span":[1165,2,1165,50],"owner":"core"},{"field":"tv_usec","receiver":"rusage->ru_utime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_getrusage_thread","line":1166,"span":[1166,2,1166,56],"owner":"core"},{"field":"tv_sec","receiver":"rusage->ru_stime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_getrusage_thread","line":1167,"span":[1167,2,1167,52],"owner":"core"},{"field":"tv_usec","receiver":"rusage->ru_stime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_getrusage_thread","line":1168,"span":[1168,2,1168,58],"owner":"core"},{"field":"username","receiver":"pwd","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getpwuid_r","line":1395,"span":[1395,2,1395,67],"owner":"core"},{"field":"homedir","receiver":"pwd","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getpwuid_r","line":1406,"span":[1406,2,1406,42],"owner":"core"},{"field":"shell","receiver":"pwd","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getpwuid_r","line":1410,"span":[1410,2,1410,42],"owner":"core"},{"field":"uid","receiver":"pwd","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getpwuid_r","line":1414,"span":[1414,2,1414,22],"owner":"core"},{"field":"gid","receiver":"pwd","file":"/tmp/lineage-c-
libuv/src/unix/core.c","function":"uv__getpwuid_r","line":1415,"span":[1415,2,1415,22],"owner":"core"},{"field":"members","receiver":"grp","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_os_get_group","line":1483,"span":[1483,2,1483,32],"owner":"core"},{"field":"groupname","receiver":"grp","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_os_get_group","line":1494,"span":[1494,2,1494,25],"owner":"core"},{"field":"gid","receiver":"grp","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_os_get_group","line":1499,"span":[1499,2,1499,22],"owner":"core"},{"field":"name","receiver":"envitem","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_os_environ","line":1558,"span":[1558,4,1558,23],"owner":"core"},{"field":"value","receiver":"envitem","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_os_environ","line":1559,"span":[1559,4,1559,28],"owner":"core"},{"field":"sched_priority","receiver":"param","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_thread_setpriority","line":1837,"span":[1837,4,1837,31],"owner":"uv_thread_t"},{"field":"tv_sec","receiver":"tv","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_gettimeofday","line":1936,"span":[1936,2,1936,36],"owner":"core"},{"field":"tv_usec","receiver":"tv","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_gettimeofday","line":1937,"span":[1937,2,1937,38],"owner":"core"},{"field":"tv_sec","receiver":"timeout","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_sleep","line":1945,"span":[1945,2,1945,30],"owner":"core"},{"field":"tv_nsec","receiver":"timeout","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_sleep","line":1946,"span":[1946,2,1946,47],"owner":"core"}] diff --git a/gems/decomplex/rust/rust_writes.json b/gems/decomplex/rust/rust_writes.json new file mode 100644 index 000000000..fe51488c7 --- /dev/null +++ b/gems/decomplex/rust/rust_writes.json @@ -0,0 +1 @@ +[] diff --git 
a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs index 87d10a15d..cc844ef72 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -1,9 +1,10 @@ -use serde::Serialize; +use crate::decomplex::syntax::Language; use anyhow::{Context, Result}; +use serde::Serialize; use std::collections::BTreeSet; use std::fs; use std::path::Path; -use tree_sitter::{Node as TreeSitterNode, Parser}; +use tree_sitter::{Language as TreeSitterLanguage, Node as TreeSitterNode, Parser}; pub type Span = [usize; 4]; const COMPARISON_OPERATORS: &[&str] = &["==", "!=", "===", "!==", "<", "<=", ">", ">="]; @@ -190,12 +191,16 @@ pub struct Node { } pub fn parse(file: &Path) -> Result<(Node, Vec)> { + parse_with_language(file, Language::Ruby) +} + +pub fn parse_with_language(file: &Path, language: Language) -> Result<(Node, Vec)> { let source = fs::read_to_string(file) .with_context(|| format!("failed to read {}", file.display()))?; let mut parser = Parser::new(); parser - .set_language(&tree_sitter_ruby::LANGUAGE.into()) - .with_context(|| "failed to initialize tree-sitter ruby parser")?; + .set_language(&language_grammar(language)) + .with_context(|| "failed to initialize tree-sitter parser")?; let tree = parser .parse(&source, None) .with_context(|| format!("tree-sitter produced no tree for {}", file.display()))?; @@ -204,6 +209,22 @@ pub fn parse(file: &Path) -> Result<(Node, Vec)> { Ok((root, lines)) } +fn language_grammar(language: Language) -> TreeSitterLanguage { + match language { + Language::Ruby => tree_sitter_ruby::LANGUAGE.into(), + Language::Python => tree_sitter_python::LANGUAGE.into(), + Language::JavaScript => tree_sitter_javascript::LANGUAGE.into(), + Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), + Language::Go => tree_sitter_go::LANGUAGE.into(), + Language::Rust => tree_sitter_rust::LANGUAGE.into(), + Language::Zig => tree_sitter_zig::LANGUAGE.into(), + 
Language::Lua => tree_sitter_lua::LANGUAGE.into(), + Language::C => tree_sitter_c::LANGUAGE.into(), + Language::Cpp => tree_sitter_cpp::LANGUAGE.into(), + Language::CSharp => tree_sitter_c_sharp::language().into(), + } +} + pub fn node(child: &Child) -> Option<&Node> { match child { Child::Node(node) => Some(node), @@ -325,7 +346,8 @@ impl<'source> TreeSitterNormalizer<'source> { let children = self.normalize_children(node); Some(self.wrap("ROOT", children, node)) } - "method" => self.normalize_function(node), + "method" | "function_definition" | "function_declaration" | "method_definition" + | "method_declaration" | "function_item" => self.normalize_function(node), "singleton_method" => self.normalize_singleton_function(node), "class" | "class_definition" | "class_declaration" | "class_specifier" => { self.normalize_class(node) @@ -1620,7 +1642,8 @@ impl<'source> TreeSitterNormalizer<'source> { } if matches!( node.kind(), - "method_parameters" | "block_parameters" | "lambda_parameters" + "method_parameters" | "parameters" | "parameter_list" | "formal_parameters" + | "block_parameters" | "lambda_parameters" ) { for child in self.named_children(node) { self.collect_identifier_names(child, locals); @@ -1671,6 +1694,14 @@ impl<'source> TreeSitterNormalizer<'source> { } fn ruby_scope_boundary(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() == "block" + && node + .parent() + .map(|parent| function_kind(parent.kind())) + .unwrap_or(false) + { + return false; + } if matches!(node.kind(), "block" | "do_block") && node .parent() @@ -1720,6 +1751,9 @@ impl<'source> TreeSitterNormalizer<'source> { matches!( parent.kind(), "method_parameters" + | "parameters" + | "parameter_list" + | "formal_parameters" | "block_parameters" | "lambda_parameters" | "optional_parameter" @@ -2708,6 +2742,19 @@ fn if_kind(kind: &str) -> bool { ) } +fn function_kind(kind: &str) -> bool { + matches!( + kind, + "method" + | "function_definition" + | "function_declaration" + | 
"method_definition" + | "method_declaration" + | "function_item" + | "singleton_method" + ) +} + fn return_kind(kind: &str) -> &str { match kind { "return" | "return_statement" | "return_expression" => "RETURN", diff --git a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs index 74319a197..0a35baf0c 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs @@ -27,12 +27,12 @@ struct Hit { span: Span, } -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let mut guard = Vec::new(); let mut dispatch = Vec::new(); for file in files { - let (root, lines) = ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; let mut detector = DecisionPressure::new(file.to_string_lossy().to_string(), lines); detector.walk(&root, &Vec::new(), &BTreeMap::new()); guard.extend(detector.guard_hits); diff --git a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs index fe43587bc..3ba3e118f 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs @@ -26,10 +26,10 @@ struct Asgn { span: Span, } -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let mut out = Vec::new(); for file in files { - let (root, lines) = ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; let detector = DerivedState::new(file.to_string_lossy().to_string(), lines); detector.each_method(&root, &mut |file, defn, stmts| { out.extend(analyze(file, defn, stmts)); diff --git a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs 
b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs index 788f7d8f1..56a6214bd 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs @@ -26,10 +26,10 @@ struct Site { span: Span, } -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let mut sites = Vec::new(); for file in files { - let (root, lines) = ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; let mut detector = FalseSimplicity::new(file.to_string_lossy().to_string(), lines); detector.walk(&root, &Vec::new()); sites.extend(detector.sites); diff --git a/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs index 7da7e4969..00234429d 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs @@ -30,10 +30,10 @@ struct VariantReads { reads: Vec, } -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let mut out = Vec::new(); for file in files { - let (root, lines) = ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; let mut detector = FatUnion::new(file.to_string_lossy().to_string(), lines); detector.walk(&root, &Vec::new()); out.extend(detector.findings()); diff --git a/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs b/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs index 912d050df..d919b1597 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs @@ -19,8 +19,8 @@ pub struct FunctionLcomRow { pub spans: BTreeMap, } -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { - let summaries = 
local_flow::scan_files(files, _language)?; +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let summaries = local_flow::scan_files(files, language)?; let mut detector = FunctionLcom::new(summaries); Ok(detector.findings()) } diff --git a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs index a37461265..664a6f750 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs @@ -84,10 +84,10 @@ const MUTATING_MIDS: &[&str] = &[ const NON_MUTATING_OPERATOR_MIDS: &[&str] = &["!", "!=", "!~"]; const MUTATING_SUFFIXES: &[&str] = &["!"]; -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let mut parsed = BTreeMap::new(); for file in files { - parsed.insert(file.to_string_lossy().to_string(), ast::parse(file)?); + parsed.insert(file.to_string_lossy().to_string(), ast::parse_with_language(file, language)?); } let effect_index = EffectIndex::build(&parsed); diff --git a/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs b/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs index 76912ce31..2c1ad29e8 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs @@ -40,10 +40,10 @@ struct Block { const HOLE_TYPES: &[&str] = &["LVAR", "DVAR", "IVAR", "LASGN", "DASGN", "IASGN"]; const MIN_TOKENS: usize = 8; -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let mut blocks = Vec::new(); for file in files { - let (root, _lines) = ast::parse(file)?; + let (root, _lines) = ast::parse_with_language(file, language)?; let detector = 
InconsistentRenameClone::new(file.to_string_lossy().to_string()); detector.collect(&root, &Vec::new(), &mut blocks); } diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index 52bfdc2ca..83b1a309c 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -52,10 +52,10 @@ const SKIP_NESTED_TYPES: &[&str] = &["CLASS", "MODULE", "DEFN", "DEFS", "LAMBDA" const LOCAL_READ_TYPES: &[&str] = &["LVAR", "DVAR"]; const LOCAL_WRITE_TYPES: &[&str] = &["LASGN", "DASGN"]; -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let mut out = Vec::new(); for file in files { - let (root, lines) = ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; let mut detector = LocalFlow::new(file.to_string_lossy().to_string(), lines); out.extend(detector.scan(&root)); } @@ -331,3 +331,40 @@ struct RawBoundary { kind: String, text: String, } + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use tempfile::NamedTempFile; + + #[test] + fn extracts_python_function_local_flow() { + let mut file = NamedTempFile::new().expect("tempfile"); + file.write_all( + b"def mixed(price, tax):\n subtotal = price + tax\n total = subtotal\n return total\n", + ) + .expect("write"); + + let summaries = scan_files(&[file.path().to_path_buf()], Language::Python).expect("scan"); + let summary = summaries + .iter() + .find(|summary| summary.name == "mixed") + .expect("mixed summary"); + + assert_eq!(summary.owner, "(top-level)"); + assert_eq!(summary.statements.len(), 3); + assert_eq!( + summary.statements[0].reads, + ["price".to_string(), "tax".to_string()].into_iter().collect() + ); + assert_eq!( + summary.statements[1].dependencies, + vec![("total".to_string(), "subtotal".to_string())] + ); + assert_eq!( + 
summary.statements[2].reads, + ["total".to_string()].into_iter().collect() + ); + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs index 69c44cb80..2f2c3cf2e 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs @@ -45,8 +45,8 @@ pub struct BoundaryInfo { pub marker: String, } -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { - let summaries = local_flow::scan_files(files, _language)?; +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let summaries = local_flow::scan_files(files, language)?; let mut detector = LocalityDrag::new(summaries); Ok(detector.findings()) } diff --git a/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs b/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs index a9206c16d..1855c00ba 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs @@ -41,8 +41,8 @@ struct RangeInfo { last: usize, } -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { - let summaries = local_flow::scan_files(files, _language)?; +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let summaries = local_flow::scan_files(files, language)?; let detector = OperationalDiscontinuity::new(summaries); Ok(detector.findings()) } diff --git a/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs b/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs index c0c939b6e..a15e9bc8c 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs @@ -22,10 +22,10 @@ pub struct ResultReport { const LIMIT: usize = 3; const PREDICATE_NODES: &[&str] = &["IF", "WHILE", 
"UNTIL"]; -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let mut findings = Vec::new(); for file in files { - let (root, lines) = ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; let mut scanner = OversizedPredicate::new(file.to_string_lossy().to_string(), lines, LIMIT); scanner.walk(&root, &Vec::new()); findings.extend(scanner.findings); diff --git a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs index b66a17a67..51db631ef 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs @@ -30,10 +30,10 @@ struct Site { span: Span, } -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let mut sites = Vec::new(); for file in files { - let (root, lines) = ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; let mut pc = PathCondition::new(file.to_string_lossy().to_string(), lines); pc.walk(&root, &Vec::new(), &Vec::new()); sites.extend(pc.sites); diff --git a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs index 0d92a09e8..c2210dd17 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs @@ -28,10 +28,10 @@ struct Pred { span: Span, } -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let mut preds = Vec::new(); for file in files { - let (root, lines) = ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; let mut p = PredicateAlias::new(file.to_string_lossy().to_string(), lines); 
p.walk(&root); preds.extend(p.preds); diff --git a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs index d61e5c9d7..8b1f32bdf 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs @@ -61,10 +61,10 @@ impl Finding { const TERMINATING_CALLS: &[&str] = &["raise", "fail", "abort", "exit", "exit!"]; -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let mut findings = Vec::new(); for file in files { - let (root, lines) = ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; let mut scanner = RedundantNilGuard::new(file.to_string_lossy().to_string(), lines); scanner.walk(&root, &Vec::new()); findings.extend(scanner.findings); diff --git a/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs index bd69d82a7..8a73ecce2 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs @@ -47,11 +47,11 @@ struct Use { span: Span, } -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let mut preds = Vec::new(); let mut uses = Vec::new(); for file in files { - let (root, lines) = ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; let mut scanner = SemanticAlias::new(file.to_string_lossy().to_string(), lines); scanner.walk(&root, &Vec::new()); preds.extend(scanner.preds); diff --git a/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs b/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs index df7d97054..d1a61fc9f 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs 
+++ b/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs @@ -29,10 +29,10 @@ struct Site { span: Span, } -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let mut sites = Vec::new(); for file in files { - let (root, lines) = ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; let mut sm = SequenceMine::new(file.to_string_lossy().to_string(), lines); sm.walk(&root, &Vec::new()); sites.extend(sm.sites); diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs index f078f8fbb..d1565425b 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs @@ -34,14 +34,14 @@ const NOISE_MIDS: &[&str] = &[ "!", "!=", "==", "===", "<", "<=", ">", ">=", "[]", "[]=", "to_s", "inspect", "class", ]; -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let mut parsed = Vec::new(); let mut global_immutable_readers: BTreeMap> = BTreeMap::new(); let mut global_immutable_reader_types: BTreeMap> = BTreeMap::new(); let mut global_type_aliases: BTreeMap = BTreeMap::new(); for file in files { - let (root, lines) = ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; let scanner = StateBranchDensity::new(None, lines.clone(), None, None, None); for (name, readers) in scanner.immutable_struct_readers(&lines) { diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs index 49ba03ba6..0e92e93b7 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs @@ -149,7 +149,7 @@ struct FieldMetrics { pub fn 
scan_files(files: &[PathBuf], language: Language) -> Result { let mut src_map = BTreeMap::new(); for file in files { - let (root, lines) = ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; src_map.insert(file.to_string_lossy().to_string(), (root, lines)); } diff --git a/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs b/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs index 31778dc5a..b37729187 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs @@ -43,12 +43,12 @@ const SKIP_NESTED_TYPES: &[&str] = &["CLASS", "MODULE", "DEFN", "DEFS", "LAMBDA" const CONDITIONAL_TYPES: &[&str] = &["IF", "UNLESS", "CASE", "CASE2"]; const ITERATION_TYPES: &[&str] = &["ITER", "FOR", "WHILE", "UNTIL"]; -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let mut methods = Vec::new(); let mut parsed = Vec::new(); for file in files { - let (root, lines) = ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; let mut mc = MethodCollector::new(file.to_string_lossy().to_string(), lines.clone()); methods.extend(mc.scan(&root)); parsed.push((file.to_string_lossy().to_string(), root, lines)); diff --git a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs index 90285a1b0..71f3c9356 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs @@ -32,10 +32,10 @@ struct MethodState { writes: Vec, } -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let mut rows = Vec::new(); for file in files { - let (root, lines) = 
ast::parse(file)?; + let (root, lines) = ast::parse_with_language(file, language)?; let mut detector = TemporalOrderingPressure::new(file.to_string_lossy().to_string(), lines); rows.extend(detector.scan(&root)); } diff --git a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs index 11fa561dd..a615108b8 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs @@ -22,13 +22,13 @@ pub struct WeightedInlinedCognitiveComplexityRow { pub spans: BTreeMap, } -pub fn scan_files(files: &[PathBuf], _language: Language) -> Result> { +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let mut parsed = BTreeMap::new(); for file in files { - parsed.insert(file.to_string_lossy().to_string(), ast::parse(file)?); + parsed.insert(file.to_string_lossy().to_string(), ast::parse_with_language(file, language)?); } - let topology_report = structural_topology::scan_files(files, _language)?; + let topology_report = structural_topology::scan_files(files, language)?; let topology = structural_topology::Graph::new(topology_report.methods, topology_report.edges); let mut bodies = Vec::new(); diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index 208f62468..15720e1bf 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -1,4 +1,4 @@ -pub mod ruby; +pub mod tree_sitter_adapter; use crate::decomplex::ast::{RawNode, Span}; use crate::decomplex::parallel; @@ -10,12 +10,32 @@ use std::path::PathBuf; #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Language { Ruby, + Python, + JavaScript, + TypeScript, + Go, + Rust, + Zig, + Lua, + C, + Cpp, + CSharp, } impl Language { pub fn parse(value: &str) -> Result { match value { 
"ruby" => Ok(Self::Ruby), + "python" => Ok(Self::Python), + "javascript" => Ok(Self::JavaScript), + "typescript" => Ok(Self::TypeScript), + "go" => Ok(Self::Go), + "rust" => Ok(Self::Rust), + "zig" => Ok(Self::Zig), + "lua" => Ok(Self::Lua), + "c" => Ok(Self::C), + "cpp" => Ok(Self::Cpp), + "csharp" => Ok(Self::CSharp), _ => bail!("unsupported Decomplex native language: {value}"), } } @@ -99,9 +119,7 @@ pub struct SimilarityFinding { } pub fn parse_file(file: PathBuf, language: Language) -> Result { - match language { - Language::Ruby => ruby::parse_file(file), - } + tree_sitter_adapter::parse_file(file, language) } pub fn parse_files(files: &[PathBuf], language: Language) -> Result> { diff --git a/gems/decomplex/rust/src/decomplex/syntax/ruby.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs similarity index 76% rename from gems/decomplex/rust/src/decomplex/syntax/ruby.rs rename to gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 4a5b21d72..174d807a4 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/ruby.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -6,8 +6,8 @@ use std::fs; use std::path::{Path, PathBuf}; use tree_sitter::{Language as TreeSitterLanguage, Node, Parser}; -pub fn parse_file(file: PathBuf) -> Result { - let parsed = ParsedRuby::parse(file)?; +pub fn parse_file(file: PathBuf, language: Language) -> Result { + let parsed = ParsedDocument::parse(file, language)?; let mut function_defs = Vec::new(); let mut state_writes = Vec::new(); let mut decision_sites = Vec::new(); @@ -21,6 +21,7 @@ pub fn parse_file(file: PathBuf) -> Result { parsed.tree.root_node(), &parsed.source, &parsed.file, + language, &context, &mut function_defs, &mut state_writes, @@ -33,7 +34,7 @@ pub fn parse_file(file: PathBuf) -> Result { Ok(Document { file: parsed.file.to_string_lossy().to_string(), - language: Language::Ruby, + language, source: parsed.source.clone(), lines: 
parsed.source.lines().map(ToString::to_string).collect(), root: RawNode::from_tree_sitter(parsed.tree.root_node(), &parsed.source), @@ -45,24 +46,36 @@ pub fn parse_file(file: PathBuf) -> Result { }) } -fn ruby_language() -> TreeSitterLanguage { - tree_sitter_ruby::LANGUAGE.into() +fn language_grammar(language: Language) -> TreeSitterLanguage { + match language { + Language::Ruby => tree_sitter_ruby::LANGUAGE.into(), + Language::Python => tree_sitter_python::LANGUAGE.into(), + Language::JavaScript => tree_sitter_javascript::LANGUAGE.into(), + Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), + Language::Go => tree_sitter_go::LANGUAGE.into(), + Language::Rust => tree_sitter_rust::LANGUAGE.into(), + Language::Zig => tree_sitter_zig::LANGUAGE.into(), + Language::Lua => tree_sitter_lua::LANGUAGE.into(), + Language::C => tree_sitter_c::LANGUAGE.into(), + Language::Cpp => tree_sitter_cpp::LANGUAGE.into(), + Language::CSharp => tree_sitter_c_sharp::language().into(), + } } -struct ParsedRuby { +struct ParsedDocument { file: PathBuf, source: String, tree: tree_sitter::Tree, } -impl ParsedRuby { - fn parse(file: PathBuf) -> Result { +impl ParsedDocument { + fn parse(file: PathBuf, language: Language) -> Result { let source = fs::read_to_string(&file) .with_context(|| format!("failed to read {}", file.display()))?; let mut parser = Parser::new(); parser - .set_language(&ruby_language()) - .with_context(|| "failed to initialize tree-sitter ruby parser")?; + .set_language(&language_grammar(language)) + .with_context(|| "failed to initialize tree-sitter parser")?; let tree = parser .parse(&source, None) .with_context(|| format!("tree-sitter produced no tree for {}", file.display()))?; @@ -75,6 +88,7 @@ struct ContextState { file_owner: String, owner: Option, function: Option, + pub receiver: Option, } impl ContextState { @@ -83,6 +97,7 @@ impl ContextState { file_owner, owner: None, function: None, + receiver: None, } } @@ -103,6 +118,7 @@ fn 
collect_facts( node: Node<'_>, source: &str, file: &Path, + language: Language, context: &ContextState, function_defs: &mut Vec, state_writes: &mut Vec, @@ -112,12 +128,12 @@ fn collect_facts( seen_writes: &mut HashSet, seen_decisions: &mut HashSet, ) { - let next_context = push_function_context(node, push_owner_context(node, source, context), source); - record_function_def(node, source, file, &next_context, function_defs); - record_state_write(node, source, file, &next_context, state_writes, seen_writes); - record_decision_site(node, source, file, &next_context, decision_sites, seen_decisions); - record_predicate_alias(node, source, file, predicate_aliases); - record_comparison_use(node, source, file, &next_context, comparison_uses); + let next_context = push_function_context(node, push_owner_context(node, source, context, language), source, language); + record_function_def(node, source, file, language, &next_context, function_defs); + record_state_write(node, source, file, language, &next_context, state_writes, seen_writes); + record_decision_site(node, source, file, language, &next_context, decision_sites, seen_decisions); + record_predicate_alias(node, source, file, language, predicate_aliases); + record_comparison_use(node, source, file, language, &next_context, comparison_uses); let mut cursor = node.walk(); for child in node.children(&mut cursor) { @@ -125,6 +141,7 @@ fn collect_facts( child, source, file, + language, &next_context, function_defs, state_writes, @@ -141,6 +158,7 @@ fn record_function_def( node: Node<'_>, source: &str, file: &Path, + _language: Language, context: &ContextState, out: &mut Vec, ) { @@ -169,9 +187,10 @@ fn record_predicate_alias( node: Node<'_>, source: &str, file: &Path, + _language: Language, out: &mut Vec, ) { - if node.kind() != "method" { + if !matches!(node.kind(), "method" | "function_definition") { return; } let Some(name) = function_name(node, source) else { @@ -199,6 +218,7 @@ fn record_comparison_use( node: Node<'_>, 
source: &str, file: &Path, + _language: Language, context: &ContextState, out: &mut Vec, ) { @@ -232,10 +252,15 @@ fn record_decision_site( node: Node<'_>, source: &str, file: &Path, + language: Language, context: &ContextState, out: &mut Vec, seen: &mut HashSet, ) { + if generated_lua_compat_prelude(node, source, language) { + return; + } + if boolean_container(node) && boolean_and(node, source) { record_conjunction_decision(node, source, file, context, out, seen); return; @@ -262,6 +287,17 @@ fn record_decision_site( } } +fn generated_lua_compat_prelude(node: Node<'_>, source: &str, language: Language) -> bool { + if language != Language::Lua { + return false; + } + if line(node) != 1 { + return false; + } + let first_line = source.lines().next().unwrap_or(""); + first_line.contains("_tl_compat") && first_line.contains("compat53.module") +} + fn record_conjunction_decision( mut node: Node<'_>, source: &str, @@ -352,8 +388,8 @@ fn method_single_expression_body(node: Node<'_>) -> Option> { } } -fn push_owner_context(node: Node<'_>, source: &str, context: &ContextState) -> ContextState { - let Some(owner) = owner_name_from_declaration(node, source) else { +fn push_owner_context(node: Node<'_>, source: &str, context: &ContextState, language: Language) -> ContextState { + let Some(owner) = owner_name_from_declaration(node, source).or_else(|| receiver_convention_owner_name(node, source, language)) else { return context.clone(); }; let parent_owner = context.owner.clone(); @@ -371,13 +407,14 @@ fn push_owner_context(node: Node<'_>, source: &str, context: &ContextState) -> C next } -fn push_function_context(node: Node<'_>, mut context: ContextState, source: &str) -> ContextState { +fn push_function_context(node: Node<'_>, mut context: ContextState, source: &str, language: Language) -> ContextState { let Some(function) = function_name(node, source) else { return context; }; let owner = context.current_owner(); context.function = Some(function); context.owner = 
Some(owner); + context.receiver = function_receiver_name(node, source, language); context } @@ -385,11 +422,12 @@ fn record_state_write( node: Node<'_>, source: &str, file: &Path, + _language: Language, context: &ContextState, out: &mut Vec, seen: &mut HashSet, ) { - if node.kind() == "operator_assignment" { + if node.kind() == "operator_assignment" || node.kind() == "augmented_assignment" { return; } @@ -399,6 +437,7 @@ fn record_state_write( let Some(target) = state_target(assignment.lhs, source) else { return; }; + let target = normalize_target_receiver(target, context); if target.field == "[]" || target.field.starts_with('$') { return; } @@ -521,9 +560,10 @@ fn state_target(lhs: Node<'_>, source: &str) -> Option { fn function_name(node: Node<'_>, source: &str) -> Option { match node.kind() { - "method" => node + "method" | "function_definition" | "function_declaration" | "method_definition" | "function_item" => node .child_by_field_name("name") .map(|name| node_text(name, source).to_string()) + .or_else(|| declarator_name(node.child_by_field_name("declarator"), source)) .or_else(|| first_named_text(node, source, &["identifier", "constant", "property_identifier"])), "singleton_method" => { let name = node @@ -543,26 +583,76 @@ fn function_name(node: Node<'_>, source: &str) -> Option { })?; Some(format!("self.{name}")) } + "method_declaration" => node + .child_by_field_name("name") + .map(|name| node_text(name, source).to_string()) + .or_else(|| first_named_text(node, source, &["field_identifier", "identifier"])), "body_statement" if first_child_kind(node) == Some("def") => hidden_ruby_method_name(node, source), "argument_list" if first_child_kind(node) == Some("def") => inline_def_name(node, source), _ => None, } } +fn declarator_name(node: Option>, source: &str) -> Option { + let mut pending = vec![node?]; + let mut seen = HashSet::new(); + while let Some(current) = pending.pop() { + let key = format!("{:?}\0{}", span(current), current.kind()); + if 
!seen.insert(key) { + continue; + } + if matches!( + current.kind(), + "identifier" | "simple_identifier" | "field_identifier" | "property_identifier" + ) { + return Some(node_text(current, source).to_string()); + } + let mut children = named_children(current); + children.reverse(); + pending.extend(children); + } + None +} + fn owner_name_from_declaration(node: Node<'_>, source: &str) -> Option { if node.kind() == "body_statement" && matches!(first_child_kind(node), Some("class" | "module")) { return first_named_text(node, source, &["constant", "identifier", "type_identifier"]); } match node.kind() { - "class" | "module" => node + "class" | "module" | "class_definition" | "class_declaration" | "class_specifier" => node .child_by_field_name("name") .map(|name| node_text(name, source).to_string()) .or_else(|| first_named_text(node, source, &["constant", "identifier", "type_identifier"])), + "impl_item" | "impl_block" => impl_owner_name(node, source), + "struct_item" | "struct_spec" | "struct_specifier" | "type_spec" | "type_declaration" => node + .child_by_field_name("name") + .map(|name| node_text(name, source).to_string()) + .or_else(|| first_named_text(node, source, &["type_identifier", "identifier"])), _ => None, } } +fn impl_owner_name(node: Node<'_>, source: &str) -> Option { + let r#type = node + .child_by_field_name("type") + .or_else(|| { + named_children(node) + .into_iter() + .find(|child| child.kind().contains("type") || child.kind().contains("identifier")) + })?; + Some(normalize_type_owner(node_text(r#type, source))) +} + +fn normalize_type_owner(text: &str) -> String { + let value = text.trim(); + let value = value.trim_start_matches(['&', '*']); + let value = value.replace("const", "").replace("mut", "").replace("var", ""); + let value = value.trim(); + let value = value.split(['(', '{', '<', ' ']).next().unwrap_or(""); + value.split('.').last().unwrap_or("").to_string() +} + fn hidden_ruby_method_name(node: Node<'_>, source: &str) -> Option { let 
children = named_children(node); let receiver_index = children @@ -657,14 +747,29 @@ fn member_field_text(field: Node<'_>, source: &str) -> Option { .or_else(|| { named_children(field) .into_iter() - .find(|child| matches!(child.kind(), "identifier" | "simple_identifier" | "field_identifier" | "property_identifier")) + .find(|child| { + matches!( + child.kind(), + "identifier" + | "simple_identifier" + | "field_identifier" + | "property_identifier" + ) + }) }) .or_else(|| last_named_child(field))?; - let text = node_text(suffix, source).trim_start_matches(['.', '?']); + let text = node_text(suffix, source) + .trim_start_matches(['.', '?']) + .trim_start_matches("->"); return (!text.is_empty()).then(|| text.to_string()); } - Some(node_text(field, source).trim_start_matches(['.', '?']).to_string()) + Some( + node_text(field, source) + .trim_start_matches(['.', '?']) + .trim_start_matches("->") + .to_string(), + ) } fn strip_assignment_suffix(text: &str) -> String { @@ -987,7 +1092,7 @@ mod tests { fn document(source: &str) -> Document { let mut file = NamedTempFile::new().expect("tempfile"); file.write_all(source.as_bytes()).expect("write source"); - parse_file(file.path().to_path_buf()).expect("document") + parse_file(file.path().to_path_buf(), Language::Ruby).expect("document") } #[test] @@ -1053,3 +1158,92 @@ end assert_eq!(doc.state_writes[0].field, "state"); } } + +#[cfg(test)] +mod c_tests { + use super::*; + use std::io::Write; + use tempfile::NamedTempFile; + + #[test] + fn test_c_assignment() { + let mut file = NamedTempFile::new().unwrap(); + file.write_all(b"void foo() { handle->loop = 1; }").unwrap(); + let doc = parse_file(file.path().to_path_buf(), Language::C).unwrap(); + assert!(!doc.state_writes.is_empty()); + } +} + +fn first_argument_receiver_language(language: Language) -> bool { + matches!(language, Language::C) +} + +fn first_argument_receiver_parameter(node: Node<'_>, source: &str) -> Option<(String, String)> { + let params = 
node.child_by_field_name("declarator") + .and_then(|d| d.child_by_field_name("parameters")) + .or_else(|| node.child_by_field_name("parameters")) + .or_else(|| first_named_child_with_kind(node, "parameter_list")) + .or_else(|| { + node.child_by_field_name("declarator") + .and_then(|d| first_named_child_with_kind(d, "parameter_list")) + })?; + + let first = first_named_child_with_kind(params, "parameter_declaration")?; + + let type_node = named_children(first).into_iter().find(|child| { + matches!(child.kind(), "type_identifier" | "primitive_type" | "qualified_identifier" | "scoped_type_identifier") + })?; + + let name_node = named_children(first).into_iter().rev().find(|child| { + matches!(child.kind(), "identifier" | "field_identifier") + }).or_else(|| first_named_child(first))?; + + Some((node_text(type_node, source).to_string(), node_text(name_node, source).to_string())) +} + +fn snake_case_type_name(type_str: &str) -> String { + let mut parts = type_str.split("::"); + let mut last = parts.last().unwrap_or(type_str).to_string(); + // Simplified snake casing logic + last.make_ascii_lowercase(); + last +} + +fn receiver_convention_owner_name(node: Node<'_>, source: &str, language: Language) -> Option { + if !first_argument_receiver_language(language) || node.kind() != "function_definition" { + return None; + } + + let (type_name, _) = first_argument_receiver_parameter(node, source)?; + let type_name = normalize_type_owner(&type_name); + let name = function_name(node, source)?; + + if name.starts_with(&snake_case_type_name(&type_name)) { + Some(type_name) + } else if type_name.ends_with("_t") && name.starts_with(type_name.strip_suffix("_t").unwrap()) { + Some(type_name) + } else { + None + } +} + +fn function_receiver_name(node: Node<'_>, source: &str, language: Language) -> Option { + // Only handling C convention for now + if first_argument_receiver_language(language) && node.kind() == "function_definition" { + if let Some((_, name)) = 
first_argument_receiver_parameter(node, source) { + return Some(name); + } + } + None +} + +fn normalize_target_receiver(mut target: Target, context: &ContextState) -> Target { + if let Some(current_receiver) = &context.receiver { + if &target.receiver == current_receiver { + target.receiver = "self".to_string(); + } else if target.receiver.starts_with(&format!("{}.", current_receiver)) { + target.receiver = format!("self.{}", target.receiver.strip_prefix(&format!("{}.", current_receiver)).unwrap()); + } + } + target +} diff --git a/gems/decomplex/test/detector_runner_test.rb b/gems/decomplex/test/detector_runner_test.rb index b9e2095dc..ba735ab47 100644 --- a/gems/decomplex/test/detector_runner_test.rb +++ b/gems/decomplex/test/detector_runner_test.rb @@ -554,6 +554,66 @@ def scan(node) end end + def test_local_flow_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-local-flow", ".rb"]) do |file| + file.write(<<~RUBY) + class Billing + def mixed(price, tax) + subtotal = price + tax + total = subtotal.round + + timestamp = Time.now + buffer = [] + buffer << timestamp + [total, buffer] + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("local-flow", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_structural_topology_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-structural-topology", ".rb"]) do |file| + file.write(<<~RUBY) + class Worker + def run(items) + prepare + if ready? 
+ validate + end + items.each do |item| + helper(item) + end + end + + private + def prepare; end + def ready?; true; end + def validate; end + def helper(item); item; end + + public :validate + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("structural-topology", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + def test_detector_cli_compare_engines_outputs_canonical_json skip "cargo is not available" unless cargo_available? From 281f1d4e6b7d0ef74cc6b588515a72650a6dd325 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Thu, 18 Jun 2026 11:19:22 +0000 Subject: [PATCH 16/52] Add Java Kotlin Swift native support --- .../lib/decomplex/native/co_update.rb | 18 +- .../decomplex/lib/decomplex/native/command.rb | 20 + .../lib/decomplex/native/decision_pressure.rb | 18 +- .../lib/decomplex/native/derived_state.rb | 18 +- .../lib/decomplex/native/false_simplicity.rb | 18 +- .../lib/decomplex/native/fat_union.rb | 18 +- .../lib/decomplex/native/flay_similarity.rb | 18 +- .../lib/decomplex/native/function_lcom.rb | 18 +- .../decomplex/native/implicit_control_flow.rb | 18 +- .../native/inconsistent_rename_clone.rb | 18 +- .../lib/decomplex/native/local_flow.rb | 19 +- .../lib/decomplex/native/locality_drag.rb | 18 +- gems/decomplex/lib/decomplex/native/miner.rb | 18 +- .../native/operational_discontinuity.rb | 18 +- .../decomplex/native/oversized_predicate.rb | 18 +- .../lib/decomplex/native/path_condition.rb | 18 +- .../lib/decomplex/native/predicate_aliases.rb | 18 +- .../decomplex/native/redundant_nil_guard.rb | 18 +- .../lib/decomplex/native/semantic_aliases.rb | 18 +- .../lib/decomplex/native/sequence_mine.rb | 18 +- .../decomplex/native/state_branch_density.rb | 18 +- .../lib/decomplex/native/state_mesh.rb | 18 +- .../lib/decomplex/native/state_writes.rb | 18 +- .../decomplex/native/structural_topology.rb | 19 +- .../native/temporal_ordering_pressure.rb | 18 +- 
.../native/weighted_inlined_complexity.rb | 18 +- gems/decomplex/rust/Cargo.lock | 33 + gems/decomplex/rust/Cargo.toml | 3 + gems/decomplex/rust/src/decomplex/ast.rs | 547 +++++++++++---- .../rust/src/decomplex/detectors/co_update.rs | 33 +- .../decomplex/detectors/decision_pressure.rs | 152 ++-- .../src/decomplex/detectors/derived_state.rs | 11 +- .../decomplex/detectors/false_simplicity.rs | 79 ++- .../rust/src/decomplex/detectors/fat_union.rs | 75 +- .../decomplex/detectors/flay_similarity.rs | 106 ++- .../src/decomplex/detectors/function_lcom.rs | 84 ++- .../detectors/implicit_control_flow.rs | 653 ++++++++++++++---- .../detectors/inconsistent_rename_clone.rs | 43 +- .../src/decomplex/detectors/local_flow.rs | 179 ++++- .../src/decomplex/detectors/locality_drag.rs | 296 ++++++-- .../rust/src/decomplex/detectors/miner.rs | 21 +- .../detectors/operational_discontinuity.rs | 100 ++- .../detectors/oversized_predicate.rs | 5 +- .../src/decomplex/detectors/path_condition.rs | 87 ++- .../decomplex/detectors/predicate_alias.rs | 27 +- .../detectors/redundant_nil_guard.rs | 45 +- .../src/decomplex/detectors/semantic_alias.rs | 52 +- .../src/decomplex/detectors/sequence_mine.rs | 26 +- .../detectors/state_branch_density.rs | 129 +++- .../src/decomplex/detectors/state_mesh.rs | 325 ++++++--- .../detectors/structural_topology.rs | 285 ++++++-- .../detectors/temporal_ordering_pressure.rs | 50 +- .../weighted_inlined_cognitive_complexity.rs | 463 ++++++++++--- gems/decomplex/rust/src/decomplex/parallel.rs | 8 +- gems/decomplex/rust/src/decomplex/syntax.rs | 41 ++ .../decomplex/syntax/tree_sitter_adapter.rs | 338 ++++++--- gems/decomplex/rust/src/main.rs | 108 ++- gems/decomplex/test/detector_runner_test.rb | 33 + 58 files changed, 3500 insertions(+), 1409 deletions(-) diff --git a/gems/decomplex/lib/decomplex/native/co_update.rb b/gems/decomplex/lib/decomplex/native/co_update.rb index 42cb5ceeb..beb474326 100644 --- a/gems/decomplex/lib/decomplex/native/co_update.rb +++ 
b/gems/decomplex/lib/decomplex/native/co_update.rb @@ -10,25 +10,9 @@ module CoUpdate def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("co-update", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/command.rb b/gems/decomplex/lib/decomplex/native/command.rb index 87646efc3..23c6f797e 100644 --- a/gems/decomplex/lib/decomplex/native/command.rb +++ b/gems/decomplex/lib/decomplex/native/command.rb @@ -38,6 +38,26 @@ def jobs_args(jobs) ["--jobs", count.to_s] end + def language_for(path) + case File.extname(path) + when ".rb" then "ruby" + when ".py" then "python" + when ".js" then "javascript" + when ".ts", ".tsx" then "typescript" + when ".java" then "java" + when ".swift" then "swift" + when ".kt", ".kts" then "kotlin" + when ".go" then "go" + when ".rs" then "rust" + when ".zig" then "zig" + when ".lua" then "lua" + when ".c", ".h" then "c" + when ".cpp", ".cc", ".cxx", ".hpp", ".hh", ".hxx" then "cpp" + when ".cs" then "csharp" + else "ruby" + end + end + private_class_method def self.native_command(args) if fresh_binary?(binary_path) [binary_path, *args] diff --git a/gems/decomplex/lib/decomplex/native/decision_pressure.rb b/gems/decomplex/lib/decomplex/native/decision_pressure.rb index e3f1eeb6c..f7653e7ee 100644 --- a/gems/decomplex/lib/decomplex/native/decision_pressure.rb +++ b/gems/decomplex/lib/decomplex/native/decision_pressure.rb @@ -10,25 +10,9 @@ module 
DecisionPressure def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("decision-pressure", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/derived_state.rb b/gems/decomplex/lib/decomplex/native/derived_state.rb index a6c34624d..1092a4310 100644 --- a/gems/decomplex/lib/decomplex/native/derived_state.rb +++ b/gems/decomplex/lib/decomplex/native/derived_state.rb @@ -10,25 +10,9 @@ module DerivedState def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("derived-state", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/false_simplicity.rb b/gems/decomplex/lib/decomplex/native/false_simplicity.rb index a4700b370..b8b7ec5e8 100644 --- a/gems/decomplex/lib/decomplex/native/false_simplicity.rb +++ b/gems/decomplex/lib/decomplex/native/false_simplicity.rb @@ -10,25 +10,9 @@ module FalseSimplicity def 
scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("false-simplicity", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/fat_union.rb b/gems/decomplex/lib/decomplex/native/fat_union.rb index 477bcfa0f..b1b4f242f 100644 --- a/gems/decomplex/lib/decomplex/native/fat_union.rb +++ b/gems/decomplex/lib/decomplex/native/fat_union.rb @@ -10,25 +10,9 @@ module FatUnion def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("fat-union", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/flay_similarity.rb b/gems/decomplex/lib/decomplex/native/flay_similarity.rb index 1282d2f74..999af6ff6 100644 --- a/gems/decomplex/lib/decomplex/native/flay_similarity.rb +++ b/gems/decomplex/lib/decomplex/native/flay_similarity.rb @@ -10,7 +10,7 @@ module FlaySimilarity def scan(files, mass:, fuzzy:, jobs: nil) paths = 
Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse( Command.run( "flay-similarity", @@ -30,22 +30,6 @@ def scan(files, mass:, fuzzy:, jobs: nil) spans: finding.fetch(:spans).transform_values { |span| Array(span).map(&:to_i) } ) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/function_lcom.rb b/gems/decomplex/lib/decomplex/native/function_lcom.rb index 38cce2f2b..9d4f1254a 100644 --- a/gems/decomplex/lib/decomplex/native/function_lcom.rb +++ b/gems/decomplex/lib/decomplex/native/function_lcom.rb @@ -10,25 +10,9 @@ module FunctionLcom def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("function-lcom", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/implicit_control_flow.rb b/gems/decomplex/lib/decomplex/native/implicit_control_flow.rb index 32ecab9b0..43944acbc 100644 --- a/gems/decomplex/lib/decomplex/native/implicit_control_flow.rb +++ 
b/gems/decomplex/lib/decomplex/native/implicit_control_flow.rb @@ -10,25 +10,9 @@ module ImplicitControlFlow def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("implicit-control-flow", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/inconsistent_rename_clone.rb b/gems/decomplex/lib/decomplex/native/inconsistent_rename_clone.rb index d3c812b2d..d34566d66 100644 --- a/gems/decomplex/lib/decomplex/native/inconsistent_rename_clone.rb +++ b/gems/decomplex/lib/decomplex/native/inconsistent_rename_clone.rb @@ -10,25 +10,9 @@ module InconsistentRenameClone def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("inconsistent-rename-clone", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/local_flow.rb b/gems/decomplex/lib/decomplex/native/local_flow.rb index f255f59c0..4f2571aa8 100644 --- 
a/gems/decomplex/lib/decomplex/native/local_flow.rb +++ b/gems/decomplex/lib/decomplex/native/local_flow.rb @@ -10,26 +10,9 @@ module LocalFlow def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("local-flow", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end end diff --git a/gems/decomplex/lib/decomplex/native/locality_drag.rb b/gems/decomplex/lib/decomplex/native/locality_drag.rb index 74e936da5..604bcc3b2 100644 --- a/gems/decomplex/lib/decomplex/native/locality_drag.rb +++ b/gems/decomplex/lib/decomplex/native/locality_drag.rb @@ -10,25 +10,9 @@ module LocalityDrag def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("locality-drag", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/miner.rb b/gems/decomplex/lib/decomplex/native/miner.rb index 2f5425477..8a8032f4f 100644 --- a/gems/decomplex/lib/decomplex/native/miner.rb +++ 
b/gems/decomplex/lib/decomplex/native/miner.rb @@ -10,25 +10,9 @@ module Miner def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("miner", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/operational_discontinuity.rb b/gems/decomplex/lib/decomplex/native/operational_discontinuity.rb index 42bd4a7f5..21744b764 100644 --- a/gems/decomplex/lib/decomplex/native/operational_discontinuity.rb +++ b/gems/decomplex/lib/decomplex/native/operational_discontinuity.rb @@ -10,25 +10,9 @@ module OperationalDiscontinuity def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("operational-discontinuity", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/oversized_predicate.rb b/gems/decomplex/lib/decomplex/native/oversized_predicate.rb index cc563074c..d2d148597 100644 --- 
a/gems/decomplex/lib/decomplex/native/oversized_predicate.rb +++ b/gems/decomplex/lib/decomplex/native/oversized_predicate.rb @@ -10,25 +10,9 @@ module OversizedPredicate def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("oversized-predicate", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/path_condition.rb b/gems/decomplex/lib/decomplex/native/path_condition.rb index aa44a2fe2..71dd0f5a6 100644 --- a/gems/decomplex/lib/decomplex/native/path_condition.rb +++ b/gems/decomplex/lib/decomplex/native/path_condition.rb @@ -10,25 +10,9 @@ module PathCondition def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("path-condition", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/predicate_aliases.rb b/gems/decomplex/lib/decomplex/native/predicate_aliases.rb index 584b201ee..f8472f200 100644 --- 
a/gems/decomplex/lib/decomplex/native/predicate_aliases.rb +++ b/gems/decomplex/lib/decomplex/native/predicate_aliases.rb @@ -10,25 +10,9 @@ module PredicateAliases def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("predicate-aliases", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/redundant_nil_guard.rb b/gems/decomplex/lib/decomplex/native/redundant_nil_guard.rb index 55d5ac922..5c1991f02 100644 --- a/gems/decomplex/lib/decomplex/native/redundant_nil_guard.rb +++ b/gems/decomplex/lib/decomplex/native/redundant_nil_guard.rb @@ -10,25 +10,9 @@ module RedundantNilGuard def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("redundant-nil-guard", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/semantic_aliases.rb b/gems/decomplex/lib/decomplex/native/semantic_aliases.rb index 
e2c174559..6f72a1324 100644 --- a/gems/decomplex/lib/decomplex/native/semantic_aliases.rb +++ b/gems/decomplex/lib/decomplex/native/semantic_aliases.rb @@ -10,25 +10,9 @@ module SemanticAliases def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("semantic-aliases", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/sequence_mine.rb b/gems/decomplex/lib/decomplex/native/sequence_mine.rb index fc5b38d65..722c58fe9 100644 --- a/gems/decomplex/lib/decomplex/native/sequence_mine.rb +++ b/gems/decomplex/lib/decomplex/native/sequence_mine.rb @@ -10,25 +10,9 @@ module SequenceMine def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("sequence-mine", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/state_branch_density.rb b/gems/decomplex/lib/decomplex/native/state_branch_density.rb index 
d4dbb37a1..4a310687b 100644 --- a/gems/decomplex/lib/decomplex/native/state_branch_density.rb +++ b/gems/decomplex/lib/decomplex/native/state_branch_density.rb @@ -10,25 +10,9 @@ module StateBranchDensity def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("state-branch-density", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/state_mesh.rb b/gems/decomplex/lib/decomplex/native/state_mesh.rb index 486b5fe65..3f3374e20 100644 --- a/gems/decomplex/lib/decomplex/native/state_mesh.rb +++ b/gems/decomplex/lib/decomplex/native/state_mesh.rb @@ -10,25 +10,9 @@ module StateMesh def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("state-mesh", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/state_writes.rb b/gems/decomplex/lib/decomplex/native/state_writes.rb index 5d5155c1e..b2761f1cb 100644 --- 
a/gems/decomplex/lib/decomplex/native/state_writes.rb +++ b/gems/decomplex/lib/decomplex/native/state_writes.rb @@ -28,25 +28,9 @@ def extract(files) end private_class_method def self.run_native(paths) - language = language_for(paths.first) + language = Command.language_for(paths.first) Command.run("state-writes", "--language", language, *paths) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/structural_topology.rb b/gems/decomplex/lib/decomplex/native/structural_topology.rb index 889e2db88..f131d89d6 100644 --- a/gems/decomplex/lib/decomplex/native/structural_topology.rb +++ b/gems/decomplex/lib/decomplex/native/structural_topology.rb @@ -10,26 +10,9 @@ module StructuralTopology def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("structural-topology", "--language", language, *Command.jobs_args(jobs), *paths)) end - - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end end diff --git a/gems/decomplex/lib/decomplex/native/temporal_ordering_pressure.rb b/gems/decomplex/lib/decomplex/native/temporal_ordering_pressure.rb index be9208568..21419c067 100644 --- 
a/gems/decomplex/lib/decomplex/native/temporal_ordering_pressure.rb +++ b/gems/decomplex/lib/decomplex/native/temporal_ordering_pressure.rb @@ -10,25 +10,9 @@ module TemporalOrderingPressure def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("temporal-ordering-pressure", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb b/gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb index 84c39494f..7abd769b4 100644 --- a/gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb +++ b/gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb @@ -10,25 +10,9 @@ module WeightedInlinedComplexity def scan(files, jobs: nil) paths = Array(files).map(&:to_s) - language = language_for(paths.first) + language = Command.language_for(paths.first) JSON.parse(Command.run("weighted-inlined-complexity", "--language", language, *Command.jobs_args(jobs), *paths)) end - private_class_method def self.language_for(path) - case File.extname(path) - when ".rb" then "ruby" - when ".py" then "python" - when ".js" then "javascript" - when ".ts", ".tsx" then "typescript" - when ".go" then "go" - when ".rs" then "rust" - when ".zig" then "zig" - when ".lua" then "lua" - when ".c" then "c" - when ".cpp", ".cc", ".cxx" then "cpp" - when ".cs" then "csharp" - else "ruby" - end - end end end diff --git a/gems/decomplex/rust/Cargo.lock 
b/gems/decomplex/rust/Cargo.lock index 999cc42b3..d007a91bd 100644 --- a/gems/decomplex/rust/Cargo.lock +++ b/gems/decomplex/rust/Cargo.lock @@ -53,12 +53,15 @@ dependencies = [ "tree-sitter-c-sharp", "tree-sitter-cpp", "tree-sitter-go", + "tree-sitter-java", "tree-sitter-javascript", + "tree-sitter-kotlin-ng", "tree-sitter-language", "tree-sitter-lua", "tree-sitter-python", "tree-sitter-ruby", "tree-sitter-rust", + "tree-sitter-swift", "tree-sitter-typescript", "tree-sitter-zig", ] @@ -300,6 +303,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-java" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-javascript" version = "0.23.1" @@ -310,6 +323,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-kotlin-ng" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e800ebbda938acfbf224f4d2c34947a31994b1295ee6e819b65226c7b51b4450" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-language" version = "0.1.3" @@ -356,6 +379,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-swift" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d65aeb41726119416567d0333ec17580ac4abfb96db1f67c4bd638c65f9992fe" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-typescript" version = "0.23.2" diff --git a/gems/decomplex/rust/Cargo.toml b/gems/decomplex/rust/Cargo.toml index 039da4f74..88503015b 100644 --- a/gems/decomplex/rust/Cargo.toml +++ b/gems/decomplex/rust/Cargo.toml @@ -19,6 +19,7 @@ tree-sitter-language = "=0.1.3" tree-sitter-ruby = "=0.23.1" tree-sitter-python = "0.23.6" tree-sitter-javascript = "0.23.1" 
+tree-sitter-java = "0.23.5" tree-sitter-typescript = "0.23.2" tree-sitter-go = "0.23.4" tree-sitter-rust = "0.23.2" @@ -27,6 +28,8 @@ tree-sitter-lua = "0.2.0" tree-sitter-c = "0.23.4" tree-sitter-cpp = "0.23.4" tree-sitter-c-sharp = "0.21.3" +tree-sitter-swift = "=0.6.0" +tree-sitter-kotlin-ng = "1.1.0" [dev-dependencies] tempfile = "=3.10.1" diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs index cc844ef72..832012861 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -8,7 +8,9 @@ use tree_sitter::{Language as TreeSitterLanguage, Node as TreeSitterNode, Parser pub type Span = [usize; 4]; const COMPARISON_OPERATORS: &[&str] = &["==", "!=", "===", "!==", "<", "<=", ">", ">="]; -const OPERATOR_CALL_OPERATORS: &[&str] = &["+", "-", "*", "/", "%", "**", "|", "&", "^", "<<", ">>", "=~", "!~"]; +const OPERATOR_CALL_OPERATORS: &[&str] = &[ + "+", "-", "*", "/", "%", "**", "|", "&", "^", "<<", ">>", "=~", "!~", +]; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct RawNode { @@ -67,9 +69,7 @@ impl RawNode { children = flattened; } - if node.kind() == "pattern" - && children.len() == 1 - && children[0].kind == "scope_resolution" + if node.kind() == "pattern" && children.len() == 1 && children[0].kind == "scope_resolution" { children = children[0].children.clone(); } @@ -95,7 +95,10 @@ impl RawNode { if node.kind() == "body_statement" && children.len() == 1 && children[0].kind == "call" { children = children[0].children.clone(); } - if node.kind() == "body_statement" && children.len() == 1 && children[0].kind == "conditional" { + if node.kind() == "body_statement" + && children.len() == 1 + && children[0].kind == "conditional" + { children = children[0].children.clone(); } if node.kind() == "body_statement" && children.len() == 1 && children[0].kind == "module" { @@ -123,7 +126,10 @@ impl RawNode { } if node.kind() == "block_body" && children.len() == 1 - && 
matches!(children[0].kind.as_str(), "array" | "binary" | "string" | "unary") + && matches!( + children[0].kind.as_str(), + "array" | "binary" | "string" | "unary" + ) { children = children[0].children.clone(); } @@ -195,8 +201,8 @@ pub fn parse(file: &Path) -> Result<(Node, Vec)> { } pub fn parse_with_language(file: &Path, language: Language) -> Result<(Node, Vec)> { - let source = fs::read_to_string(file) - .with_context(|| format!("failed to read {}", file.display()))?; + let source = + fs::read_to_string(file).with_context(|| format!("failed to read {}", file.display()))?; let mut parser = Parser::new(); parser .set_language(&language_grammar(language)) @@ -214,7 +220,10 @@ fn language_grammar(language: Language) -> TreeSitterLanguage { Language::Ruby => tree_sitter_ruby::LANGUAGE.into(), Language::Python => tree_sitter_python::LANGUAGE.into(), Language::JavaScript => tree_sitter_javascript::LANGUAGE.into(), + Language::Java => tree_sitter_java::LANGUAGE.into(), Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), + Language::Swift => tree_sitter_swift::LANGUAGE.into(), + Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(), Language::Go => tree_sitter_go::LANGUAGE.into(), Language::Rust => tree_sitter_rust::LANGUAGE.into(), Language::Zig => tree_sitter_zig::LANGUAGE.into(), @@ -294,9 +303,8 @@ impl<'source> TreeSitterNormalizer<'source> { } fn normalize(mut self, root: TreeSitterNode<'_>) -> Node { - let children = self.with_ruby_scope(root, true, |normalizer| { - normalizer.normalize_children(root) - }); + let children = + self.with_ruby_scope(root, true, |normalizer| normalizer.normalize_children(root)); self.wrap("ROOT", children, root) } @@ -346,8 +354,12 @@ impl<'source> TreeSitterNormalizer<'source> { let children = self.normalize_children(node); Some(self.wrap("ROOT", children, node)) } - "method" | "function_definition" | "function_declaration" | "method_definition" - | "method_declaration" | "function_item" => 
self.normalize_function(node), + "method" + | "function_definition" + | "function_declaration" + | "method_definition" + | "method_declaration" + | "function_item" => self.normalize_function(node), "singleton_method" => self.normalize_singleton_function(node), "class" | "class_definition" | "class_declaration" | "class_specifier" => { self.normalize_class(node) @@ -360,6 +372,10 @@ impl<'source> TreeSitterNormalizer<'source> { "assignment" | "assignment_expression" | "assignment_statement" => { self.normalize_assignment(node) } + "local_variable_declaration" + | "variable_declarator" + | "variable_declaration" + | "property_declaration" => self.normalize_declaration(node), "call" | "call_expression" | "method_call" | "method_call_expression" => { self.normalize_call(node) } @@ -387,7 +403,10 @@ impl<'source> TreeSitterNormalizer<'source> { "array" => Some(self.normalize_array_literal(node)), "interpolation" => self.normalize_interpolation(node), "heredoc_beginning" => Some(self.normalize_heredoc_beginning(node)), - "string" | "string_content" | "string_literal" | "interpreted_string_literal" + "string" + | "string_content" + | "string_literal" + | "interpreted_string_literal" | "raw_string_literal" => { if self.interpolated_string(node) { Some(self.normalize_interpolated_string(node)) @@ -430,7 +449,7 @@ impl<'source> TreeSitterNormalizer<'source> { fn normalize_function(&mut self, node: TreeSitterNode<'_>) -> Option { let name = self.function_name(node)?; - let args = self.normalize_parameters(self.named_field(node, "parameters")); + let args = self.normalize_parameters(self.parameters_child(node)); let body = self.with_ruby_scope(node, true, |normalizer| { let body_node = normalizer .named_field(node, "body") @@ -455,7 +474,7 @@ impl<'source> TreeSitterNormalizer<'source> { .find(|child| matches!(child.kind(), "self" | "constant" | "identifier")) .and_then(|child| self.normalize_node(child)) .unwrap_or_else(|| self.wrap("SELF", Vec::new(), node)); - let args = 
self.normalize_parameters(self.named_field(node, "parameters")); + let args = self.normalize_parameters(self.parameters_child(node)); let body = self.with_ruby_scope(node, true, |normalizer| { let body_node = normalizer .named_field(node, "body") @@ -596,13 +615,18 @@ impl<'source> TreeSitterNormalizer<'source> { let positive_raw = self .named_field(node, "consequence") .or_else(|| self.named_field(node, "body")) - .or_else(|| self.named_children(node).into_iter().find(|child| child.kind() == "then")) + .or_else(|| { + self.named_children(node) + .into_iter() + .find(|child| child.kind() == "then") + }) .or_else(|| self.branch_child(node, condition_raw, 0)); let negative_raw = self .named_field(node, "alternative") .or_else(|| self.explicit_alternative(node)); let positive = optional_node(positive_raw.and_then(|child| self.normalize_body(child))); - let negative = optional_node(negative_raw.and_then(|child| self.normalize_else_or_branch(child))); + let negative = + optional_node(negative_raw.and_then(|child| self.normalize_else_or_branch(child))); let node_type = if node.kind().starts_with("unless") { "UNLESS" } else { @@ -674,7 +698,11 @@ impl<'source> TreeSitterNormalizer<'source> { .collect::>() }) .unwrap_or_default(); - self.wrap("SUPER", vec![list_or_nil(args, args_node.unwrap_or(node), self)], node) + self.wrap( + "SUPER", + vec![list_or_nil(args, args_node.unwrap_or(node), self)], + node, + ) } fn normalize_return_node(&mut self, node: TreeSitterNode<'_>) -> Option { @@ -858,7 +886,9 @@ impl<'source> TreeSitterNormalizer<'source> { fn normalize_assignment(&mut self, node: TreeSitterNode<'_>) -> Option { let left = self.assignment_left(node)?; - let right = self.assignment_right(node).and_then(|right| self.normalize_node(right)); + let right = self + .assignment_right(node) + .and_then(|right| self.normalize_node(right)); if let Some(target) = self.assignment_target(left, right.clone(), node) { return Some(target); } @@ -869,6 +899,40 @@ impl<'source> 
TreeSitterNormalizer<'source> { )) } + fn normalize_declaration(&mut self, node: TreeSitterNode<'_>) -> Option { + let mut assignments = Vec::new(); + for entry in self.declaration_entries(node) { + let Some(name) = self.declaration_name(entry) else { + continue; + }; + let right = self + .declaration_value(entry) + .and_then(|value| self.normalize_node(value)); + assignments.push(self.wrap( + "LASGN", + vec![Child::String(self.target_name(name)), optional_node(right)], + entry, + )); + } + + if assignments.is_empty() { + None + } else if assignments.len() == 1 { + assignments.into_iter().next() + } else { + Some( + self.wrap( + "BLOCK", + assignments + .into_iter() + .map(|assignment| Child::Node(Box::new(assignment))) + .collect(), + node, + ), + ) + } + } + fn normalize_call(&mut self, node: TreeSitterNode<'_>) -> Option { if self.call_block(node).is_some() { return self.normalize_call_with_block(node); @@ -905,12 +969,10 @@ impl<'source> TreeSitterNormalizer<'source> { let call_source = if self.dotted_call(node) { node } else { - self.named_children(node) - .into_iter() - .find(|child| { - Some(*child) != block - && (self.call_kind(child.kind()) || self.member_read_node(*child)) - })? + self.named_children(node).into_iter().find(|child| { + Some(*child) != block + && (self.call_kind(child.kind()) || self.member_read_node(*child)) + })? 
}; let call = self.normalize_call_without_block(call_source, block)?; let args = self.normalize_block_parameters(block); @@ -976,11 +1038,7 @@ impl<'source> TreeSitterNormalizer<'source> { source, )); } - return Some(self.wrap( - node_type, - vec![receiver, Child::Symbol(method), args], - node, - )); + return Some(self.wrap(node_type, vec![receiver, Child::Symbol(method), args], node)); } let function = self @@ -1007,11 +1065,7 @@ impl<'source> TreeSitterNormalizer<'source> { let (receiver, method) = self.member_parts(function)?; let receiver = optional_node(self.normalize_node(receiver)); let args = list_or_nil(args, node, self); - return Some(self.wrap( - "CALL", - vec![receiver, Child::Symbol(method), args], - node, - )); + return Some(self.wrap("CALL", vec![receiver, Child::Symbol(method), args], node)); } let function = optional_node(self.normalize_node(function)); let args = list_or_nil(args, node, self); @@ -1033,7 +1087,10 @@ impl<'source> TreeSitterNormalizer<'source> { if receiver.kind() == "self" { return Some(self.wrap( "FCALL", - vec![Child::Symbol("[]".to_string()), list_or_nil(args, node, self)], + vec![ + Child::Symbol("[]".to_string()), + list_or_nil(args, node, self), + ], node, )); } @@ -1049,7 +1106,9 @@ impl<'source> TreeSitterNormalizer<'source> { fn normalize_rescue_modifier(&mut self, node: TreeSitterNode<'_>) -> Option { let named = self.named_children(node); let body = named.first().and_then(|body| self.normalize_node(*body)); - let handler = named.get(1).and_then(|handler| self.normalize_node(*handler)); + let handler = named + .get(1) + .and_then(|handler| self.normalize_node(*handler)); let resbody = self.wrap( "RESBODY", vec![Child::Nil, optional_node(handler), Child::Nil], @@ -1118,7 +1177,8 @@ impl<'source> TreeSitterNormalizer<'source> { .copied() .take_while(|child| child.kind() != "ensure") .collect::>(); - let body = self.normalize_body_nodes(body_nodes.clone(), *body_nodes.first().unwrap_or(&node)); + let body = + 
self.normalize_body_nodes(body_nodes.clone(), *body_nodes.first().unwrap_or(&node)); let ensure_body = self.normalize_body(ensure_node); return Some(self.wrap( "ENSURE", @@ -1132,7 +1192,8 @@ impl<'source> TreeSitterNormalizer<'source> { .copied() .take_while(|child| child.kind() != "rescue") .collect::>(); - let body = self.normalize_body_nodes(body_nodes.clone(), *body_nodes.first().unwrap_or(&node)); + let body = + self.normalize_body_nodes(body_nodes.clone(), *body_nodes.first().unwrap_or(&node)); let resbodies = rescue_nodes .iter() .filter_map(|child| self.normalize_rescue_clause(*child)) @@ -1172,13 +1233,20 @@ impl<'source> TreeSitterNormalizer<'source> { .unwrap_or_default(); let exception_variable = self.rescue_exception_variable(node); let handler = self.named_children(node).into_iter().rev().find(|child| { - !matches!(child.kind(), "exceptions" | "exception_variable" | "comment") + !matches!( + child.kind(), + "exceptions" | "exception_variable" | "comment" + ) }); let normalized_handler = handler.and_then(|handler| self.normalize_body(handler)); let body = self.prepend_rescue_exception_assignment(normalized_handler, exception_variable); Some(self.wrap( "RESBODY", - vec![list_or_nil(exception_nodes, exceptions.unwrap_or(node), self), optional_node(body), Child::Nil], + vec![ + list_or_nil(exception_nodes, exceptions.unwrap_or(node), self), + optional_node(body), + Child::Nil, + ], node, )) } @@ -1244,7 +1312,10 @@ impl<'source> TreeSitterNormalizer<'source> { }; Some(Node { r#type: "BLOCK".to_string(), - children: vec![Child::Node(Box::new(assignment)), Child::Node(Box::new(body))], + children: vec![ + Child::Node(Box::new(assignment)), + Child::Node(Box::new(body)), + ], first_lineno, first_column, last_lineno, @@ -1260,11 +1331,7 @@ impl<'source> TreeSitterNormalizer<'source> { let condition = *named.last()?; let condition = optional_node(self.normalize_node(condition)); let action = optional_node(self.normalize_node(action)); - Some(self.wrap( - 
"IF", - vec![condition, action, Child::Nil], - node, - )) + Some(self.wrap("IF", vec![condition, action, Child::Nil], node)) } fn normalize_modifier_action(&mut self, node: TreeSitterNode<'_>) -> Option { @@ -1330,7 +1397,8 @@ impl<'source> TreeSitterNormalizer<'source> { } fn normalize_visibility_inline_def(&mut self, node: TreeSitterNode<'_>) -> Option { - let message = node_text(self.named_children(node).into_iter().next()?, self.source).to_string(); + let message = + node_text(self.named_children(node).into_iter().next()?, self.source).to_string(); let args = self .named_children(node) .into_iter() @@ -1392,7 +1460,10 @@ impl<'source> TreeSitterNormalizer<'source> { fn normalize_global_variable(&self, node: TreeSitterNode<'_>) -> Node { let text = node_text(node, self.source).to_string(); - if let Some(number) = text.strip_prefix('$').and_then(|value| value.parse::().ok()) { + if let Some(number) = text + .strip_prefix('$') + .and_then(|value| value.parse::().ok()) + { return self.wrap("NTH_REF", vec![Child::String(number.to_string())], node); } self.wrap("GVAR", vec![Child::String(text)], node) @@ -1450,18 +1521,26 @@ impl<'source> TreeSitterNormalizer<'source> { } else { Some(self.list(exprs, node)) }; - Some(self.wrap("EVSTR", body.into_iter().map(|node| Child::Node(Box::new(node))).collect(), node)) + Some( + self.wrap( + "EVSTR", + body.into_iter() + .map(|node| Child::Node(Box::new(node))) + .collect(), + node, + ), + ) } fn normalize_heredoc_beginning(&mut self, node: TreeSitterNode<'_>) -> Node { - let heredoc_body = node - .parent() - .and_then(|parent| parent.parent()) - .and_then(|body_statement| { - self.named_children(body_statement) - .into_iter() - .find(|child| child.kind() == "heredoc_body") - }); + let heredoc_body = + node.parent() + .and_then(|parent| parent.parent()) + .and_then(|body_statement| { + self.named_children(body_statement) + .into_iter() + .find(|child| child.kind() == "heredoc_body") + }); let children = heredoc_body 
.map(|body| self.normalize_heredoc_children(body)) .unwrap_or_default(); @@ -1507,7 +1586,10 @@ impl<'source> TreeSitterNormalizer<'source> { let value = optional_node(self.normalize_node(value)); Some(self.wrap( "LASGN", - vec![Child::Symbol(node_text(name, self.source).to_string()), value], + vec![ + Child::Symbol(node_text(name, self.source).to_string()), + value, + ], param, )) }) @@ -1642,11 +1724,25 @@ impl<'source> TreeSitterNormalizer<'source> { } if matches!( node.kind(), - "method_parameters" | "parameters" | "parameter_list" | "formal_parameters" - | "block_parameters" | "lambda_parameters" + "method_parameters" + | "parameters" + | "parameter_list" + | "formal_parameters" + | "function_value_parameters" + | "parameter" + | "block_parameters" + | "lambda_parameters" ) { - for child in self.named_children(node) { - self.collect_identifier_names(child, locals); + if node.kind() == "parameter" { + self.collect_parameter_names(node, locals); + } else { + for child in self.named_children(node) { + if child.kind() == "parameter" { + self.collect_parameter_names(child, locals); + } else { + self.collect_identifier_names(child, locals); + } + } } } if matches!(node.kind(), "assignment" | "operator_assignment") { @@ -1654,6 +1750,11 @@ impl<'source> TreeSitterNormalizer<'source> { self.collect_assignment_target_names(left, locals); } } + for target in self.declaration_entries(node) { + if let Some(name) = self.declaration_name(target) { + self.collect_assignment_target_names(name, locals); + } + } for child in self.named_children(node) { if !self.ruby_scope_boundary(child) { self.collect_ruby_scope_locals(child, locals, false); @@ -1667,7 +1768,11 @@ impl<'source> TreeSitterNormalizer<'source> { locals: &mut BTreeSet, ) { if self.identifier_kind(node.kind()) { - locals.insert(node_text(node, self.source).trim_start_matches('*').to_string()); + locals.insert( + node_text(node, self.source) + .trim_start_matches('*') + .to_string(), + ); return; } if matches!( @@ 
-1677,6 +1782,7 @@ impl<'source> TreeSitterNormalizer<'source> { | "splat" | "splat_parameter" | "rest_assignment" + | "pattern" ) { for child in self.named_children(node) { self.collect_assignment_target_names(child, locals); @@ -1686,13 +1792,35 @@ impl<'source> TreeSitterNormalizer<'source> { fn collect_identifier_names(&self, node: TreeSitterNode<'_>, locals: &mut BTreeSet) { if self.identifier_kind(node.kind()) { - locals.insert(node_text(node, self.source).trim_start_matches('*').to_string()); + locals.insert( + node_text(node, self.source) + .trim_start_matches('*') + .to_string(), + ); } for child in self.named_children(node) { self.collect_identifier_names(child, locals); } } + fn collect_parameter_names(&self, node: TreeSitterNode<'_>, locals: &mut BTreeSet) { + if let Some(name) = self.named_field(node, "name") { + self.collect_identifier_names(name, locals); + return; + } + if let Some(name) = self + .named_children(node) + .into_iter() + .find(|child| self.identifier_kind(child.kind())) + { + locals.insert( + node_text(name, self.source) + .trim_start_matches('*') + .to_string(), + ); + } + } + fn ruby_scope_boundary(&self, node: TreeSitterNode<'_>) -> bool { if node.kind() == "block" && node @@ -1702,6 +1830,15 @@ impl<'source> TreeSitterNormalizer<'source> { { return false; } + if node.kind() == "block" + && node + .parent() + .and_then(|parent| parent.parent()) + .map(|grandparent| function_kind(grandparent.kind())) + .unwrap_or(false) + { + return false; + } if matches!(node.kind(), "block" | "do_block") && node .parent() @@ -1754,8 +1891,10 @@ impl<'source> TreeSitterNormalizer<'source> { | "parameters" | "parameter_list" | "formal_parameters" + | "function_value_parameters" | "block_parameters" | "lambda_parameters" + | "parameter" | "optional_parameter" | "keyword_parameter" | "block_parameter" @@ -1823,12 +1962,9 @@ impl<'source> TreeSitterNormalizer<'source> { let node_type = if keyword == "unless" { "UNLESS" } else { "IF" }; let condition = 
optional_node(self.normalize_node(condition)); let consequence = optional_node(consequence.and_then(|child| self.normalize_body(child))); - let alternative = optional_node(alternative.and_then(|child| self.normalize_else_or_branch(child))); - Some(self.wrap( - node_type, - vec![condition, consequence, alternative], - node, - )) + let alternative = + optional_node(alternative.and_then(|child| self.normalize_else_or_branch(child))); + Some(self.wrap(node_type, vec![condition, consequence, alternative], node)) } fn command_call_statement(&self, node: TreeSitterNode<'_>) -> bool { @@ -1862,7 +1998,11 @@ impl<'source> TreeSitterNormalizer<'source> { self.named_children(node) .into_iter() .find(|child| child.kind() == "argument_list") - .map(|args| node_text(args, self.source).trim_start().starts_with("def ")) + .map(|args| { + node_text(args, self.source) + .trim_start() + .starts_with("def ") + }) .unwrap_or(false) } @@ -1945,9 +2085,12 @@ impl<'source> TreeSitterNormalizer<'source> { if !text.contains("def ") || !text.split_whitespace().nth(1).unwrap_or("").contains('.') { return None; } - self.named_children(source) - .into_iter() - .find(|child| matches!(child.kind(), "self" | "this" | "constant" | "scope_resolution")) + self.named_children(source).into_iter().find(|child| { + matches!( + child.kind(), + "self" | "this" | "constant" | "scope_resolution" + ) + }) } fn inline_def_name_after_receiver( @@ -1965,7 +2108,11 @@ impl<'source> TreeSitterNormalizer<'source> { } fn inline_def_body<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { - let mut stack = self.named_children(node).into_iter().rev().collect::>(); + let mut stack = self + .named_children(node) + .into_iter() + .rev() + .collect::>(); while let Some(child) = stack.pop() { if child.kind() == "body_statement" { return Some(child); @@ -2003,8 +2150,10 @@ impl<'source> TreeSitterNormalizer<'source> { } fn boolean_expression(&self, node: TreeSitterNode<'_>) -> bool { - (matches!(node.kind(), "binary" | 
"binary_expression" | "boolean_operator") - || self.boolean_statement(node)) + (matches!( + node.kind(), + "binary" | "binary_expression" | "boolean_operator" + ) || self.boolean_statement(node)) && matches!(self.boolean_operator(node).as_deref(), Some("and" | "or")) } @@ -2026,7 +2175,10 @@ impl<'source> TreeSitterNormalizer<'source> { } node.children(&mut node.walk()).all(|child| { child.is_named() - || matches!(node_text(child, self.source), "&&" | "||" | "and" | "or" | "(" | ")") + || matches!( + node_text(child, self.source), + "&&" | "||" | "and" | "or" | "(" | ")" + ) }) } @@ -2039,11 +2191,13 @@ impl<'source> TreeSitterNormalizer<'source> { } fn comparison_expression(&self, node: TreeSitterNode<'_>) -> bool { - matches!(node.kind(), "binary" | "binary_expression" | "comparison_operator") - && self - .comparison_operator(node) - .map(|operator| COMPARISON_OPERATORS.contains(&operator.as_str())) - .unwrap_or(false) + matches!( + node.kind(), + "binary" | "binary_expression" | "comparison_operator" + ) && self + .comparison_operator(node) + .map(|operator| COMPARISON_OPERATORS.contains(&operator.as_str())) + .unwrap_or(false) } fn infix_statement(&self, node: TreeSitterNode<'_>) -> bool { @@ -2129,9 +2283,10 @@ impl<'source> TreeSitterNormalizer<'source> { matches!(node.kind(), "body_statement" | "block_body" | "statement") && self.call_block(node).is_some() && (self.dotted_call(node) - || self.named_children(node).into_iter().any(|child| { - self.call_kind(child.kind()) || self.member_read_node(child) - })) + || self + .named_children(node) + .into_iter() + .any(|child| self.call_kind(child.kind()) || self.member_read_node(child))) } fn dotted_expression(&self, node: TreeSitterNode<'_>) -> bool { @@ -2266,10 +2421,7 @@ impl<'source> TreeSitterNormalizer<'source> { fn command_arguments(&mut self, args: TreeSitterNode<'_>) -> Vec { let children = self.named_children(args); if children.is_empty() { - return self - .scalar_argument_list_value(args) - .into_iter() 
- .collect(); + return self.scalar_argument_list_value(args).into_iter().collect(); } if self.dotted_expression(args) { return self.normalize_dotted_expression(args).into_iter().collect(); @@ -2278,7 +2430,10 @@ impl<'source> TreeSitterNormalizer<'source> { && self.call_kind(children[0].kind()) && self.call_block(children[0]).is_some() { - return self.normalize_call_with_block(children[0]).into_iter().collect(); + return self + .normalize_call_with_block(children[0]) + .into_iter() + .collect(); } children .into_iter() @@ -2326,11 +2481,108 @@ impl<'source> TreeSitterNormalizer<'source> { .or_else(|| self.named_children(node).into_iter().next()) } - fn assignment_right<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + fn assignment_right<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { self.named_field(node, "right") .or_else(|| self.named_children(node).into_iter().nth(1)) } + fn parameters_child<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.named_field(node, "parameters").or_else(|| { + self.named_children(node).into_iter().find(|child| { + matches!( + child.kind(), + "parameters" + | "parameter_list" + | "formal_parameters" + | "function_value_parameters" + | "method_parameters" + ) + }) + }) + } + + fn declaration_entries<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Vec> { + if matches!(node.kind(), "local_variable_declaration") { + let entries = self + .named_children(node) + .into_iter() + .filter(|child| child.kind() == "variable_declarator") + .collect::>(); + if !entries.is_empty() { + return entries; + } + } + if matches!( + node.kind(), + "local_variable_declaration" + | "variable_declarator" + | "variable_declaration" + | "property_declaration" + ) { + vec![node] + } else { + Vec::new() + } + } + + fn declaration_name<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + if let Some(name) = self.named_field(node, "name") { + return Some(name); + } + + for child in 
self.named_children(node) { + if child.kind() == "variable_declaration" { + if let Some(name) = self.declaration_name(child) { + return Some(name); + } + } + if matches!(child.kind(), "identifier" | "simple_identifier" | "pattern") { + return Some(child); + } + } + None + } + + fn declaration_value<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + if node.kind() == "property_declaration" { + let mut after_target = false; + for child in self.named_children(node) { + if !after_target && matches!(child.kind(), "variable_declaration" | "pattern") { + after_target = true; + continue; + } + if after_target && !declaration_metadata_kind(child.kind()) { + return Some(child); + } + } + } + + self.named_field(node, "value").or_else(|| { + self.named_children(node).into_iter().find(|child| { + !declaration_metadata_kind(child.kind()) + && !matches!( + child.kind(), + "identifier" | "simple_identifier" | "pattern" | "variable_declaration" + ) + }) + }) + } + fn assignment_target( &mut self, left: TreeSitterNode<'_>, @@ -2406,13 +2658,14 @@ impl<'source> TreeSitterNormalizer<'source> { .next_named_sibling() .and_then(|sibling| self.normalize_node(sibling)); let source = node.parent().unwrap_or(node); - self.assignment_target(node, right.clone(), source).or_else(|| { - Some(self.wrap( - "LASGN", - vec![Child::String(self.target_name(node)), optional_node(right)], - node, - )) - }) + self.assignment_target(node, right.clone(), source) + .or_else(|| { + Some(self.wrap( + "LASGN", + vec![Child::String(self.target_name(node)), optional_node(right)], + node, + )) + }) } fn target_name(&self, node: TreeSitterNode<'_>) -> String { @@ -2424,17 +2677,26 @@ impl<'source> TreeSitterNormalizer<'source> { fn function_name(&self, node: TreeSitterNode<'_>) -> Option { self.named_field(node, "name") .or_else(|| { - self.named_children(node).into_iter().find(|child| { - self.identifier_kind(child.kind()) || child.kind() == "constant" - }) + self.named_children(node) + 
.into_iter() + .find(|child| self.identifier_kind(child.kind()) || child.kind() == "constant") }) .map(|name| node_text(name, self.source).to_string()) } fn block_child<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { - self.named_children(node) - .into_iter() - .find(|child| matches!(child.kind(), "body_statement" | "block_body" | "block")) + self.named_children(node).into_iter().find(|child| { + matches!( + child.kind(), + "body_statement" + | "block_body" + | "block" + | "class_body" + | "function_body" + | "statements" + | "control_structure_body" + ) + }) } fn call_block<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { @@ -2457,11 +2719,7 @@ impl<'source> TreeSitterNormalizer<'source> { .collect() } - fn source_before_child( - &self, - node: TreeSitterNode<'_>, - child: TreeSitterNode<'_>, - ) -> Node { + fn source_before_child(&self, node: TreeSitterNode<'_>, child: TreeSitterNode<'_>) -> Node { let text = self .source .get(node.start_byte()..child.start_byte()) @@ -2544,6 +2802,7 @@ impl<'source> TreeSitterNormalizer<'source> { | "block_body" | "control_structure_body" | "function_body" + | "statements" ) } @@ -2592,7 +2851,9 @@ impl<'source> TreeSitterNormalizer<'source> { "SCOPE" => { if node.children.len() > 2 { let child = std::mem::replace(&mut node.children[2], Child::Nil); - if let Some(elided) = child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) { + if let Some(elided) = + child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { node.children[2] = Child::Node(Box::new(elided)); } } @@ -2601,7 +2862,9 @@ impl<'source> TreeSitterNormalizer<'source> { for index in [1usize, 2usize] { if node.children.len() > index { let child = std::mem::replace(&mut node.children[index], Child::Nil); - if let Some(elided) = child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) { + if let Some(elided) = + child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { node.children[index] = 
Child::Node(Box::new(elided)); } } @@ -2611,7 +2874,9 @@ impl<'source> TreeSitterNormalizer<'source> { let index = if node.r#type == "CASE" { 1 } else { 0 }; if node.children.len() > index { let child = std::mem::replace(&mut node.children[index], Child::Nil); - if let Some(elided) = child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) { + if let Some(elided) = + child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { node.children[index] = Child::Node(Box::new(elided)); } } @@ -2620,7 +2885,9 @@ impl<'source> TreeSitterNormalizer<'source> { for index in [1usize, 2usize] { if node.children.len() > index { let child = std::mem::replace(&mut node.children[index], Child::Nil); - if let Some(elided) = child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) { + if let Some(elided) = + child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { node.children[index] = Child::Node(Box::new(elided)); } } @@ -2630,7 +2897,9 @@ impl<'source> TreeSitterNormalizer<'source> { for index in [0usize, 1usize] { if node.children.len() > index { let child = std::mem::replace(&mut node.children[index], Child::Nil); - if let Some(elided) = child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) { + if let Some(elided) = + child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { node.children[index] = Child::Node(Box::new(elided)); } } @@ -2722,15 +2991,27 @@ fn dynamic_scope(mut node: Node) -> Node { } fn assignment_operator(text: &str) -> bool { + matches!(text, "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&&=" | "||=") +} + +fn declaration_metadata_kind(kind: &str) -> bool { matches!( - text, - "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&&=" | "||=" + kind, + "modifiers" + | "type" + | "nullable_type" + | "parenthesized_type" + | "user_type" + | "type_identifier" + | "integral_type" + | "floating_point_type" + | "void_type" ) } fn kind_type(kind: &str) -> &str { match kind { - 
"body_statement" | "block_body" | "block" => "BLOCK", + "body_statement" | "block_body" | "block" | "statements" => "BLOCK", other => other, } } @@ -2738,7 +3019,12 @@ fn kind_type(kind: &str) -> &str { fn if_kind(kind: &str) -> bool { matches!( kind, - "if" | "if_statement" | "if_modifier" | "unless" | "unless_modifier" | "if_expression" | "conditional" + "if" | "if_statement" + | "if_modifier" + | "unless" + | "unless_modifier" + | "if_expression" + | "conditional" ) } @@ -2863,14 +3149,17 @@ end nodes_of_type(&root, "DEFS", &mut defs); assert!( - defs.iter() - .any(|node| node.children.get(1) == Some(&Child::Symbol("collect_payload_binding_names".to_string()))), + defs.iter().any(|node| node.children.get(1) + == Some(&Child::Symbol("collect_payload_binding_names".to_string()))), "expected normalized DEFS for visibility-wrapped singleton def, got {root:#?}" ); let def = defs .into_iter() - .find(|node| node.children.get(1) == Some(&Child::Symbol("collect_payload_binding_names".to_string()))) + .find(|node| { + node.children.get(1) + == Some(&Child::Symbol("collect_payload_binding_names".to_string())) + }) .expect("visibility-wrapped singleton def should normalize to DEFS"); let mut calls = Vec::new(); nodes_of_type(def, "CALL", &mut calls); @@ -2913,7 +3202,10 @@ end .iter() .find(|node| node.text == "<<~ZIG.chomp") .expect("expected heredoc chomp call"); - assert_eq!(call.children.get(1), Some(&Child::Symbol("chomp".to_string()))); + assert_eq!( + call.children.get(1), + Some(&Child::Symbol("chomp".to_string())) + ); assert_eq!( call.children .first() @@ -2945,10 +3237,7 @@ end }; let and_node = Node { r#type: "AND".to_string(), - children: vec![ - Child::Node(Box::new(left)), - Child::Node(Box::new(right)), - ], + children: vec![Child::Node(Box::new(left)), Child::Node(Box::new(right))], first_lineno: 1, first_column: 0, last_lineno: 1, diff --git a/gems/decomplex/rust/src/decomplex/detectors/co_update.rs 
b/gems/decomplex/rust/src/decomplex/detectors/co_update.rs index 384355888..e09d02e4f 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/co_update.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/co_update.rs @@ -87,10 +87,13 @@ impl Report { } map.entry(key).or_default().push(w.clone()); } - let by_unit = keys.into_iter().map(|k| { - let v = map.remove(&k).unwrap(); - (k, v) - }).collect(); + let by_unit = keys + .into_iter() + .map(|k| { + let v = map.remove(&k).unwrap(); + (k, v) + }) + .collect(); Self { writes, by_unit } } @@ -98,11 +101,16 @@ impl Report { let mut keys = Vec::new(); let mut counts: BTreeMap, Vec<(String, String)>> = BTreeMap::new(); for (unit, ws) in &self.by_unit { - let mut attrs: Vec<_> = ws.iter().map(|w| w.attr.clone()).collect::>().into_iter().collect(); + let mut attrs: Vec<_> = ws + .iter() + .map(|w| w.attr.clone()) + .collect::>() + .into_iter() + .collect(); attrs.sort(); - + for i in 0..attrs.len() { - for j in i+1..attrs.len() { + for j in i + 1..attrs.len() { let pair = vec![attrs[i].clone(), attrs[j].clone()]; if !counts.contains_key(&pair) { keys.push(pair.clone()); @@ -115,11 +123,16 @@ impl Report { let mut out = Vec::new(); for pair in keys { let units = counts.remove(&pair).unwrap(); - if units.len() < min_support { continue; } + if units.len() < min_support { + continue; + } out.push(CoWrittenPair { pair, support: units.len(), - sites: units.into_iter().map(|(f, d)| format!("{}:{}", f, d)).collect(), + sites: units + .into_iter() + .map(|(f, d)| format!("{}:{}", f, d)) + .collect(), }); } out.sort_by(|a, b| b.support.cmp(&a.support)); @@ -135,7 +148,7 @@ impl Report { for p in &pairs { let a = &p.pair[0]; let b = &p.pair[1]; - + let (has, miss) = if attrs.contains(a) && !attrs.contains(b) { (Some(a), Some(b)) } else if attrs.contains(b) && !attrs.contains(a) { diff --git a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs index 
0a35baf0c..1c7ee35bb 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs @@ -105,9 +105,15 @@ impl DecisionPressure { "IVAR" => true, "CALL" | "QCALL" => { let recv = n.children.get(0).and_then(ast::node); - let mid = n.children.get(1).and_then(|c| match c { Child::Symbol(s) => Some(s), _ => None }); + let mid = n.children.get(1).and_then(|c| match c { + Child::Symbol(s) => Some(s), + _ => None, + }); let args = n.children.get(2); - recv.is_some() && (args.is_none() || matches!(args, Some(Child::Nil)) || mid.map(|s| s.as_str()) == Some("[]")) + recv.is_some() + && (args.is_none() + || matches!(args, Some(Child::Nil)) + || mid.map(|s| s.as_str()) == Some("[]")) } _ => false, } @@ -117,25 +123,42 @@ impl DecisionPressure { Hit { contract, file: self.file.clone(), - defn: defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + defn: defstack + .last() + .cloned() + .unwrap_or_else(|| "(top-level)".to_string()), line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], } } - fn record_decision(&mut self, node: &Node, defstack: &[String], asgmap: &BTreeMap) { + fn record_decision( + &mut self, + node: &Node, + defstack: &[String], + asgmap: &BTreeMap, + ) { if !matches!(node.r#type.as_str(), "CALL" | "QCALL") { return; } let recv = node.children.get(0).and_then(ast::node); - let mid = node.children.get(1).and_then(|c| match c { Child::Symbol(s) => Some(s), _ => None }); + let mid = node.children.get(1).and_then(|c| match c { + Child::Symbol(s) => Some(s), + _ => None, + }); let _args = node.children.get(2); let Some(recv) = recv else { return }; let Some(mid) = mid else { return }; - let guard = (node.r#type == "CALL" && GUARD_MIDS.contains(&mid.as_str())) || node.r#type == "QCALL"; + let guard = + (node.r#type 
== "CALL" && GUARD_MIDS.contains(&mid.as_str())) || node.r#type == "QCALL"; if guard { if let Some(c) = self.contract_of(recv, asgmap, 0) { self.guard_hits.push(self.hit(c, defstack, node)); @@ -150,7 +173,12 @@ impl DecisionPressure { } } - fn record_rescue_nil(&mut self, node: &Node, defstack: &[String], asgmap: &BTreeMap) { + fn record_rescue_nil( + &mut self, + node: &Node, + defstack: &[String], + asgmap: &BTreeMap, + ) { if node.r#type != "RESCUE" { return; } @@ -159,23 +187,42 @@ impl DecisionPressure { let resb = node.children.get(1).and_then(ast::node); let Some(resb) = resb else { return }; - if resb.r#type != "RESBODY" { return }; - if !matches!(resb.children.get(0), None | Some(Child::Nil)) { return }; + if resb.r#type != "RESBODY" { + return; + }; + if !matches!(resb.children.get(0), None | Some(Child::Nil)) { + return; + }; let handler = resb.children.get(1); - let nil_handler = matches!(handler, None | Some(Child::Nil)) || handler.and_then(ast::node).map(|n| n.r#type == "NIL").unwrap_or(false); - if !nil_handler { return }; + let nil_handler = matches!(handler, None | Some(Child::Nil)) + || handler + .and_then(ast::node) + .map(|n| n.r#type == "NIL") + .unwrap_or(false); + if !nil_handler { + return; + }; let Some(body) = body else { return }; - if !matches!(body.r#type.as_str(), "CALL" | "QCALL") { return }; + if !matches!(body.r#type.as_str(), "CALL" | "QCALL") { + return; + }; if let Some(c) = self.contract_of(body, asgmap, 0) { self.guard_hits.push(self.hit(c, defstack, node)); } } - fn contract_of(&self, n: &Node, asgmap: &BTreeMap, depth: usize) -> Option { - if depth >= 8 { return None; } + fn contract_of( + &self, + n: &Node, + asgmap: &BTreeMap, + depth: usize, + ) -> Option { + if depth >= 8 { + return None; + } match n.r#type.as_str() { "LVAR" | "DVAR" => { @@ -196,12 +243,18 @@ impl DecisionPressure { } "CALL" | "QCALL" => { let recv = n.children.get(0).and_then(ast::node); - let mid = n.children.get(1).and_then(|c| match c { 
Child::Symbol(s) => Some(s), _ => None })?; + let mid = n.children.get(1).and_then(|c| match c { + Child::Symbol(s) => Some(s), + _ => None, + })?; let args = n.children.get(2); if mid == "[]" { let key = if let Some(Child::Node(node)) = args { - node.children.iter().filter(|c| !matches!(c, Child::Nil)).next() + node.children + .iter() + .filter(|c| !matches!(c, Child::Nil)) + .next() } else { None }; @@ -210,7 +263,10 @@ impl DecisionPressure { _ => "nil".to_string(), // Simplified key.inspect }; Some(format!("[{}]", kt)) - } else if (args.is_none() || matches!(args, Some(Child::Nil))) && recv.is_some() && !TRANSIENT_NOARG_MIDS.contains(&mid.as_str()) { + } else if (args.is_none() || matches!(args, Some(Child::Nil))) + && recv.is_some() + && !TRANSIENT_NOARG_MIDS.contains(&mid.as_str()) + { Some(format!(".{}", mid)) } else { None @@ -222,7 +278,7 @@ impl DecisionPressure { } None } - _ => None + _ => None, } } } @@ -248,28 +304,42 @@ impl Report { rows_map.entry(h.contract.clone()).or_default().push(h); } - let rows: Vec<_> = rows_map.into_iter().map(|(contract, hs)| { - let mut methods_set = BTreeSet::new(); - for h in &hs { - methods_set.insert((&h.file, &h.defn)); - } - let sites = hs.iter().map(|h| loc(h)).collect(); - let spans = hs.iter().map(|h| (loc(h), h.span)).collect(); - let essential = ess.get(&contract).cloned().unwrap_or(0); - DecisionPressureRow { - contract, - decisions: hs.len(), - essential, - methods: methods_set.len(), - sites, - spans, - } - }).collect(); - - let mut named: Vec<_> = rows.iter().filter(|r| r.contract != "~local").cloned().collect(); - named.sort_by(|a, b| b.decisions.cmp(&a.decisions).then_with(|| b.methods.cmp(&a.methods))); - - let local: Vec<_> = rows.into_iter().filter(|r| r.contract == "~local").collect(); + let rows: Vec<_> = rows_map + .into_iter() + .map(|(contract, hs)| { + let mut methods_set = BTreeSet::new(); + for h in &hs { + methods_set.insert((&h.file, &h.defn)); + } + let sites = hs.iter().map(|h| 
loc(h)).collect(); + let spans = hs.iter().map(|h| (loc(h), h.span)).collect(); + let essential = ess.get(&contract).cloned().unwrap_or(0); + DecisionPressureRow { + contract, + decisions: hs.len(), + essential, + methods: methods_set.len(), + sites, + spans, + } + }) + .collect(); + + let mut named: Vec<_> = rows + .iter() + .filter(|r| r.contract != "~local") + .cloned() + .collect(); + named.sort_by(|a, b| { + b.decisions + .cmp(&a.decisions) + .then_with(|| b.methods.cmp(&a.methods)) + }); + + let local: Vec<_> = rows + .into_iter() + .filter(|r| r.contract == "~local") + .collect(); named.into_iter().chain(local).collect() } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs index 3ba3e118f..24a576849 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs @@ -113,7 +113,11 @@ fn analyze(file: &str, defn: &str, stmts: &[&Node]) -> Vec { if let Some(val) = n.children.get(1).and_then(ast::node) { lvars(val, &mut deps); } - let mut deps: Vec<_> = deps.into_iter().collect::>().into_iter().collect(); + let mut deps: Vec<_> = deps + .into_iter() + .collect::>() + .into_iter() + .collect(); deps.sort(); Asgn { name: match n.children.first().unwrap() { @@ -143,7 +147,10 @@ fn analyze(file: &str, defn: &str, stmts: &[&Node]) -> Vec { let Some(reasn) = reasn else { continue }; // b recomputed at or after a's reassignment? 
- let recomputed = asgns.iter().skip(i + 1).any(|x| &x.name == &b.name && x.line >= reasn.line); + let recomputed = asgns + .iter() + .skip(i + 1) + .any(|x| &x.name == &b.name && x.line >= reasn.line); if recomputed { continue; } diff --git a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs index 56a6214bd..7ae5a5062 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs @@ -39,15 +39,34 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result) -> Self { - Self { file, lines, sites: Vec::new() } + Self { + file, + lines, + sites: Vec::new(), + } } fn walk(&mut self, node: &Node, defstack: &[String]) { @@ -102,7 +125,13 @@ impl FalseSimplicity { } "GVAR" | "GASGN" => { if let Some(name) = ast::child_to_string(node.children.get(0)) { - if !name.starts_with("$PREMATCH") && !name.starts_with("$POSTMATCH") && !name.starts_with("$MATCH") && !name.starts_with("$&") && !name.starts_with("$'") && !name.starts_with("$`") { + if !name.starts_with("$PREMATCH") + && !name.starts_with("$POSTMATCH") + && !name.starts_with("$MATCH") + && !name.starts_with("$&") + && !name.starts_with("$'") + && !name.starts_with("$`") + { self.add_site("context_dependency", &name, node, defstack); } } @@ -134,7 +163,9 @@ impl FalseSimplicity { } fn receiver_is_explicit(&self, node: &Node) -> bool { - if matches!(node.r#type.as_str(), "FCALL" | "VCALL") { return false; } + if matches!(node.r#type.as_str(), "FCALL" | "VCALL") { + return false; + } if let Some(recv) = node.children.get(0).and_then(ast::node) { recv.r#type != "SELF" } else { @@ -147,9 +178,17 @@ impl FalseSimplicity { kind: kind.to_string(), detail: detail.to_string(), file: self.file.clone(), - defn: defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + defn: defstack + .last() + .cloned() + .unwrap_or_else(|| 
"(top-level)".to_string()), line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], }); } } @@ -159,18 +198,25 @@ struct Report { } impl Report { - fn new(sites: Vec) -> Self { Self { sites } } + fn new(sites: Vec) -> Self { + Self { sites } + } fn findings(&self) -> Vec { let mut groups: BTreeMap<(String, String), Vec<&Site>> = BTreeMap::new(); for s in &self.sites { - groups.entry((s.kind.clone(), s.detail.clone())).or_default().push(s); + groups + .entry((s.kind.clone(), s.detail.clone())) + .or_default() + .push(s); } let mut out = Vec::new(); for ((kind, detail), sts) in groups { let mut defns = BTreeSet::new(); - for s in &sts { defns.insert((s.file.clone(), s.defn.clone())); } + for s in &sts { + defns.insert((s.file.clone(), s.defn.clone())); + } let scatter = defns.len(); let mut sites = Vec::new(); @@ -192,7 +238,8 @@ impl Report { }); } out.sort_by(|a, b| { - b.scatter.cmp(&a.scatter) + b.scatter + .cmp(&a.scatter) .then_with(|| b.support.cmp(&a.support)) .then_with(|| a.kind.cmp(&b.kind)) .then_with(|| a.detail.cmp(&b.detail)) diff --git a/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs index 00234429d..2649caefb 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs @@ -38,7 +38,12 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result) -> Self { - Self { file, lines, reports: Vec::new() } + Self { + file, + lines, + reports: Vec::new(), + } } fn walk(&mut self, node: &Node, defstack: &[String]) { @@ -75,23 +84,36 @@ impl FatUnion { let (cond, first_when) = if node.r#type == "CASE2" { (None, node.children.get(0).and_then(ast::node)) } else { - (node.children.get(0).and_then(ast::node), node.children.get(1).and_then(ast::node)) + ( + 
node.children.get(0).and_then(ast::node), + node.children.get(1).and_then(ast::node), + ) }; let mut variants = BTreeMap::new(); let mut current_when = first_when; while let Some(when_node) = current_when { - if when_node.r#type != "WHEN" { break; } + if when_node.r#type != "WHEN" { + break; + } if let Some(pat) = when_node.children.get(0).and_then(ast::node) { if let Some(variant_name) = self.variant_name(pat) { - let reads = self.collect_reads(when_node.children.get(1).and_then(ast::node).unwrap_or(when_node)); + let reads = self.collect_reads( + when_node + .children + .get(1) + .and_then(ast::node) + .unwrap_or(when_node), + ); variants.insert(variant_name, VariantReads { reads }); } } current_when = when_node.children.get(2).and_then(ast::node); } - if variants.len() < 2 { return; } + if variants.len() < 2 { + return; + } let mut common = None; for v in variants.values() { @@ -105,14 +127,24 @@ impl FatUnion { } let common = common.unwrap_or_default(); - if common.is_empty() { return; } + if common.is_empty() { + return; + } let subject_name = self.subject_name(cond); let defn = defstack.last().map(|s| s.as_str()).unwrap_or(""); let at = format!("{}:{}:{}", self.file, defn, node.first_lineno); - + let mut spans = BTreeMap::new(); - spans.insert(at.clone(), [node.first_lineno, node.first_column, node.last_lineno, node.last_column]); + spans.insert( + at.clone(), + [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + ); let mut variant_set: Vec<_> = variants.keys().cloned().collect(); variant_set.sort(); @@ -129,10 +161,14 @@ impl FatUnion { } fn variant_name(&self, node: &Node) -> Option { - let n = if node.r#type == "LIST" { node.children.iter().filter_map(ast::node).next()? } else { node }; + let n = if node.r#type == "LIST" { + node.children.iter().filter_map(ast::node).next()? 
+ } else { + node + }; match n.r#type.as_str() { "CONSTANT" | "SCOPE_RESOLUTION" => Some(ast::slice(n, &self.lines)), - _ => None + _ => None, } } @@ -147,14 +183,24 @@ impl FatUnion { if let Some(Child::Symbol(mid)) = node.children.get(1) { out.push(Read { name: mid.clone(), - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], }); } } else if matches!(node.r#type.as_str(), "FCALL" | "VCALL") { if let Some(Child::Symbol(mid)) = node.children.get(0) { out.push(Read { name: mid.clone(), - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], }); } } @@ -164,7 +210,8 @@ impl FatUnion { } fn subject_name(&self, cond: Option<&Node>) -> String { - cond.map(|c| ast::slice(c, &self.lines)).unwrap_or_else(|| "implicit".to_string()) + cond.map(|c| ast::slice(c, &self.lines)) + .unwrap_or_else(|| "implicit".to_string()) } fn findings(&self) -> Vec { diff --git a/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs b/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs index ff42c39d7..307dccfbb 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs @@ -133,11 +133,7 @@ pub fn scan_files( Ok(scan_documents(&documents, mass, fuzzy)) } -pub fn scan_documents( - documents: &[Document], - mass: usize, - fuzzy: usize, -) -> Vec { +pub fn scan_documents(documents: &[Document], mass: usize, fuzzy: usize) -> Vec { let mut scanner = Scanner::new(mass, fuzzy); scanner.scan(documents) } @@ -186,13 +182,17 @@ impl Scanner { fn candidates_for_document(&mut self, document: &Document) -> Vec { self.source_lines .insert(document.file.clone(), document.lines.clone()); - self.method_spans - .insert(document.file.clone(), 
collect_method_spans(&document.function_defs)); + self.method_spans.insert( + document.file.clone(), + collect_method_spans(&document.function_defs), + ); let mut out = Vec::new(); let mut seen = HashSet::new(); for function in &document.function_defs { - if let Some(candidate) = self.candidate_for(&document.file, &function.body, Some("defn")) { + if let Some(candidate) = + self.candidate_for(&document.file, &function.body, Some("defn")) + { self.add_candidate(&mut out, &mut seen, candidate); } } @@ -209,13 +209,23 @@ impl Scanner { out } - fn add_candidate(&self, out: &mut Vec, seen: &mut HashSet, candidate: Candidate) { - if candidate.mass < self.effective_mass_floor() || typed_struct_schema_text(&candidate.raw) { + fn add_candidate( + &self, + out: &mut Vec, + seen: &mut HashSet, + candidate: Candidate, + ) { + if candidate.mass < self.effective_mass_floor() || typed_struct_schema_text(&candidate.raw) + { return; } let key = format!( "{}\0{}\0{:?}\0{}\0{}", - candidate.file, candidate.line, candidate.span, candidate.node_name, candidate.fingerprint + candidate.file, + candidate.line, + candidate.span, + candidate.node_name, + candidate.fingerprint ); if seen.insert(key) { out.push(candidate); @@ -274,11 +284,19 @@ impl Scanner { if cluster.len() < 2 { continue; } - let raw_count = cluster.iter().map(|candidate| candidate.raw.as_str()).collect::>().len(); + let raw_count = cluster + .iter() + .map(|candidate| candidate.raw.as_str()) + .collect::>() + .len(); if raw_count < 2 || self.typed_struct_schema_cluster(&cluster) { continue; } - let mass = cluster.iter().map(|candidate| candidate.mass).min().unwrap_or(0); + let mass = cluster + .iter() + .map(|candidate| candidate.mass) + .min() + .unwrap_or(0); out.push(self.finding_for(&cluster, "type2", mass)); } out @@ -303,7 +321,11 @@ impl Scanner { let mut seen = HashSet::new(); let mut out = Vec::new(); for rows in groups.values() { - let cluster = uniq_sites(rows.iter().map(|(candidate, _)| 
candidate.clone()).collect()); + let cluster = uniq_sites( + rows.iter() + .map(|(candidate, _)| candidate.clone()) + .collect(), + ); if cluster.len() < 2 { continue; } @@ -317,20 +339,34 @@ impl Scanner { } let mut key = cluster .iter() - .map(|candidate| format!("{}\0{}\0{}", candidate.file, candidate.line, candidate.node_name)) + .map(|candidate| { + format!( + "{}\0{}\0{}", + candidate.file, candidate.line, candidate.node_name + ) + }) .collect::>(); key.sort(); let key = key.join("\0"); if !seen.insert(key) { continue; } - let mass = rows.iter().map(|(_, signature_mass)| *signature_mass).max().unwrap_or(0); + let mass = rows + .iter() + .map(|(_, signature_mass)| *signature_mass) + .max() + .unwrap_or(0); out.push(self.finding_for(&cluster, "type3", mass)); } out } - fn finding_for(&self, cluster: &[Candidate], clone_type: &str, mass: usize) -> SimilarityFinding { + fn finding_for( + &self, + cluster: &[Candidate], + clone_type: &str, + mass: usize, + ) -> SimilarityFinding { let mut sites = cluster.iter().map(site_for).collect::>(); sites.sort(); SimilarityFinding { @@ -436,7 +472,8 @@ impl Scanner { } fn effective_mass_floor(&self) -> usize { - self.mass.max(((self.mass as f64) * 23.0 / 8.0).ceil() as usize) + self.mass + .max(((self.mass as f64) * 23.0 / 8.0).ceil() as usize) } } @@ -609,11 +646,16 @@ fn identifier_text(text: &str) -> bool { return false; }; (first == '_' || first.is_ascii_alphabetic()) - && chars.all(|char| char == '_' || char == '!' || char == '?' || char == '=' || char.is_ascii_alphanumeric()) + && chars.all(|char| { + char == '_' || char == '!' || char == '?' 
|| char == '=' || char.is_ascii_alphanumeric() + }) } fn literal_text(text: &str) -> bool { - if symbol_literal_text(text) || quoted_literal_text(text, '"') || quoted_literal_text(text, '\'') { + if symbol_literal_text(text) + || quoted_literal_text(text, '"') + || quoted_literal_text(text, '\'') + { return true; } text.parse::().is_ok() @@ -637,7 +679,11 @@ fn quoted_literal_text(text: &str, quote: char) -> bool { fn flay_node_name(node: &RawNode) -> &str { match node.kind.as_str() { - "method" | "function_definition" | "function_declaration" | "method_definition" | "function_item" => "defn", + "method" + | "function_definition" + | "function_declaration" + | "method_definition" + | "function_item" => "defn", "singleton_method" => "defs", other => other, } @@ -647,7 +693,10 @@ fn uniq_sites(candidates: Vec) -> Vec { let mut seen = HashSet::new(); let mut out = Vec::new(); for candidate in candidates { - let key = format!("{}\0{}\0{}", candidate.file, candidate.line, candidate.node_name); + let key = format!( + "{}\0{}\0{}", + candidate.file, candidate.line, candidate.node_name + ); if seen.insert(key) { out.push(candidate); } @@ -677,7 +726,10 @@ fn most_common_node(cluster: &[Candidate]) -> String { } fn site_for(candidate: &Candidate) -> String { - format!("{}:{}:{}", candidate.file, candidate.method_name, candidate.line) + format!( + "{}:{}:{}", + candidate.file, candidate.method_name, candidate.line + ) } fn nested_finding(inner: &SimilarityFinding, outer: &SimilarityFinding) -> bool { @@ -738,7 +790,13 @@ fn node_key(node: &RawNode) -> String { } fn combinations(size: usize, count: usize) -> Vec> { - fn step(start: usize, size: usize, count: usize, current: &mut Vec, out: &mut Vec>) { + fn step( + start: usize, + size: usize, + count: usize, + current: &mut Vec, + out: &mut Vec>, + ) { if current.len() == count { out.push(current.clone()); return; diff --git a/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs 
b/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs index d919b1597..b391d54d9 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs @@ -45,22 +45,38 @@ impl FunctionLcom { } fn findings(&mut self) -> Vec { - let mut out: Vec<_> = self.summaries.iter().filter_map(|s| self.finding_for(s)).collect(); + let mut out: Vec<_> = self + .summaries + .iter() + .filter_map(|s| self.finding_for(s)) + .collect(); out.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.at.cmp(&b.at))); out } fn finding_for(&self, summary: &local_flow::MethodSummary) -> Option { let all_locals = self.all_locals(summary); - if all_locals.len() < self.min_locals { return None } - if summary.statements.len() < self.min_statements { return None } + if all_locals.len() < self.min_locals { + return None; + } + if summary.statements.len() < self.min_statements { + return None; + } let components = self.connected_components(summary, &all_locals); - if components.len() < self.min_components { return None } + if components.len() < self.min_components { + return None; + } let score = (components.len() * 10) + all_locals.len() + summary.statements.len(); - if score < self.min_score { return None } - let mode = if self.late_join(summary, &components) { "late_join".to_string() } else { "disjoint".to_string() }; + if score < self.min_score { + return None; + } + let mode = if self.late_join(summary, &components) { + "late_join".to_string() + } else { + "disjoint".to_string() + }; let at = format!("{}:{}:{}", summary.file, summary.name, summary.line); let mut spans = BTreeMap::new(); @@ -88,7 +104,11 @@ impl FunctionLcom { locals } - fn connected_components(&self, summary: &local_flow::MethodSummary, locals: &BTreeSet) -> Vec> { + fn connected_components( + &self, + summary: &local_flow::MethodSummary, + locals: &BTreeSet, + ) -> Vec> { let mut adj: BTreeMap> = BTreeMap::new(); for s in &summary.statements { let 
mut touched: Vec<_> = s.reads.union(&s.writes).cloned().collect(); @@ -98,8 +118,12 @@ impl FunctionLcom { } for i in 0..touched.len() { for j in i + 1..touched.len() { - adj.entry(touched[i].clone()).or_default().insert(touched[j].clone()); - adj.entry(touched[j].clone()).or_default().insert(touched[i].clone()); + adj.entry(touched[i].clone()) + .or_default() + .insert(touched[j].clone()); + adj.entry(touched[j].clone()) + .or_default() + .insert(touched[i].clone()); } } } @@ -111,33 +135,57 @@ impl FunctionLcom { let mut component = BTreeSet::new(); let mut queue = vec![start]; while let Some(node) = queue.pop() { - if !unvisited.contains(&node) { continue; } + if !unvisited.contains(&node) { + continue; + } unvisited.remove(&node); component.insert(node.clone()); if let Some(neighbors) = adj.get(&node) { for n in neighbors { - if unvisited.contains(n) { queue.push(n.clone()); } + if unvisited.contains(n) { + queue.push(n.clone()); + } } } } - if component.len() > 0 { components.push(component); } + if component.len() > 0 { + components.push(component); + } } - components.retain(|c| c.len() > 1 || self.standalone_state_usage(summary, c.iter().next().unwrap())); + components.retain(|c| { + c.len() > 1 || self.standalone_state_usage(summary, c.iter().next().unwrap()) + }); components } fn standalone_state_usage(&self, summary: &local_flow::MethodSummary, local: &str) -> bool { - let reads: usize = summary.statements.iter().map(|s| s.reads.contains(local) as usize).sum(); - let writes: usize = summary.statements.iter().map(|s| s.writes.contains(local) as usize).sum(); + let reads: usize = summary + .statements + .iter() + .map(|s| s.reads.contains(local) as usize) + .sum(); + let writes: usize = summary + .statements + .iter() + .map(|s| s.writes.contains(local) as usize) + .sum(); reads + writes > 1 } - fn late_join(&self, summary: &local_flow::MethodSummary, components: &[BTreeSet]) -> bool { - let Some(last) = summary.statements.last() else { return false }; + fn 
late_join( + &self, + summary: &local_flow::MethodSummary, + components: &[BTreeSet], + ) -> bool { + let Some(last) = summary.statements.last() else { + return false; + }; let mut joined = 0; for c in components { - if last.reads.intersection(c).next().is_some() || last.writes.intersection(c).next().is_some() { + if last.reads.intersection(c).next().is_some() + || last.writes.intersection(c).next().is_some() + { joined += 1; } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs index 664a6f750..0c72b8deb 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs @@ -62,23 +62,105 @@ struct Path { const PATH_LIMIT: usize = 64; const IGNORED_MIDS: &[&str] = &[ - "abstract!", "alias_method", "any", "attr_accessor", "attr_reader", "attr_writer", "bind", - "cast", "checked", "enum", "extend", "final", "include", "interface!", "let", "must", "must_because", - "nilable", "override", "overridable", "params", "prepend", "private", "private_class_method", - "protected", "public", "require", "require_relative", "requires_ancestor", "sealed!", "sig", - "type_member", "type_template", "untyped", "unsafe", "void", - "a_kind_of", "after", "around", "before", "be", "be_a", "be_an", "be_empty", "be_falsey", "be_nil", - "be_truthy", "change", "contain_exactly", "context", "describe", "eq", "eql", "equal", "expect", - "have_attributes", "have_key", "have_received", "it", "match", "not_to", "raise_error", - "receive", "subject", "to", + "abstract!", + "alias_method", + "any", + "attr_accessor", + "attr_reader", + "attr_writer", + "bind", + "cast", + "checked", + "enum", + "extend", + "final", + "include", + "interface!", + "let", + "must", + "must_because", + "nilable", + "override", + "overridable", + "params", + "prepend", + "private", + "private_class_method", + "protected", + 
"public", + "require", + "require_relative", + "requires_ancestor", + "sealed!", + "sig", + "type_member", + "type_template", + "untyped", + "unsafe", + "void", + "a_kind_of", + "after", + "around", + "before", + "be", + "be_a", + "be_an", + "be_empty", + "be_falsey", + "be_nil", + "be_truthy", + "change", + "contain_exactly", + "context", + "describe", + "eq", + "eql", + "equal", + "expect", + "have_attributes", + "have_key", + "have_received", + "it", + "match", + "not_to", + "raise_error", + "receive", + "subject", + "to", ]; -const OPTIONAL_DIAGNOSTIC_MIDS: &[&str] = &["error!", "fixable!", "read_interpolated_string", "warn!"]; +const OPTIONAL_DIAGNOSTIC_MIDS: &[&str] = + &["error!", "fixable!", "read_interpolated_string", "warn!"]; const MUTATING_MIDS: &[&str] = &[ - "<<", "[]=", "add", "append", "clear", "collect!", "compact!", "concat", "declare", "delete", "delete_if", - "each_key=", "fill", "filter!", "keep_if", "mark", "merge!", "move", "push", "reject!", "replace", - "resolve", "shift", "stamp", "store", "unshift", "update", "write", + "<<", + "[]=", + "add", + "append", + "clear", + "collect!", + "compact!", + "concat", + "declare", + "delete", + "delete_if", + "each_key=", + "fill", + "filter!", + "keep_if", + "mark", + "merge!", + "move", + "push", + "reject!", + "replace", + "resolve", + "shift", + "stamp", + "store", + "unshift", + "update", + "write", ]; const NON_MUTATING_OPERATOR_MIDS: &[&str] = &["!", "!=", "!~"]; @@ -87,7 +169,10 @@ const MUTATING_SUFFIXES: &[&str] = &["!"]; pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let mut parsed = BTreeMap::new(); for file in files { - parsed.insert(file.to_string_lossy().to_string(), ast::parse_with_language(file, language)?); + parsed.insert( + file.to_string_lossy().to_string(), + ast::parse_with_language(file, language)?, + ); } let effect_index = EffectIndex::build(&parsed); @@ -114,7 +199,12 @@ struct ImplicitControlFlow<'a> { impl<'a> ImplicitControlFlow<'a> { fn new(file: 
String, lines: Vec, effect_index: &'a EffectIndex) -> Self { - Self { file, lines, effect_index, sequences: Vec::new() } + Self { + file, + lines, + effect_index, + sequences: Vec::new(), + } } fn walk(&mut self, node: &Node, owners: &[String]) { @@ -136,8 +226,14 @@ impl<'a> ImplicitControlFlow<'a> { fn record_method_paths(&mut self, node: &Node, owner: &str) { let defn = self.method_name(node); for path in self.method_paths(node) { - let calls: Vec<_> = path.calls.iter().map(|c| self.call_for(c, owner, &defn)).collect(); - if calls.iter().filter(|c| self.stateful_call(c)).count() < 2 { continue; } + let calls: Vec<_> = path + .calls + .iter() + .map(|c| self.call_for(c, owner, &defn)) + .collect(); + if calls.iter().filter(|c| self.stateful_call(c)).count() < 2 { + continue; + } self.sequences.push(MethodSequence { file: self.file.clone(), @@ -154,10 +250,14 @@ impl<'a> ImplicitControlFlow<'a> { } fn paths_for_statements(&self, statements: &[&Node], depth: usize) -> Vec { - if depth > 10 { return vec![self.empty_path()]; } + if depth > 10 { + return vec![self.empty_path()]; + } let mut paths = vec![self.empty_path()]; for stmt in statements { - if stmt.r#type == "BEGIN" { continue; } + if stmt.r#type == "BEGIN" { + continue; + } let stmt_paths = self.paths_for(stmt, depth + 1); paths = self.append_statement_paths(paths, stmt_paths); } @@ -177,7 +277,10 @@ impl<'a> ImplicitControlFlow<'a> { for right in &right_paths { let mut calls = left.calls.clone(); calls.extend(right.calls.clone()); - combined.push(Path { calls, terminal: right.terminal }); + combined.push(Path { + calls, + terminal: right.terminal, + }); } } } @@ -185,55 +288,105 @@ impl<'a> ImplicitControlFlow<'a> { } fn paths_for(&self, node: &Node, depth: usize) -> Vec { - if depth > 10 { return vec![self.empty_path()]; } + if depth > 10 { + return vec![self.empty_path()]; + } match node.r#type.as_str() { - "BLOCK" => self.paths_for_statements(&node.children.iter().filter_map(ast::node).collect::>(), 
depth), - "SCOPE" => self.paths_for(node.children.get(2).and_then(ast::node).unwrap_or(node), depth), + "BLOCK" => self.paths_for_statements( + &node + .children + .iter() + .filter_map(ast::node) + .collect::>(), + depth, + ), + "SCOPE" => self.paths_for( + node.children.get(2).and_then(ast::node).unwrap_or(node), + depth, + ), "IF" | "UNLESS" => self.branch_paths(node, depth), "CASE" | "CASE2" => self.case_paths(node, depth), - "RETURN" | "BREAK" | "NEXT" | "REDO" | "RETRY" => { - self.generic_paths(node, depth).into_iter().map(|mut p| { p.terminal = true; p }).collect() - } + "RETURN" | "BREAK" | "NEXT" | "REDO" | "RETRY" => self + .generic_paths(node, depth) + .into_iter() + .map(|mut p| { + p.terminal = true; + p + }) + .collect(), _ => self.generic_paths(node, depth), } } fn branch_paths(&self, node: &Node, depth: usize) -> Vec { - if depth > 10 { return vec![self.empty_path()]; } + if depth > 10 { + return vec![self.empty_path()]; + } let cond = node.children.get(0).and_then(ast::node); let pos = node.children.get(1).and_then(ast::node); let neg = node.children.get(2).and_then(ast::node); let mut alts = self.paths_for(pos.unwrap_or(node), depth + 1); - if let Some(n) = neg { alts.extend(self.paths_for(n, depth + 1)); } else { alts.push(self.empty_path()); } + if let Some(n) = neg { + alts.extend(self.paths_for(n, depth + 1)); + } else { + alts.push(self.empty_path()); + } self.combine_path_lists(self.paths_for(cond.unwrap_or(node), depth + 1), alts) } fn case_paths(&self, node: &Node, depth: usize) -> Vec { - if depth > 10 { return vec![self.empty_path()]; } - let (cond, first_when) = if node.r#type == "CASE2" { (None, node.children.get(0).and_then(ast::node)) } else { (node.children.get(0).and_then(ast::node), node.children.get(1).and_then(ast::node)) }; - self.combine_path_lists(cond.map(|c| self.paths_for(c, depth + 1)).unwrap_or(vec![self.empty_path()]), self.when_paths(first_when, depth + 1)) + if depth > 10 { + return vec![self.empty_path()]; + } + let 
(cond, first_when) = if node.r#type == "CASE2" { + (None, node.children.get(0).and_then(ast::node)) + } else { + ( + node.children.get(0).and_then(ast::node), + node.children.get(1).and_then(ast::node), + ) + }; + self.combine_path_lists( + cond.map(|c| self.paths_for(c, depth + 1)) + .unwrap_or(vec![self.empty_path()]), + self.when_paths(first_when, depth + 1), + ) } fn when_paths(&self, node: Option<&Node>, depth: usize) -> Vec { - if depth > 10 { return vec![self.empty_path()]; } - let Some(n) = node else { return vec![self.empty_path()] }; - if n.r#type != "WHEN" { return self.paths_for(n, depth + 1) } + if depth > 10 { + return vec![self.empty_path()]; + } + let Some(n) = node else { + return vec![self.empty_path()]; + }; + if n.r#type != "WHEN" { + return self.paths_for(n, depth + 1); + } let pat = n.children.get(0).and_then(ast::node); let body = n.children.get(1).and_then(ast::node); let next = n.children.get(2).and_then(ast::node); - let current = self.combine_path_lists(self.paths_for(pat.unwrap_or(n), depth + 1), self.paths_for(body.unwrap_or(n), depth + 1)); + let current = self.combine_path_lists( + self.paths_for(pat.unwrap_or(n), depth + 1), + self.paths_for(body.unwrap_or(n), depth + 1), + ); let mut out = current; out.extend(self.when_paths(next, depth + 1)); out.into_iter().take(PATH_LIMIT).collect() } fn generic_paths(&self, node: &Node, depth: usize) -> Vec { - if depth > 10 { return vec![self.empty_path()]; } - if matches!(node.r#type.as_str(), "CLASS" | "MODULE" | "DEFN" | "DEFS" | "LAMBDA") { + if depth > 10 { + return vec![self.empty_path()]; + } + if matches!( + node.r#type.as_str(), + "CLASS" | "MODULE" | "DEFN" | "DEFS" | "LAMBDA" + ) { return vec![self.empty_path()]; } @@ -243,7 +396,13 @@ impl<'a> ImplicitControlFlow<'a> { } if let Some(mid) = self.internal_protocol_call(node) { - self.combine_path_lists(vec![Path { calls: vec![self.raw_call(&mid, node)], terminal: false }], child_paths) + self.combine_path_lists( + vec![Path { + calls: 
vec![self.raw_call(&mid, node)], + terminal: false, + }], + child_paths, + ) } else { child_paths } @@ -254,7 +413,12 @@ impl<'a> ImplicitControlFlow<'a> { mid: mid.to_string(), file: self.file.clone(), line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], reads: Vec::new(), writes: Vec::new(), } @@ -276,11 +440,23 @@ impl<'a> ImplicitControlFlow<'a> { !call.reads.is_empty() || !call.writes.is_empty() } - fn empty_path(&self) -> Path { Path { calls: Vec::new(), terminal: false } } + fn empty_path(&self) -> Path { + Path { + calls: Vec::new(), + terminal: false, + } + } fn owner_name(&self, node: &Node) -> String { - let text = ast::slice(node.children.first().and_then(ast::node).unwrap_or(node), &self.lines); - if text.is_empty() { "(anonymous)".to_string() } else { text } + let text = ast::slice( + node.children.first().and_then(ast::node).unwrap_or(node), + &self.lines, + ); + if text.is_empty() { + "(anonymous)".to_string() + } else { + text + } } fn method_name(&self, node: &Node) -> String { @@ -293,8 +469,12 @@ impl<'a> ImplicitControlFlow<'a> { fn internal_protocol_call(&self, node: &Node) -> Option { let mid = self.call_mid(node)?; - if IGNORED_MIDS.contains(&mid.as_str()) { return None } - if !self.internal_receiver(node) { return None } + if IGNORED_MIDS.contains(&mid.as_str()) { + return None; + } + if !self.internal_receiver(node) { + return None; + } Some(mid) } @@ -307,7 +487,9 @@ impl<'a> ImplicitControlFlow<'a> { } fn internal_receiver(&self, node: &Node) -> bool { - if matches!(node.r#type.as_str(), "FCALL" | "VCALL") { return true } + if matches!(node.r#type.as_str(), "FCALL" | "VCALL") { + return true; + } let receiver = node.children.get(0).and_then(ast::node); receiver.map(|r| r.r#type == "SELF").unwrap_or(false) } @@ -328,18 +510,34 @@ impl EffectIndex { let mut by_name = BTreeMap::new(); for e 
in effects { by_owner_name.insert((e.owner.clone(), e.name.clone()), e.clone()); - by_name.entry(e.name.clone()).or_insert_with(Vec::new).push(e); + by_name + .entry(e.name.clone()) + .or_insert_with(Vec::new) + .push(e); + } + Self { + by_owner_name, + by_name, } - Self { by_owner_name, by_name } } fn effect_for(&self, owner: &str, name: &str) -> Option<&MethodEffect> { - if let Some(e) = self.by_owner_name.get(&(owner.to_string(), name.to_string())) { + if let Some(e) = self + .by_owner_name + .get(&(owner.to_string(), name.to_string())) + { return Some(e); } let candidates = self.by_name.get(name)?; - let stateful: Vec<_> = candidates.iter().filter(|e| !e.reads.is_empty() || !e.writes.is_empty()).collect(); - if stateful.len() == 1 { Some(stateful[0]) } else { None } + let stateful: Vec<_> = candidates + .iter() + .filter(|e| !e.reads.is_empty() || !e.writes.is_empty()) + .collect(); + if stateful.len() == 1 { + Some(stateful[0]) + } else { + None + } } } @@ -348,7 +546,9 @@ struct EffectCollector { } impl EffectCollector { - fn new(_file: String, lines: Vec) -> Self { Self { lines } } + fn new(_file: String, lines: Vec) -> Self { + Self { lines } + } fn scan(&self, root: &Node) -> Vec { let mut out = Vec::new(); @@ -379,18 +579,41 @@ impl EffectCollector { MethodEffect { owner: owner.to_string(), name: self.method_name(node), - reads: { let mut v: Vec<_> = reads.into_iter().collect(); v.sort(); v }, - writes: { let mut v: Vec<_> = writes.into_iter().collect(); v.sort(); v }, + reads: { + let mut v: Vec<_> = reads.into_iter().collect(); + v.sort(); + v + }, + writes: { + let mut v: Vec<_> = writes.into_iter().collect(); + v.sort(); + v + }, } } - fn collect_state_access(&self, node: &Node, reads: &mut BTreeSet, writes: &mut BTreeSet) { - if matches!(node.r#type.as_str(), "CLASS" | "MODULE" | "LAMBDA") { return } + fn collect_state_access( + &self, + node: &Node, + reads: &mut BTreeSet, + writes: &mut BTreeSet, + ) { + if matches!(node.r#type.as_str(), "CLASS" | 
"MODULE" | "LAMBDA") { + return; + } match node.r#type.as_str() { - "IASGN" => { if let Some(s) = ast::child_to_string(node.children.get(0)) { writes.insert(self.normalize_state(&s)); } } + "IASGN" => { + if let Some(s) = ast::child_to_string(node.children.get(0)) { + writes.insert(self.normalize_state(&s)); + } + } "LASGN" => self.collect_index_write(node, writes), - "IVAR" => { if let Some(s) = ast::child_to_string(node.children.get(0)) { reads.insert(self.normalize_state(&s)); } } + "IVAR" => { + if let Some(s) = ast::child_to_string(node.children.get(0)) { + reads.insert(self.normalize_state(&s)); + } + } "ATTRASGN" => self.collect_attr_write(node, writes), "CALL" | "OPCALL" => { self.collect_bare_reader_comparison(node, reads); @@ -413,7 +636,9 @@ impl EffectCollector { let attr = mid.trim_end_matches('=').to_string(); if mid == "[]=" { - if let Some(t) = receiver.and_then(|r| self.state_receiver_token(r)) { writes.insert(t); } + if let Some(t) = receiver.and_then(|r| self.state_receiver_token(r)) { + writes.insert(t); + } } else if receiver.map(|r| self.self_receiver(r)).unwrap_or(false) { writes.insert(self.normalize_state(&attr)); } else if let Some(t) = receiver.and_then(|r| self.state_receiver_token(r)) { @@ -447,7 +672,9 @@ impl EffectCollector { let mid = ast::child_to_string(node.children.get(1)).unwrap_or_default(); if self.mutating_mid(&mid) { if let Some(r) = receiver { - if let Some(t) = self.state_receiver_token(r) { writes.insert(t); } + if let Some(t) = self.state_receiver_token(r) { + writes.insert(t); + } } } } @@ -455,23 +682,43 @@ impl EffectCollector { fn collect_self_reader(&self, node: &Node, reads: &mut BTreeSet) { let mid = self.call_mid(node); let Some(mid) = mid else { return }; - if self.mutating_mid(&mid) { return } - if IGNORED_MIDS.contains(&mid.as_str()) { return } - if !self.no_args(node) { return } - if node.r#type == "CALL" && !self.self_receiver(node.children.get(0).and_then(ast::node).unwrap()) { return } + if 
self.mutating_mid(&mid) { + return; + } + if IGNORED_MIDS.contains(&mid.as_str()) { + return; + } + if !self.no_args(node) { + return; + } + if node.r#type == "CALL" + && !self.self_receiver(node.children.get(0).and_then(ast::node).unwrap()) + { + return; + } reads.insert(self.normalize_state(&mid)); } fn mutating_mid(&self, mid: &str) -> bool { - if NON_MUTATING_OPERATOR_MIDS.contains(&mid) { return false } + if NON_MUTATING_OPERATOR_MIDS.contains(&mid) { + return false; + } MUTATING_MIDS.contains(&mid) || MUTATING_SUFFIXES.iter().any(|s| mid.ends_with(s)) } fn no_args(&self, node: &Node) -> bool { match node.r#type.as_str() { - "CALL" | "OPCALL" => node.children.get(2).map(|c| matches!(c, Child::Nil)).unwrap_or(true), + "CALL" | "OPCALL" => node + .children + .get(2) + .map(|c| matches!(c, Child::Nil)) + .unwrap_or(true), "VCALL" => true, - "FCALL" => node.children.get(1).map(|c| matches!(c, Child::Nil)).unwrap_or(true), + "FCALL" => node + .children + .get(1) + .map(|c| matches!(c, Child::Nil)) + .unwrap_or(true), _ => false, } } @@ -480,15 +727,23 @@ impl EffectCollector { match node.r#type.as_str() { "IVAR" => ast::child_to_string(node.children.get(0)).map(|s| self.normalize_state(&s)), "SELF" => Some("self".to_string()), - "VCALL" | "FCALL" | "LVAR" => ast::child_to_string(node.children.get(0)).map(|s| self.normalize_state(&s)), + "VCALL" | "FCALL" | "LVAR" => { + ast::child_to_string(node.children.get(0)).map(|s| self.normalize_state(&s)) + } "CALL" => { - if self.no_args(node) { ast::child_to_string(node.children.get(1)).map(|s| self.normalize_state(&s)) } else { None } + if self.no_args(node) { + ast::child_to_string(node.children.get(1)).map(|s| self.normalize_state(&s)) + } else { + None + } } _ => None, } } - fn self_receiver(&self, node: &Node) -> bool { node.r#type == "SELF" } + fn self_receiver(&self, node: &Node) -> bool { + node.r#type == "SELF" + } fn call_mid(&self, node: &Node) -> Option { match node.r#type.as_str() { @@ -499,8 +754,15 @@ impl 
EffectCollector { } fn owner_name(&self, node: &Node) -> String { - let text = ast::slice(node.children.first().and_then(ast::node).unwrap_or(node), &self.lines); - if text.is_empty() { "(anonymous)".to_string() } else { text } + let text = ast::slice( + node.children.first().and_then(ast::node).unwrap_or(node), + &self.lines, + ); + if text.is_empty() { + "(anonymous)".to_string() + } else { + text + } } fn method_name(&self, node: &Node) -> String { @@ -512,7 +774,9 @@ impl EffectCollector { } fn normalize_state(&self, name: &str) -> String { - name.trim_start_matches('@').trim_end_matches('=').to_string() + name.trim_start_matches('@') + .trim_end_matches('=') + .to_string() } } @@ -526,53 +790,104 @@ impl Report { let mut site_call_sets = BTreeMap::new(); for seq in &sequences { let mut calls = BTreeMap::new(); - for c in seq.calls.iter().filter(|c| !c.reads.is_empty() || !c.writes.is_empty()) { + for c in seq + .calls + .iter() + .filter(|c| !c.reads.is_empty() || !c.writes.is_empty()) + { calls.insert(c.mid.clone(), true); } - site_call_sets.insert((seq.file.clone(), seq.owner.clone(), seq.defn.clone(), seq.line), calls); + site_call_sets.insert( + ( + seq.file.clone(), + seq.owner.clone(), + seq.defn.clone(), + seq.line, + ), + calls, + ); + } + Self { + sequences, + site_call_sets, } - Self { sequences, site_call_sets } } fn ordered_protocols(&self, min_support: usize) -> Vec { - let mut counts: BTreeMap<(String, String, String, String), BTreeMap<(String, String, String, usize), ProtocolFinding>> = BTreeMap::new(); + let mut counts: BTreeMap< + (String, String, String, String), + BTreeMap<(String, String, String, usize), ProtocolFinding>, + > = BTreeMap::new(); for seq in &self.sequences { - let state_calls: Vec<_> = seq.calls.iter().filter(|c| !c.reads.is_empty() || !c.writes.is_empty()).collect(); + let state_calls: Vec<_> = seq + .calls + .iter() + .filter(|c| !c.reads.is_empty() || !c.writes.is_empty()) + .collect(); let collapsed = 
self.collapse_consecutive(&state_calls); for i in 0..collapsed.len().saturating_sub(1) { let left = collapsed[i]; - let right = collapsed[i+1]; + let right = collapsed[i + 1]; let edge = self.dependency_edge(left, right); let Some(edge) = edge else { continue }; - if self.diagnostic_protocol(&[left.mid.clone(), right.mid.clone()]) { continue }; - - let key = (left.mid.clone(), right.mid.clone(), edge.0.join("|"), edge.1.join("|")); - let site_key = (seq.file.clone(), seq.owner.clone(), seq.defn.clone(), seq.line); - counts.entry(key).or_default().insert(site_key, ProtocolFinding { - kind: "protocol_pressure".to_string(), - protocol: vec![left.mid.clone(), right.mid.clone()], - dependency: edge.0, - states: edge.1, - support: 0, - confidence: 1.0, - at: format!("{}:{}:{}", seq.file, seq.defn, seq.line), - observed: vec![left.mid.clone(), right.mid.clone()], - missing: Vec::new(), - sites: Vec::new(), - spans: { let mut s = BTreeMap::new(); s.insert(format!("{}:{}:{}", seq.file, seq.defn, seq.line), left.span); s }, - }); + if self.diagnostic_protocol(&[left.mid.clone(), right.mid.clone()]) { + continue; + }; + + let key = ( + left.mid.clone(), + right.mid.clone(), + edge.0.join("|"), + edge.1.join("|"), + ); + let site_key = ( + seq.file.clone(), + seq.owner.clone(), + seq.defn.clone(), + seq.line, + ); + counts.entry(key).or_default().insert( + site_key, + ProtocolFinding { + kind: "protocol_pressure".to_string(), + protocol: vec![left.mid.clone(), right.mid.clone()], + dependency: edge.0, + states: edge.1, + support: 0, + confidence: 1.0, + at: format!("{}:{}:{}", seq.file, seq.defn, seq.line), + observed: vec![left.mid.clone(), right.mid.clone()], + missing: Vec::new(), + sites: Vec::new(), + spans: { + let mut s = BTreeMap::new(); + s.insert(format!("{}:{}:{}", seq.file, seq.defn, seq.line), left.span); + s + }, + }, + ); } } let mut out = Vec::new(); for (_, sites) in counts { - if sites.len() < min_support { continue; } + if sites.len() < min_support { + 
continue; + } let mut first = sites.values().next().unwrap().clone(); first.support = sites.len(); - first.sites = sites.keys().map(|k| format!("{}:{}:{}", k.0, k.2, k.3)).collect(); + first.sites = sites + .keys() + .map(|k| format!("{}:{}:{}", k.0, k.2, k.3)) + .collect(); out.push(first); } - out.sort_by(|a, b| b.support.cmp(&a.support).then_with(|| self.dependency_rank(a).cmp(&self.dependency_rank(b))).then_with(|| a.protocol.join("\0").cmp(&b.protocol.join("\0")))); + out.sort_by(|a, b| { + b.support + .cmp(&a.support) + .then_with(|| self.dependency_rank(a).cmp(&self.dependency_rank(b))) + .then_with(|| a.protocol.join("\0").cmp(&b.protocol.join("\0"))) + }); out } @@ -587,44 +902,83 @@ impl Report { let mut out = Vec::new(); for seq in &self.sequences { - let state_calls: Vec<_> = seq.calls.iter().filter(|c| !c.reads.is_empty() || !c.writes.is_empty()).collect(); + let state_calls: Vec<_> = seq + .calls + .iter() + .filter(|c| !c.reads.is_empty() || !c.writes.is_empty()) + .collect(); let collapsed = self.collapse_consecutive(&state_calls); let mids: Vec<_> = collapsed.iter().map(|c| c.mid.clone()).collect(); let positions = self.first_positions(&mids); - - for protocol_row in self.candidate_protocols(&positions.keys().cloned().collect::>(), &protocol_index) { - let present: Vec<_> = protocol_row.protocol.iter().filter(|m| positions.contains_key(*m)).cloned().collect(); - if present.len() < 2 { continue; } - if self.ordered_subsequence(&mids, &protocol_row.protocol) { continue; } - let confidence = (protocol_row.support as f64) / (self.denominator_for(&present) as f64); - if confidence < min_confidence { continue; } + for protocol_row in self.candidate_protocols( + &positions.keys().cloned().collect::>(), + &protocol_index, + ) { + let present: Vec<_> = protocol_row + .protocol + .iter() + .filter(|m| positions.contains_key(*m)) + .cloned() + .collect(); + if present.len() < 2 { + continue; + } + if self.ordered_subsequence(&mids, &protocol_row.protocol) { + 
continue; + } + + let confidence = + (protocol_row.support as f64) / (self.denominator_for(&present) as f64); + if confidence < min_confidence { + continue; + } out.push(self.finding(seq, &protocol_row, &present, &positions, confidence)); } } - + let mut deduped = Vec::new(); let mut seen = BTreeSet::new(); for row in out { - let key = (row.kind.clone(), row.at.clone(), row.protocol.clone(), row.observed.clone(), row.states.clone()); - if seen.insert(key) { deduped.push(row); } + let key = ( + row.kind.clone(), + row.at.clone(), + row.protocol.clone(), + row.observed.clone(), + row.states.clone(), + ); + if seen.insert(key) { + deduped.push(row); + } } - deduped.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap().then_with(|| b.support.cmp(&a.support)).then_with(|| a.at.cmp(&b.at))); + deduped.sort_by(|a, b| { + b.confidence + .partial_cmp(&a.confidence) + .unwrap() + .then_with(|| b.support.cmp(&a.support)) + .then_with(|| a.at.cmp(&b.at)) + }); deduped } fn dependency_rank(&self, row: &ProtocolFinding) -> usize { - if row.dependency.iter().any(|d| d == "write_read") { 0 } - else if row.dependency.iter().any(|d| d == "write_write") { 1 } - else { 2 } + if row.dependency.iter().any(|d| d == "write_read") { + 0 + } else if row.dependency.iter().any(|d| d == "write_write") { + 1 + } else { + 2 + } } fn collapse_consecutive<'a>(&self, calls: &'a [&'a Call]) -> Vec<&'a Call> { let mut out = Vec::new(); let mut last = None; for c in calls { - if last.map(|l| l == &c.mid).unwrap_or(false) { continue; } + if last.map(|l| l == &c.mid).unwrap_or(false) { + continue; + } last = Some(&c.mid); out.push(*c); } @@ -641,13 +995,30 @@ impl Report { let mut states = BTreeSet::new(); let wr: Vec<_> = lw.intersection(&rr).collect(); - if !wr.is_empty() { kinds.push("write_read".to_string()); for s in wr { states.insert((*s).clone()); } } + if !wr.is_empty() { + kinds.push("write_read".to_string()); + for s in wr { + states.insert((*s).clone()); + } + } let ww: Vec<_> = 
lw.intersection(&rw).collect(); - if !ww.is_empty() { kinds.push("write_write".to_string()); for s in ww { states.insert((*s).clone()); } } + if !ww.is_empty() { + kinds.push("write_write".to_string()); + for s in ww { + states.insert((*s).clone()); + } + } let rw_int: Vec<_> = lr.intersection(&rw).collect(); - if !rw_int.is_empty() { kinds.push("read_write".to_string()); for s in rw_int { states.insert((*s).clone()); } } + if !rw_int.is_empty() { + kinds.push("read_write".to_string()); + for s in rw_int { + states.insert((*s).clone()); + } + } - if kinds.is_empty() { return None } + if kinds.is_empty() { + return None; + } kinds.sort(); let mut states_v: Vec<_> = states.into_iter().collect(); states_v.sort(); @@ -655,20 +1026,28 @@ impl Report { } fn diagnostic_protocol(&self, protocol: &[String]) -> bool { - protocol.iter().any(|m| OPTIONAL_DIAGNOSTIC_MIDS.contains(&m.as_str())) + protocol + .iter() + .any(|m| OPTIONAL_DIAGNOSTIC_MIDS.contains(&m.as_str())) } - fn candidate_protocols(&self, mids: &[String], protocol_index: &BTreeMap>) -> Vec { + fn candidate_protocols( + &self, + mids: &[String], + protocol_index: &BTreeMap>, + ) -> Vec { let mut out = Vec::new(); let mut seen = BTreeSet::new(); for i in 0..mids.len() { - for j in i+1..mids.len() { + for j in i + 1..mids.len() { let mut pair = vec![mids[i].clone(), mids[j].clone()]; pair.sort(); if let Some(ps) = protocol_index.get(&pair.join("\0")) { for p in ps { let key = (p.protocol.clone(), p.dependency.clone(), p.states.clone()); - if seen.insert(key) { out.push(p.clone()); } + if seen.insert(key) { + out.push(p.clone()); + } } } } @@ -687,23 +1066,41 @@ impl Report { fn ordered_subsequence(&self, mids: &[String], protocol: &[String]) -> bool { let mut idx = 0; for m in mids { - if m == &protocol[idx] { idx += 1; } - if idx == protocol.len() { return true; } + if m == &protocol[idx] { + idx += 1; + } + if idx == protocol.len() { + return true; + } } false } fn denominator_for(&self, present: &[String]) -> 
usize { - self.site_call_sets.values().filter(|mids| present.iter().all(|m| mids.contains_key(m))).count().max(1) + self.site_call_sets + .values() + .filter(|mids| present.iter().all(|m| mids.contains_key(m))) + .count() + .max(1) } - fn finding(&self, seq: &MethodSequence, protocol_row: &ProtocolFinding, present: &[String], positions: &BTreeMap, confidence: f64) -> ProtocolFinding { - let anchor_mid = present.iter().min_by_key(|m| positions.get(*m).unwrap()).unwrap(); + fn finding( + &self, + seq: &MethodSequence, + protocol_row: &ProtocolFinding, + present: &[String], + positions: &BTreeMap, + confidence: f64, + ) -> ProtocolFinding { + let anchor_mid = present + .iter() + .min_by_key(|m| positions.get(*m).unwrap()) + .unwrap(); let anchor = seq.calls.iter().find(|c| &c.mid == anchor_mid).unwrap(); let loc = format!("{}:{}:{}", seq.file, seq.defn, anchor.line); let mut observed = present.to_vec(); observed.sort_by_key(|m| positions.get(m).unwrap()); - + let mut spans = BTreeMap::new(); spans.insert(loc.clone(), anchor.span); diff --git a/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs b/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs index 2c1ad29e8..4317e1932 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs @@ -40,7 +40,10 @@ struct Block { const HOLE_TYPES: &[&str] = &["LVAR", "DVAR", "IVAR", "LASGN", "DASGN", "IASGN"]; const MIN_TOKENS: usize = 8; -pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { +pub fn scan_files( + files: &[PathBuf], + language: Language, +) -> Result> { let mut blocks = Vec::new(); for file in files { let (root, _lines) = ast::parse_with_language(file, language)?; @@ -94,7 +97,10 @@ impl InconsistentRenameClone { skeleton, names, file: self.file.clone(), - defn: defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + defn: defstack + 
.last() + .cloned() + .unwrap_or_else(|| "(top-level)".to_string()), line: stmts[0].first_lineno, span: [ stmts[0].first_lineno, @@ -120,7 +126,11 @@ impl InconsistentRenameClone { } } "CALL" | "FCALL" => { - skeleton.push(if node.r#type == "CALL" { Skeleton::CALL } else { Skeleton::FCALL }); + skeleton.push(if node.r#type == "CALL" { + Skeleton::CALL + } else { + Skeleton::FCALL + }); let mid_index = if node.r#type == "CALL" { 1 } else { 0 }; skeleton.push(Skeleton::MID); if let Some(Child::Symbol(mid)) = node.children.get(mid_index) { @@ -159,7 +169,11 @@ impl Report { for members in self.groups.values() { out.extend(self.findings_for(members)); } - out.sort_by(|a, b| b.clone_size.cmp(&a.clone_size).then_with(|| a.at.cmp(&b.at))); + out.sort_by(|a, b| { + b.clone_size + .cmp(&a.clone_size) + .then_with(|| a.at.cmp(&b.at)) + }); out.dedup_by(|a, b| a.at == b.at && a.ref_at == b.ref_at && a.ref_name == b.ref_name); out } @@ -188,7 +202,11 @@ impl Report { out } - fn inconsistent_pairs(&self, ref_block: &Block, candidate: &Block) -> Vec { + fn inconsistent_pairs( + &self, + ref_block: &Block, + candidate: &Block, + ) -> Vec { let mut out = Vec::new(); for (ref_name, positions) in self.ref_classes(ref_block) { let mut spellings = BTreeSet::new(); @@ -200,7 +218,12 @@ impl Report { if spellings.len() < 2 { continue; } - out.push(self.finding(ref_block, candidate, &ref_name, spellings.into_iter().collect())); + out.push(self.finding( + ref_block, + candidate, + &ref_name, + spellings.into_iter().collect(), + )); } out } @@ -218,7 +241,13 @@ impl Report { left.file == right.file && left.defn == right.defn } - fn finding(&self, ref_block: &Block, candidate: &Block, ref_name: &str, divergent: Vec) -> InconsistentRenameCloneRow { + fn finding( + &self, + ref_block: &Block, + candidate: &Block, + ref_name: &str, + divergent: Vec, + ) -> InconsistentRenameCloneRow { let at = format!("{}:{}:{}", candidate.file, candidate.defn, candidate.line); let ref_at = format!("{}:{}:{}", 
ref_block.file, ref_block.defn, ref_block.line); let mut spans = BTreeMap::new(); diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index 83b1a309c..6b6e93d4f 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -2,7 +2,7 @@ use crate::decomplex::ast::{self, Child, Node, Span}; use crate::decomplex::syntax::Language; use anyhow::Result; use serde::Serialize; -use std::collections::{BTreeSet}; +use std::collections::BTreeSet; use std::path::PathBuf; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] @@ -56,7 +56,7 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result Result, + language: Language, } impl LocalFlow { - fn new(file: String, lines: Vec) -> Self { - Self { file, lines } + fn new(file: String, lines: Vec, language: Language) -> Self { + Self { + file, + lines, + language, + } } fn scan(&mut self, root: &Node) -> Vec { @@ -122,7 +127,12 @@ impl LocalFlow { name: self.method_name(node), file: self.file.clone(), line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], node: node.clone(), boundaries: self.structural_boundaries(&statements), statements, @@ -134,7 +144,12 @@ impl LocalFlow { index, line: node.first_lineno, end_line: node.last_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], source: ast::slice(node, &self.lines), reads: self.local_reads(node), writes: self.local_writes(node), @@ -168,7 +183,11 @@ impl LocalFlow { let mut blank = None; for line_number in first_line..=last_line { - let text = self.lines.get(line_number - 1).map(|s| s.as_str()).unwrap_or(""); + let text = self + .lines + 
.get(line_number - 1) + .map(|s| s.as_str()) + .unwrap_or(""); let stripped = text.trim(); if stripped.starts_with('#') { return Some(RawBoundary { @@ -194,27 +213,37 @@ impl LocalFlow { }; let stmts = if body.r#type == "BLOCK" { - body.children.iter().filter_map(ast::node).collect::>() + body.children + .iter() + .filter_map(ast::node) + .collect::>() } else { vec![body] }; - stmts.into_iter().flat_map(|stmt| { - if METHOD_TYPES.contains(&stmt.r#type.as_str()) { - vec![stmt] - } else if self.visibility_call(stmt) { - self.inline_methods(stmt) - } else { - vec![] - } - }).collect() + stmts + .into_iter() + .flat_map(|stmt| { + if METHOD_TYPES.contains(&stmt.r#type.as_str()) { + vec![stmt] + } else if self.visibility_call(stmt) { + self.inline_methods(stmt) + } else { + vec![] + } + }) + .collect() } fn inline_methods<'a>(&self, stmt: &'a Node) -> Vec<&'a Node> { let Some(args) = stmt.children.get(1).and_then(ast::node) else { return Vec::new(); }; - args.children.iter().filter_map(ast::node).filter(|arg| METHOD_TYPES.contains(&arg.r#type.as_str())).collect() + args.children + .iter() + .filter_map(ast::node) + .filter(|arg| METHOD_TYPES.contains(&arg.r#type.as_str())) + .collect() } fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { @@ -239,13 +268,33 @@ impl LocalFlow { if node.r#type == "DEFS" { let receiver = node.children.get(0).and_then(ast::node); let prefix = if let Some(r) = receiver { - if r.r#type == "SELF" { "self".to_string() } else { ast::slice(r, &self.lines) } + if r.r#type == "SELF" { + "self".to_string() + } else { + ast::slice(r, &self.lines) + } } else { "?".to_string() }; - format!("{}.{}", prefix, node.children.get(1).and_then(|c| match c { Child::Symbol(s) => Some(s), _ => None }).unwrap_or(&"?".to_string())) + format!( + "{}.{}", + prefix, + node.children + .get(1) + .and_then(|c| match c { + Child::Symbol(s) => Some(s), + _ => None, + }) + .unwrap_or(&"?".to_string()) + ) } else { - node.children.first().and_then(|c| match 
c { Child::Symbol(s) => Some(s.clone()), _ => None }).unwrap_or_else(|| "?".to_string()) + node.children + .first() + .and_then(|c| match c { + Child::Symbol(s) => Some(s.clone()), + _ => None, + }) + .unwrap_or_else(|| "?".to_string()) } } @@ -256,15 +305,24 @@ impl LocalFlow { } fn owner_segment(&self, node: &Node) -> String { - let text = ast::slice(node.children.first().and_then(ast::node).unwrap_or(node), &self.lines); - if text.is_empty() { "(anonymous)".to_string() } else { text } + let text = ast::slice( + node.children.first().and_then(ast::node).unwrap_or(node), + &self.lines, + ); + if text.is_empty() { + "(anonymous)".to_string() + } else { + text + } } fn local_reads(&self, node: &Node) -> BTreeSet { let mut reads = Vec::new(); self.walk_local(node, &mut |child| { - if LOCAL_READ_TYPES.contains(&child.r#type.as_str()) { - if let Some(Child::String(name)) = child.children.first() { + if LOCAL_READ_TYPES.contains(&child.r#type.as_str()) + || (self.language != Language::Ruby && child.r#type == "VCALL") + { + if let Some(name) = local_read_name(child) { reads.push(name.clone()); } } @@ -326,6 +384,13 @@ impl LocalFlow { } } +fn local_read_name(node: &Node) -> Option<&String> { + match node.children.first() { + Some(Child::String(name)) | Some(Child::Symbol(name)) => Some(name), + _ => None, + } +} + struct RawBoundary { line: usize, kind: String, @@ -338,15 +403,18 @@ mod tests { use std::io::Write; use tempfile::NamedTempFile; - #[test] - fn extracts_python_function_local_flow() { + fn summaries(source: &str, language: Language) -> Vec { let mut file = NamedTempFile::new().expect("tempfile"); - file.write_all( - b"def mixed(price, tax):\n subtotal = price + tax\n total = subtotal\n return total\n", - ) - .expect("write"); + file.write_all(source.as_bytes()).expect("write"); + scan_files(&[file.path().to_path_buf()], language).expect("scan") + } - let summaries = scan_files(&[file.path().to_path_buf()], Language::Python).expect("scan"); + #[test] + fn 
extracts_python_function_local_flow() { + let summaries = summaries( + "def mixed(price, tax):\n subtotal = price + tax\n total = subtotal\n return total\n", + Language::Python, + ); let summary = summaries .iter() .find(|summary| summary.name == "mixed") @@ -356,7 +424,9 @@ mod tests { assert_eq!(summary.statements.len(), 3); assert_eq!( summary.statements[0].reads, - ["price".to_string(), "tax".to_string()].into_iter().collect() + ["price".to_string(), "tax".to_string()] + .into_iter() + .collect() ); assert_eq!( summary.statements[1].dependencies, @@ -367,4 +437,47 @@ mod tests { ["total".to_string()].into_iter().collect() ); } + + #[test] + fn extracts_java_kotlin_and_swift_local_flow() { + let cases = [ + ( + Language::Java, + "class Billing {\n int mixed(int price, int tax) {\n int subtotal = price + tax;\n int total = subtotal;\n return total;\n }\n}\n", + ), + ( + Language::Kotlin, + "class Billing {\n fun mixed(price: Int, tax: Int): Int {\n val subtotal = price + tax\n val total = subtotal\n return total\n }\n}\n", + ), + ( + Language::Swift, + "class Billing {\n func mixed(price: Int, tax: Int) -> Int {\n let subtotal = price + tax\n let total = subtotal\n return total\n }\n}\n", + ), + ]; + + for (language, source) in cases { + let summaries = summaries(source, language); + let summary = summaries + .iter() + .find(|summary| summary.name == "mixed") + .expect("mixed summary"); + + assert_eq!(summary.owner, "Billing"); + assert_eq!(summary.statements.len(), 3); + assert_eq!( + summary.statements[0].reads, + ["price".to_string(), "tax".to_string()] + .into_iter() + .collect() + ); + assert_eq!( + summary.statements[1].dependencies, + vec![("total".to_string(), "subtotal".to_string())] + ); + assert_eq!( + summary.statements[2].reads, + ["total".to_string()].into_iter().collect() + ); + } + } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs index 2f2c3cf2e..479633269 
100644 --- a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs @@ -1,4 +1,4 @@ -use crate::decomplex::ast::{Span}; +use crate::decomplex::ast::Span; use crate::decomplex::detectors::{local_flow, weighted_inlined_cognitive_complexity}; use crate::decomplex::syntax::Language; use anyhow::Result; @@ -73,9 +73,14 @@ impl LocalityDrag { } fn findings(&mut self) -> Vec { - let mut out: Vec<_> = self.summaries.iter().flat_map(|s| self.findings_for(s)).collect(); + let mut out: Vec<_> = self + .summaries + .iter() + .flat_map(|s| self.findings_for(s)) + .collect(); out.sort_by(|a, b| { - b.score.cmp(&a.score) + b.score + .cmp(&a.score) .then_with(|| b.unrelated_statements.cmp(&a.unrelated_statements)) .then_with(|| b.gap_lines.cmp(&a.gap_lines)) .then_with(|| a.file.cmp(&b.file)) @@ -85,45 +90,91 @@ impl LocalityDrag { } fn findings_for(&self, summary: &local_flow::MethodSummary) -> Vec { - if summary.statements.len() < self.min_unrelated_statements + 2 { return Vec::new() } + if summary.statements.len() < self.min_unrelated_statements + 2 { + return Vec::new(); + } - let local_complexity = weighted_inlined_cognitive_complexity::LocalScorer::new().score(&summary.node).score; - if local_complexity < self.min_local_complexity { return Vec::new() } + let local_complexity = weighted_inlined_cognitive_complexity::LocalScorer::new() + .score(&summary.node) + .score; + if local_complexity < self.min_local_complexity { + return Vec::new(); + } let mut findings = Vec::new(); for (index, statement) in summary.statements.iter().enumerate() { for name in &statement.writes { - if let Some(f) = self.finding_for_write(summary, local_complexity, statement, index, name) { + if let Some(f) = + self.finding_for_write(summary, local_complexity, statement, index, name) + { findings.push(f); } } } - findings.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.defined_at.cmp(&b.defined_at)).then_with(|| 
a.variable.cmp(&b.variable))); - findings.into_iter().take(self.max_findings_per_method).collect() + findings.sort_by(|a, b| { + b.score + .cmp(&a.score) + .then_with(|| a.defined_at.cmp(&b.defined_at)) + .then_with(|| a.variable.cmp(&b.variable)) + }); + findings + .into_iter() + .take(self.max_findings_per_method) + .collect() } - fn finding_for_write(&self, summary: &local_flow::MethodSummary, local_complexity: f64, statement: &local_flow::Statement, index: usize, name: &str) -> Option { - if self.ignorable_local(name) { return None } + fn finding_for_write( + &self, + summary: &local_flow::MethodSummary, + local_complexity: f64, + statement: &local_flow::Statement, + index: usize, + name: &str, + ) -> Option { + if self.ignorable_local(name) { + return None; + } let use_index = self.first_read_before_rewrite(&summary.statements, index, name)?; - if self.same_prefix_staging_batch(&summary.statements, use_index, name) { return None } + if self.same_prefix_staging_batch(&summary.statements, use_index, name) { + return None; + } let gap = &summary.statements[(index + 1)..use_index]; - if gap.is_empty() { return None } + if gap.is_empty() { + return None; + } let gap_refs: Vec<_> = gap.iter().collect(); let (related, unrelated) = self.classify_gap_statements(name, statement, &gap_refs); - let substantive_unrelated: Vec<_> = unrelated.into_iter().filter(|s| !self.trivial_initializer(s)).collect(); - if substantive_unrelated.len() < self.min_unrelated_statements { return None } + let substantive_unrelated: Vec<_> = unrelated + .into_iter() + .filter(|s| !self.trivial_initializer(s)) + .collect(); + if substantive_unrelated.len() < self.min_unrelated_statements { + return None; + } let use_statement = &summary.statements[use_index]; let gap_lines = use_statement.line - statement.line; let boundaries = self.boundary_crossings(summary, index, use_index); - if gap_lines < self.min_gap_lines && boundaries.is_empty() { return None } + if gap_lines < self.min_gap_lines && 
boundaries.is_empty() { + return None; + } - let score = self.score_for(name, &substantive_unrelated, &related, gap_lines, &boundaries, local_complexity, self.read_count_after_write(&summary.statements, index, name)); - if score < self.min_score { return None } + let score = self.score_for( + name, + &substantive_unrelated, + &related, + gap_lines, + &boundaries, + local_complexity, + self.read_count_after_write(&summary.statements, index, name), + ); + if score < self.min_score { + return None; + } let at = format!("{}:{}:{}", summary.file, summary.name, statement.line); let mut spans = BTreeMap::new(); @@ -149,29 +200,67 @@ impl LocalityDrag { score, definition_deps: self.definition_deps(statement, name).into_iter().collect(), use_reads: use_statement.reads.iter().cloned().collect(), - examples: substantive_unrelated.iter().take(3).map(|s| self.example_for(s)).collect(), + examples: substantive_unrelated + .iter() + .take(3) + .map(|s| self.example_for(s)) + .collect(), boundaries: boundaries.iter().map(|b| self.boundary_for(b)).collect(), - reason: self.reason_for(name, &substantive_unrelated, gap_lines, &boundaries, local_complexity), + reason: self.reason_for( + name, + &substantive_unrelated, + gap_lines, + &boundaries, + local_complexity, + ), spans, }) } - fn first_read_before_rewrite(&self, statements: &[local_flow::Statement], index: usize, name: &str) -> Option { + fn first_read_before_rewrite( + &self, + statements: &[local_flow::Statement], + index: usize, + name: &str, + ) -> Option { for (offset, statement) in statements.iter().skip(index + 1).enumerate() { - if statement.writes.contains(name) { return None } - if statement.reads.contains(name) { return Some(index + 1 + offset) } + if statement.writes.contains(name) { + return None; + } + if statement.reads.contains(name) { + return Some(index + 1 + offset); + } } None } - fn read_count_after_write(&self, statements: &[local_flow::Statement], index: usize, name: &str) -> usize { - 
statements.iter().skip(index + 1).filter(|s| s.reads.contains(name)).count() + fn read_count_after_write( + &self, + statements: &[local_flow::Statement], + index: usize, + name: &str, + ) -> usize { + statements + .iter() + .skip(index + 1) + .filter(|s| s.reads.contains(name)) + .count() } - fn classify_gap_statements<'a>(&self, name: &str, definition: &local_flow::Statement, gap: &'a [&local_flow::Statement]) -> (Vec<&'a local_flow::Statement>, Vec<&'a local_flow::Statement>) { + fn classify_gap_statements<'a>( + &self, + name: &str, + definition: &local_flow::Statement, + gap: &'a [&local_flow::Statement], + ) -> ( + Vec<&'a local_flow::Statement>, + Vec<&'a local_flow::Statement>, + ) { let mut related_names = BTreeSet::new(); related_names.insert(name.to_string()); - for d in self.definition_deps(definition, name) { related_names.insert(d); } + for d in self.definition_deps(definition, name) { + related_names.insert(d); + } let mut related = Vec::new(); let mut unrelated = Vec::new(); @@ -181,7 +270,9 @@ impl LocalityDrag { let touches_related = !touched.is_disjoint(&related_names); if touches_related || !new_related.is_empty() { related.push(*s); - for n in new_related { related_names.insert(n); } + for n in new_related { + related_names.insert(n); + } } else { unrelated.push(*s); } @@ -190,21 +281,60 @@ impl LocalityDrag { } fn definition_deps(&self, statement: &local_flow::Statement, name: &str) -> BTreeSet { - statement.dependencies.iter().filter(|(lhs, _)| lhs == name).map(|(_, rhs)| rhs.clone()).collect() + statement + .dependencies + .iter() + .filter(|(lhs, _)| lhs == name) + .map(|(_, rhs)| rhs.clone()) + .collect() } - fn derived_from_related(&self, statement: &local_flow::Statement, related_names: &BTreeSet) -> BTreeSet { - statement.dependencies.iter().filter(|(_, rhs)| related_names.contains(rhs)).map(|(lhs, _)| lhs.clone()).collect() + fn derived_from_related( + &self, + statement: &local_flow::Statement, + related_names: &BTreeSet, + ) -> 
BTreeSet { + statement + .dependencies + .iter() + .filter(|(_, rhs)| related_names.contains(rhs)) + .map(|(lhs, _)| lhs.clone()) + .collect() } - fn boundary_crossings<'a>(&self, summary: &'a local_flow::MethodSummary, definition_index: usize, use_index: usize) -> Vec<&'a local_flow::Boundary> { - summary.boundaries.iter().filter(|b| b.before_index >= definition_index && b.after_index <= use_index).collect() + fn boundary_crossings<'a>( + &self, + summary: &'a local_flow::MethodSummary, + definition_index: usize, + use_index: usize, + ) -> Vec<&'a local_flow::Boundary> { + summary + .boundaries + .iter() + .filter(|b| b.before_index >= definition_index && b.after_index <= use_index) + .collect() } - fn score_for(&self, variable: &str, unrelated: &[&local_flow::Statement], related: &[&local_flow::Statement], gap_lines: usize, boundaries: &[&local_flow::Boundary], local_complexity: f64, read_count: usize) -> isize { - let mut score = (unrelated.len() as isize * 5) + (gap_lines.min(30) as isize) + (boundaries.len() as isize * 8) + (local_complexity.min(25.0).round() as isize); - if read_count == 1 { score += 5; } - if self.benign_local(variable) { score -= 8; } + fn score_for( + &self, + variable: &str, + unrelated: &[&local_flow::Statement], + related: &[&local_flow::Statement], + gap_lines: usize, + boundaries: &[&local_flow::Boundary], + local_complexity: f64, + read_count: usize, + ) -> isize { + let mut score = (unrelated.len() as isize * 5) + + (gap_lines.min(30) as isize) + + (boundaries.len() as isize * 8) + + (local_complexity.min(25.0).round() as isize); + if read_count == 1 { + score += 5; + } + if self.benign_local(variable) { + score -= 8; + } score -= related.len() as isize * 2; score } @@ -213,27 +343,53 @@ impl LocalityDrag { name.starts_with('_') || self.source_location_local(name) } - fn same_prefix_staging_batch(&self, statements: &[local_flow::Statement], use_index: usize, name: &str) -> bool { - let Some(prefix) = self.staging_prefix(name) else { 
return false }; - let staged_names: BTreeSet<_> = statements.iter().take(use_index).flat_map(|s| s.writes.iter()).filter(|n| n.starts_with(&format!("{}_", prefix))).cloned().collect(); - if staged_names.len() < 4 { return false } + fn same_prefix_staging_batch( + &self, + statements: &[local_flow::Statement], + use_index: usize, + name: &str, + ) -> bool { + let Some(prefix) = self.staging_prefix(name) else { + return false; + }; + let staged_names: BTreeSet<_> = statements + .iter() + .take(use_index) + .flat_map(|s| s.writes.iter()) + .filter(|n| n.starts_with(&format!("{}_", prefix))) + .cloned() + .collect(); + if staged_names.len() < 4 { + return false; + } let use_reads = &statements[use_index].reads; staged_names.intersection(use_reads).count() >= 4 } fn trivial_initializer(&self, statement: &local_flow::Statement) -> bool { - if statement.writes.is_empty() || !statement.reads.is_empty() { return false } + if statement.writes.is_empty() || !statement.reads.is_empty() { + return false; + } let source = statement.source.trim(); - let re = regex::Regex::new(r"^\w+\s*=\s*(?:\{\}|\[\]|nil|false|true|0|T\.let\((?:nil|false|true|0)\b)").unwrap(); + let re = regex::Regex::new( + r"^\w+\s*=\s*(?:\{\}|\[\]|nil|false|true|0|T\.let\((?:nil|false|true|0)\b)", + ) + .unwrap(); re.is_match(source) } fn staging_prefix(&self, name: &str) -> Option { let parts: Vec<_> = name.split('_').collect(); - if parts.len() >= 2 && parts[0].len() >= 3 { Some(parts[0].to_string()) } else { None } + if parts.len() >= 2 && parts[0].len() >= 3 { + Some(parts[0].to_string()) + } else { + None + } } - fn benign_local(&self, name: &str) -> bool { self.source_location_local(name) } + fn benign_local(&self, name: &str) -> bool { + self.source_location_local(name) + } fn source_location_local(&self, name: &str) -> bool { let re = regex::Regex::new(r"(?i)(?:\A|_)(?:tok|token|span|source|source_code|line|column|col|pos|idx|index|loc|location)(?:\z|_)").unwrap(); @@ -242,23 +398,57 @@ impl 
LocalityDrag { fn example_for(&self, statement: &local_flow::Statement) -> Example { let source = statement.source.lines().next().unwrap_or("").trim(); - let source = if source.len() > 99 { format!("{}...", &source[0..96]) } else { source.to_string() }; - Example { line: statement.line, source } + let source = if source.len() > 99 { + format!("{}...", &source[0..96]) + } else { + source.to_string() + }; + Example { + line: statement.line, + source, + } } fn boundary_for(&self, boundary: &local_flow::Boundary) -> BoundaryInfo { - BoundaryInfo { line: boundary.line, marker: if boundary.text.is_empty() { boundary.kind.clone() } else { boundary.text.clone() } } + BoundaryInfo { + line: boundary.line, + marker: if boundary.text.is_empty() { + boundary.kind.clone() + } else { + boundary.text.clone() + }, + } } - fn reason_for(&self, variable: &str, unrelated: &[&local_flow::Statement], gap_lines: usize, boundaries: &[&local_flow::Boundary], local_complexity: f64) -> String { + fn reason_for( + &self, + variable: &str, + unrelated: &[&local_flow::Statement], + gap_lines: usize, + boundaries: &[&local_flow::Boundary], + local_complexity: f64, + ) -> String { let mut parts = vec![ - format!("`{}` is initialized {} line(s) before first use", variable, gap_lines), + format!( + "`{}` is initialized {} line(s) before first use", + variable, gap_lines + ), format!("{} unrelated intervening statement(s)", unrelated.len()), ]; - if !boundaries.is_empty() { parts.push(format!("{} structural boundary crossing(s)", boundaries.len())); } - parts.push(format!("method local complexity {}", self.round(local_complexity))); + if !boundaries.is_empty() { + parts.push(format!( + "{} structural boundary crossing(s)", + boundaries.len() + )); + } + parts.push(format!( + "method local complexity {}", + self.round(local_complexity) + )); parts.join("; ") } - fn round(&self, value: f64) -> f64 { (value * 10.0).round() / 10.0 } + fn round(&self, value: f64) -> f64 { + (value * 10.0).round() / 10.0 
+ } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/miner.rs b/gems/decomplex/rust/src/decomplex/detectors/miner.rs index d771dd1a0..695ffed8e 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/miner.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/miner.rs @@ -53,7 +53,10 @@ impl Miner { fn new(sites: Vec) -> Self { let mut groups = BTreeMap::new(); for s in &sites { - groups.entry((s.kind.clone(), s.members.clone())).or_insert_with(Vec::new).push(s.clone()); + groups + .entry((s.kind.clone(), s.members.clone())) + .or_insert_with(Vec::new) + .push(s.clone()); } Self { sites, groups } } @@ -66,7 +69,9 @@ impl Miner { methods.insert((s.file.clone(), s.function.clone())); } let scatter = methods.len(); - if scatter < min_scatter { continue; } + if scatter < min_scatter { + continue; + } let mut sites = Vec::new(); let mut spans = BTreeMap::new(); @@ -101,16 +106,20 @@ impl Miner { let mut out = Vec::new(); for s in &self.sites { for (kind, mem, sup) in &popular { - if kind != &s.kind { continue; } - + if kind != &s.kind { + continue; + } + let mem_set: BTreeSet<_> = mem.iter().cloned().collect(); let s_mem_set: BTreeSet<_> = s.members.iter().cloned().collect(); - + let diff_mem_s: BTreeSet<_> = mem_set.difference(&s_mem_set).cloned().collect(); let diff_s_mem: BTreeSet<_> = s_mem_set.difference(&mem_set).cloned().collect(); if diff_mem_s.len() == 1 && diff_s_mem.is_empty() { - if s.members == *mem { continue; } + if s.members == *mem { + continue; + } let l = self.loc(s); let mut spans = BTreeMap::new(); diff --git a/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs b/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs index 1855c00ba..4fd2685d8 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs @@ -1,4 +1,4 @@ -use crate::decomplex::ast::{Span}; +use crate::decomplex::ast::Span; use 
crate::decomplex::detectors::local_flow; use crate::decomplex::syntax::Language; use anyhow::Result; @@ -41,7 +41,10 @@ struct RangeInfo { last: usize, } -pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { +pub fn scan_files( + files: &[PathBuf], + language: Language, +) -> Result> { let summaries = local_flow::scan_files(files, language)?; let detector = OperationalDiscontinuity::new(summaries); Ok(detector.findings()) @@ -67,20 +70,46 @@ impl OperationalDiscontinuity { } fn findings(&self) -> Vec { - let mut out: Vec<_> = self.summaries.iter().filter_map(|s| self.finding_for(s)).collect(); - out.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.file.cmp(&b.file)).then_with(|| a.line.cmp(&b.line))); + let mut out: Vec<_> = self + .summaries + .iter() + .filter_map(|s| self.finding_for(s)) + .collect(); + out.sort_by(|a, b| { + b.score + .cmp(&a.score) + .then_with(|| a.file.cmp(&b.file)) + .then_with(|| a.line.cmp(&b.line)) + }); out } - fn finding_for(&self, summary: &local_flow::MethodSummary) -> Option { - if summary.boundaries.is_empty() { return None } + fn finding_for( + &self, + summary: &local_flow::MethodSummary, + ) -> Option { + if summary.boundaries.is_empty() { + return None; + } let ranges = self.variable_ranges(summary); - let resets: Vec<_> = summary.boundaries.iter().filter_map(|b| self.reset_at(b, &ranges)).collect(); - if resets.is_empty() { return None } + let resets: Vec<_> = summary + .boundaries + .iter() + .filter_map(|b| self.reset_at(b, &ranges)) + .collect(); + if resets.is_empty() { + return None; + } - let score = resets.iter().map(|r| (r.dead.len() as isize + r.new.len() as isize - r.continuing.len() as isize)).sum::() + (resets.len() as isize * 8); - if score < self.min_score { return None } + let score = resets + .iter() + .map(|r| (r.dead.len() as isize + r.new.len() as isize - r.continuing.len() as isize)) + .sum::() + + (resets.len() as isize * 8); + if score < self.min_score { + return None; + } let 
confidence_reasons = self.confidence_reasons_for(&summary.name, score, &resets); let at = format!("{}:{}:{}", summary.file, summary.name, summary.line); @@ -99,19 +128,34 @@ impl OperationalDiscontinuity { dead_total: resets.iter().map(|r| r.dead.len()).sum(), new_total: resets.iter().map(|r| r.new.len()).sum(), reset_points: resets, - confidence: if confidence_reasons.is_empty() { "review".to_string() } else { "high".to_string() }, + confidence: if confidence_reasons.is_empty() { + "review".to_string() + } else { + "high".to_string() + }, confidence_reasons, spans, }) } - fn confidence_reasons_for(&self, method_name: &str, score: isize, resets: &[ResetPoint]) -> Vec { + fn confidence_reasons_for( + &self, + method_name: &str, + score: isize, + resets: &[ResetPoint], + ) -> Vec { let explicit_phase = resets.iter().any(|r| self.phase_marker(r)); let mut reasons = Vec::new(); - if resets.len() >= 2 { reasons.push("repeated_resets".to_string()); } - if explicit_phase { reasons.push("explicit_phase_marker".to_string()); } - if score >= 20 { reasons.push("high_score".to_string()); } - + if resets.len() >= 2 { + reasons.push("repeated_resets".to_string()); + } + if explicit_phase { + reasons.push("explicit_phase_marker".to_string()); + } + if score >= 20 { + reasons.push("high_score".to_string()); + } + if self.grammar_method(method_name) && !explicit_phase { reasons.retain(|r| r != "repeated_resets" && r != "high_score"); } @@ -119,7 +163,8 @@ impl OperationalDiscontinuity { } fn phase_marker(&self, reset: &ResetPoint) -> bool { - let re = regex::Regex::new(r"(?i)^\#\s*(?:\d+[a-z]?\s*[.)]|(?:phase|step|stage)\b)").unwrap(); + let re = + regex::Regex::new(r"(?i)^\#\s*(?:\d+[a-z]?\s*[.)]|(?:phase|step|stage)\b)").unwrap(); re.is_match(&reset.text) } @@ -128,10 +173,14 @@ impl OperationalDiscontinuity { re.is_match(method_name) } - fn reset_at(&self, boundary: &local_flow::Boundary, ranges: &BTreeMap) -> Option { + fn reset_at( + &self, + boundary: &local_flow::Boundary, + 
ranges: &BTreeMap, + ) -> Option { let before = boundary.before_index; let after = boundary.after_index; - + let mut dead = Vec::new(); let mut continuing = Vec::new(); let mut new_vars = Vec::new(); @@ -150,7 +199,10 @@ impl OperationalDiscontinuity { } } - if dead.len() < self.min_dead || new_vars.len() < self.min_new || continuing.len() > self.max_continuing { + if dead.len() < self.min_dead + || new_vars.len() < self.min_new + || continuing.len() > self.max_continuing + { return None; } @@ -175,7 +227,13 @@ impl OperationalDiscontinuity { for statement in &summary.statements { let touched: BTreeSet<_> = statement.reads.union(&statement.writes).cloned().collect(); for name in touched { - ranges.entry(name).and_modify(|r: &mut RangeInfo| r.last = statement.index).or_insert(RangeInfo { first: statement.index, last: statement.index }); + ranges + .entry(name) + .and_modify(|r: &mut RangeInfo| r.last = statement.index) + .or_insert(RangeInfo { + first: statement.index, + last: statement.index, + }); } } ranges diff --git a/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs b/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs index a15e9bc8c..f31282df6 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs @@ -96,7 +96,10 @@ impl OversizedPredicate { ], ); - let atoms_text: Vec = atoms.into_iter().map(|a| ast::slice(a, &self.lines)).collect(); + let atoms_text: Vec = atoms + .into_iter() + .map(|a| ast::slice(a, &self.lines)) + .collect(); self.findings.push(OversizedPredicateRow { at, diff --git a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs index 51db631ef..f030d819a 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs @@ -72,8 +72,16 @@ impl PathCondition { let b = 
node.children.get(2).and_then(ast::node); let atoms = self.cond_atoms(cond); - let then_g = if node.r#type == "IF" { atoms.clone() } else { self.negate(&atoms) }; - let else_g = if node.r#type == "IF" { self.negate(&atoms) } else { atoms.clone() }; + let then_g = if node.r#type == "IF" { + atoms.clone() + } else { + self.negate(&atoms) + }; + let else_g = if node.r#type == "IF" { + self.negate(&atoms) + } else { + atoms.clone() + }; if let Some(a_node) = a { let mut next_guards = guards.to_vec(); @@ -106,19 +114,39 @@ impl PathCondition { fn cond_atoms(&self, cond: Option<&Node>) -> Vec> { let Some(cond) = cond else { return Vec::new() }; - ast::flatten_and(cond).into_iter().map(|a| { - let t = ast::slice(a, &self.lines); - let (text, neg) = ast::canon_polarity(&t); - vec![text, if neg { "true".to_string() } else { "false".to_string() }] - }).collect() + ast::flatten_and(cond) + .into_iter() + .map(|a| { + let t = ast::slice(a, &self.lines); + let (text, neg) = ast::canon_polarity(&t); + vec![ + text, + if neg { + "true".to_string() + } else { + "false".to_string() + }, + ] + }) + .collect() } fn negate(&self, atoms: &[Vec]) -> Vec> { - atoms.iter().map(|a| { - let t = &a[0]; - let n = a[1] == "true"; - vec![t.clone(), if !n { "true".to_string() } else { "false".to_string() }] - }).collect() + atoms + .iter() + .map(|a| { + let t = &a[0]; + let n = a[1] == "true"; + vec![ + t.clone(), + if !n { + "true".to_string() + } else { + "false".to_string() + }, + ] + }) + .collect() } fn record(&mut self, node: &Node, defstack: &[String], guards: &[Vec]) { @@ -134,15 +162,27 @@ impl PathCondition { } let slice = ast::slice(node, &self.lines); - let action = if slice.len() > 80 { slice[..80].to_string() } else { slice }; + let action = if slice.len() > 80 { + slice[..80].to_string() + } else { + slice + }; self.sites.push(Site { guards: members, action, file: self.file.clone(), - defn: defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + defn: defstack + 
.last() + .cloned() + .unwrap_or_else(|| "(top-level)".to_string()), line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], }); } } @@ -162,11 +202,14 @@ impl Report { } groups.entry(s.guards.clone()).or_default().push(s.clone()); } - - let ordered_groups = keys.into_iter().map(|k| { - let v = groups.remove(&k).unwrap(); - (k, v) - }).collect(); + + let ordered_groups = keys + .into_iter() + .map(|k| { + let v = groups.remove(&k).unwrap(); + (k, v) + }) + .collect(); Self { sites, @@ -181,7 +224,9 @@ impl Report { } fn neglected(&self, min_support: usize) -> Vec { - let popular: Vec<_> = self.groups.iter() + let popular: Vec<_> = self + .groups + .iter() .filter(|(_, s)| s.len() >= min_support) .map(|(g, s)| (g.clone(), s.len())) .collect(); diff --git a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs index c2210dd17..5c2073953 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs @@ -70,14 +70,20 @@ impl PredicateAlias { }; let scope = node.children.get(1).and_then(ast::node); let Some(scope) = scope else { return }; - if scope.r#type != "SCOPE" { return }; + if scope.r#type != "SCOPE" { + return; + }; let body = scope.children.get(2).and_then(ast::node); let Some(body) = body else { return }; - if body.r#type == "BLOCK" { return }; + if body.r#type == "BLOCK" { + return; + }; let txt = ast::slice(body, &self.lines); - if txt.is_empty() || txt.len() > 200 { return }; + if txt.is_empty() || txt.len() > 200 { + return; + }; self.preds.push(Pred { name: name.clone(), @@ -85,7 +91,12 @@ impl PredicateAlias { file: self.file.clone(), defn: name, line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], 
+ span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], }); } } @@ -119,9 +130,13 @@ impl Report { for body in keys { let ps = by_body.remove(&body).unwrap(); let mut names_set = BTreeSet::new(); - for p in &ps { names_set.insert(p.name.clone()); } + for p in &ps { + names_set.insert(p.name.clone()); + } let names: Vec<_> = names_set.into_iter().collect(); - if names.len() < 2 { continue; } + if names.len() < 2 { + continue; + } let mut sites = Vec::new(); let mut spans = BTreeMap::new(); diff --git a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs index 8b1f32bdf..fb71bd95b 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs @@ -111,7 +111,12 @@ impl RedundantNilGuard { } } - fn process_block(&mut self, stmts: &[&Node], defstack: &[String], known: &BTreeSet) -> Flow { + fn process_block( + &mut self, + stmts: &[&Node], + defstack: &[String], + known: &BTreeSet, + ) -> Flow { let mut current = known.clone(); for stmt in stmts { let flow = self.process_stmt(stmt, defstack, ¤t); @@ -155,7 +160,12 @@ impl RedundantNilGuard { } } - fn process_branch(&mut self, node: &Node, defstack: &[String], known: &BTreeSet) -> Flow { + fn process_branch( + &mut self, + node: &Node, + defstack: &[String], + known: &BTreeSet, + ) -> Flow { let cond = node.children.get(0).and_then(ast::node); let then_body = node.children.get(1).and_then(ast::node); let else_body = node.children.get(2).and_then(ast::node); @@ -186,7 +196,11 @@ impl RedundantNilGuard { terminated: false, } } else { - let intersection: BTreeSet<_> = then_flow.known.intersection(&else_flow.known).cloned().collect(); + let intersection: BTreeSet<_> = then_flow + .known + .intersection(&else_flow.known) + .cloned() + .collect(); Flow { known: intersection, terminated: false, @@ -228,7 +242,12 @@ 
impl RedundantNilGuard { } } - fn record_redundant(&mut self, node: &Node, defstack: &[String], known: &BTreeSet) -> bool { + fn record_redundant( + &mut self, + node: &Node, + defstack: &[String], + known: &BTreeSet, + ) -> bool { let local = self.redundant_nil_subject(node, known); let Some(local) = local else { return false }; @@ -385,12 +404,10 @@ impl RedundantNilGuard { fn subject_key(&self, node: &Node) -> Option { match node.r#type.as_str() { - "LVAR" | "DVAR" | "VCALL" => { - match node.children.first()? { - Child::String(s) | Child::Symbol(s) => Some(s.clone()), - _ => None, - } - } + "LVAR" | "DVAR" | "VCALL" => match node.children.first()? { + Child::String(s) | Child::Symbol(s) => Some(s.clone()), + _ => None, + }, "CALL" => { let recv = node.children.get(0).and_then(ast::node); let mid = match node.children.get(1)? { @@ -398,7 +415,9 @@ impl RedundantNilGuard { _ => return None, }; let args = node.children.get(2); - if (args.is_none() || matches!(args, Some(Child::Nil))) && self.stable_reader_name(mid) { + if (args.is_none() || matches!(args, Some(Child::Nil))) + && self.stable_reader_name(mid) + { if let Some(recv) = recv { if recv.r#type == "SELF" { return Some(format!("self.{}", mid)); @@ -429,7 +448,9 @@ impl RedundantNilGuard { } fn nil_arg(&self, args: Option<&Child>) -> bool { - let Some(Child::Node(node)) = args else { return false }; + let Some(Child::Node(node)) = args else { + return false; + }; if node.r#type != "LIST" { return false; } diff --git a/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs index 8a73ecce2..c1823ca49 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs @@ -95,10 +95,18 @@ impl SemanticAlias { self.uses.push(Use { canon: c, file: self.file.clone(), - defn: next_defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + defn: next_defstack 
+ .last() + .cloned() + .unwrap_or_else(|| "(top-level)".to_string()), line: node.first_lineno, raw: ast::slice(node, &self.lines), - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], }); } @@ -111,9 +119,12 @@ impl SemanticAlias { let (mut t, _) = ast::canon_polarity(text); t = t.strip_prefix("self.").unwrap_or(&t).to_string(); t = t.strip_prefix('@').unwrap_or(&t).to_string(); - + // Ruby: t = t.sub(/\A[A-Za-z_]\w*(?:\([^)]*\))?\.(?=[A-Za-z_]\w*\s*(==|!=|\.))/, "") - let re = regex::Regex::new(r"^[A-Za-z_]\w*(?:\([^)]*\))?\.(?P[A-Za-z_]\w*\s*(?:==|!=|\.))").unwrap(); + let re = regex::Regex::new( + r"^[A-Za-z_]\w*(?:\([^)]*\))?\.(?P[A-Za-z_]\w*\s*(?:==|!=|\.))", + ) + .unwrap(); t = re.replace(&t, "$rest").to_string(); t.split_whitespace().collect::>().join(" ") @@ -123,23 +134,32 @@ impl SemanticAlias { let mid = node.children.get(1); match mid { Some(Child::Symbol(s)) => matches!(s.as_str(), "==" | "!=" | "nil?"), - _ => false + _ => false, } } fn record_pred(&mut self, node: &Node) { if let Some(Child::Symbol(name)) = node.children.first() { - if !name.ends_with('?') { return; } + if !name.ends_with('?') { + return; + } let stmts = ast::body_stmts(node); - if stmts.len() != 1 { return; } + if stmts.len() != 1 { + return; + } self.preds.push(Pred { name: name.clone(), canon: self.canon(&ast::slice(stmts[0], &self.lines)), file: self.file.clone(), line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], }); } } @@ -171,9 +191,13 @@ impl Report { let mut out = Vec::new(); for (c, ps) in by_canon { let mut names_set = BTreeSet::new(); - for p in &ps { names_set.insert(p.name.clone()); } + for p in &ps { + names_set.insert(p.name.clone()); + } let names: Vec<_> = 
names_set.into_iter().collect(); - if names.len() < 2 { continue; } + if names.len() < 2 { + continue; + } let mut sites = Vec::new(); let mut spans = BTreeMap::new(); @@ -203,8 +227,12 @@ impl Report { let mut out = Vec::new(); for u in &self.uses { if let Some(ps) = by_canon.get(&u.canon) { - if ps.is_empty() { continue; } - if u.defn.ends_with('?') && ps.iter().any(|p| p.name == u.defn) { continue; } + if ps.is_empty() { + continue; + } + if u.defn.ends_with('?') && ps.iter().any(|p| p.name == u.defn) { + continue; + } let loc = format!("{}:{}:{}", u.file, u.defn, u.line); let mut spans = BTreeMap::new(); diff --git a/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs b/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs index d1a61fc9f..99643c01c 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs @@ -70,9 +70,17 @@ impl SequenceMine { self.sites.push(Site { calls, file: self.file.clone(), - defn: next_defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + defn: next_defstack + .last() + .cloned() + .unwrap_or_else(|| "(top-level)".to_string()), line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], }); } } @@ -124,7 +132,9 @@ impl Report { for j in i + 1..unique_calls.len() { let mut pair = vec![unique_calls[i].clone(), unique_calls[j].clone()]; pair.sort(); - *co_counts.entry((pair[0].clone(), pair[1].clone())).or_insert(0) += 1; + *co_counts + .entry((pair[0].clone(), pair[1].clone())) + .or_insert(0) += 1; } } } @@ -168,7 +178,7 @@ impl Report { for (has, missing, sup, conf) in &rules { if unique_calls.contains(has) && !unique_calls.contains(missing) { let at = format!("{}:{}:{}", s.file, s.defn, s.line); - + let key = (has.clone(), missing.clone(), at.clone()); if seen.insert(key) { 
let mut spans = BTreeMap::new(); @@ -187,7 +197,13 @@ impl Report { } } - out.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap().then_with(|| b.support.cmp(&a.support)).then_with(|| a.at.cmp(&b.at))); + out.sort_by(|a, b| { + b.confidence + .partial_cmp(&a.confidence) + .unwrap() + .then_with(|| b.support.cmp(&a.support)) + .then_with(|| a.at.cmp(&b.at)) + }); out } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs index d1565425b..aafb8d3bd 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs @@ -37,18 +37,25 @@ const NOISE_MIDS: &[&str] = &[ pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let mut parsed = Vec::new(); let mut global_immutable_readers: BTreeMap> = BTreeMap::new(); - let mut global_immutable_reader_types: BTreeMap> = BTreeMap::new(); + let mut global_immutable_reader_types: BTreeMap> = + BTreeMap::new(); let mut global_type_aliases: BTreeMap = BTreeMap::new(); for file in files { let (root, lines) = ast::parse_with_language(file, language)?; let scanner = StateBranchDensity::new(None, lines.clone(), None, None, None); - + for (name, readers) in scanner.immutable_struct_readers(&lines) { - global_immutable_readers.entry(name).or_default().extend(readers); + global_immutable_readers + .entry(name) + .or_default() + .extend(readers); } for (name, reader_types) in scanner.immutable_struct_reader_types(&lines) { - global_immutable_reader_types.entry(name).or_default().extend(reader_types); + global_immutable_reader_types + .entry(name) + .or_default() + .extend(reader_types); } global_type_aliases.extend(scanner.type_aliases(&lines)); @@ -92,46 +99,52 @@ impl StateBranchDensity { let ir = immutable_readers.unwrap_or_else(|| BTreeMap::new()); // Simplified let irt = immutable_reader_types.unwrap_or_else(|| 
BTreeMap::new()); let ta = type_aliases.unwrap_or_else(|| BTreeMap::new()); - + // Re-extract if not provided (matches Ruby's initialize) - let ir = if ir.is_empty() { - let s = Self { - file: file.clone().unwrap_or_default(), - lines: lines.clone(), - decisions: Vec::new(), + let ir = if ir.is_empty() { + let s = Self { + file: file.clone().unwrap_or_default(), + lines: lines.clone(), + decisions: Vec::new(), immutable_readers: BTreeMap::new(), immutable_reader_types: BTreeMap::new(), type_aliases: BTreeMap::new(), method_param_types: BTreeMap::new(), }; s.immutable_struct_readers(&lines) - } else { ir }; + } else { + ir + }; let irt = if irt.is_empty() { - let s = Self { - file: file.clone().unwrap_or_default(), - lines: lines.clone(), - decisions: Vec::new(), + let s = Self { + file: file.clone().unwrap_or_default(), + lines: lines.clone(), + decisions: Vec::new(), immutable_readers: BTreeMap::new(), immutable_reader_types: BTreeMap::new(), type_aliases: BTreeMap::new(), method_param_types: BTreeMap::new(), }; s.immutable_struct_reader_types(&lines) - } else { irt }; + } else { + irt + }; let ta = if ta.is_empty() { - let s = Self { - file: file.clone().unwrap_or_default(), - lines: lines.clone(), - decisions: Vec::new(), + let s = Self { + file: file.clone().unwrap_or_default(), + lines: lines.clone(), + decisions: Vec::new(), immutable_readers: BTreeMap::new(), immutable_reader_types: BTreeMap::new(), type_aliases: BTreeMap::new(), method_param_types: BTreeMap::new(), }; s.type_aliases(&lines) - } else { ta }; + } else { + ta + }; let mut s = Self { file: file.unwrap_or_default(), @@ -186,7 +199,11 @@ impl StateBranchDensity { node.last_column, ], predicate: ast::slice(cond, &self.lines), - state_refs: refs.into_iter().collect::>().into_iter().collect(), + state_refs: refs + .into_iter() + .collect::>() + .into_iter() + .collect(), }); } @@ -266,19 +283,31 @@ impl StateBranchDensity { fn immutable_reader(&self, type_name: &str, mid: &str) -> bool { let 
resolved = self.resolve_type_alias(type_name); let readers = self.immutable_readers.get(&resolved).or_else(|| { - resolved.split("::").last().and_then(|last| self.immutable_readers.get(last)) + resolved + .split("::") + .last() + .and_then(|last| self.immutable_readers.get(last)) }); readers.map(|r| r.contains(mid)).unwrap_or(false) } - fn immutable_reader_result_type(&self, recv: &Node, mid: &str, args: Option<&Child>, defn: &str) -> Option { + fn immutable_reader_result_type( + &self, + recv: &Node, + mid: &str, + args: Option<&Child>, + defn: &str, + ) -> Option { if !self.empty_arg_list(args) { return None; } let owner_type = self.immutable_receiver_type(recv, defn)?; let resolved = self.resolve_type_alias(&owner_type); let reader_types = self.immutable_reader_types.get(&resolved).or_else(|| { - resolved.split("::").last().and_then(|last| self.immutable_reader_types.get(last)) + resolved + .split("::") + .last() + .and_then(|last| self.immutable_reader_types.get(last)) })?; reader_types.get(mid).cloned() } @@ -296,7 +325,8 @@ impl StateBranchDensity { fn immutable_struct_readers(&self, lines: &[String]) -> BTreeMap> { let mut readers = BTreeMap::new(); let mut class_stack = Vec::new(); - let class_struct_re = Regex::new(r"^\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b").unwrap(); + let class_struct_re = + Regex::new(r"^\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b").unwrap(); let const_re = Regex::new(r"^\s*const\s+:([A-Za-z_]\w*)\b").unwrap(); let end_re = Regex::new(r"^\s*end\s*(?:#.*)?$").unwrap(); @@ -307,7 +337,10 @@ impl StateBranchDensity { } if !class_stack.is_empty() { if let Some(caps) = const_re.captures(line) { - readers.entry(class_stack.last().unwrap().clone()).or_insert_with(BTreeSet::new).insert(caps[1].to_string()); + readers + .entry(class_stack.last().unwrap().clone()) + .or_insert_with(BTreeSet::new) + .insert(caps[1].to_string()); continue; } } @@ -318,11 +351,17 @@ impl StateBranchDensity { readers } - fn 
immutable_struct_reader_types(&self, lines: &[String]) -> BTreeMap> { + fn immutable_struct_reader_types( + &self, + lines: &[String], + ) -> BTreeMap> { let mut reader_types = BTreeMap::new(); let mut class_stack = Vec::new(); - let class_struct_re = Regex::new(r"^\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b").unwrap(); - let const_type_re = Regex::new(r"^\s*const\s+:([A-Za-z_]\w*)\s*,\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\b").unwrap(); + let class_struct_re = + Regex::new(r"^\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b").unwrap(); + let const_type_re = + Regex::new(r"^\s*const\s+:([A-Za-z_]\w*)\s*,\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\b") + .unwrap(); let end_re = Regex::new(r"^\s*end\s*(?:#.*)?$").unwrap(); for line in lines { @@ -332,7 +371,10 @@ impl StateBranchDensity { } if !class_stack.is_empty() { if let Some(caps) = const_type_re.captures(line) { - reader_types.entry(class_stack.last().unwrap().clone()).or_insert_with(BTreeMap::new).insert(caps[1].to_string(), caps[2].to_string()); + reader_types + .entry(class_stack.last().unwrap().clone()) + .or_insert_with(BTreeMap::new) + .insert(caps[1].to_string(), caps[2].to_string()); continue; } } @@ -345,8 +387,11 @@ impl StateBranchDensity { fn type_aliases(&self, lines: &[String]) -> BTreeMap { let mut aliases = BTreeMap::new(); - let type_alias_re = Regex::new(r"^\s*([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*([A-Z]\w*(?:::[A-Z]\w*)*)\s*\}").unwrap(); - let const_alias_re = Regex::new(r"^\s*([A-Z]\w*)\s*=\s*([A-Z]\w*(?:::[A-Z]\w*)*)\b").unwrap(); + let type_alias_re = + Regex::new(r"^\s*([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*([A-Z]\w*(?:::[A-Z]\w*)*)\s*\}") + .unwrap(); + let const_alias_re = + Regex::new(r"^\s*([A-Z]\w*)\s*=\s*([A-Z]\w*(?:::[A-Z]\w*)*)\b").unwrap(); for line in lines { if let Some(caps) = type_alias_re.captures(line) { @@ -367,7 +412,10 @@ impl StateBranchDensity { } seen.insert(current.clone()); let target = self.type_aliases.get(¤t).or_else(|| { - 
current.split("::").last().and_then(|last| self.type_aliases.get(last)) + current + .split("::") + .last() + .and_then(|last| self.type_aliases.get(last)) }); match target { Some(t) => current = t.clone(), @@ -376,7 +424,10 @@ impl StateBranchDensity { } } - fn extract_method_param_types(&self, lines: &[String]) -> BTreeMap> { + fn extract_method_param_types( + &self, + lines: &[String], + ) -> BTreeMap> { let mut types_by_method = BTreeMap::new(); let mut pending_sig = String::new(); let def_re = Regex::new(r"^\s*def\s+([A-Za-z_]\w*[!?=]?)(?:\s|\(|$)").unwrap(); @@ -399,7 +450,8 @@ impl StateBranchDensity { fn sig_param_types(&self, sig_source: &str) -> BTreeMap { let params_re = Regex::new(r"params\s*\((.*?)\)").unwrap(); - let param_pair_re = Regex::new(r"([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)").unwrap(); + let param_pair_re = + Regex::new(r"([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)").unwrap(); let mut params = BTreeMap::new(); if let Some(p_caps) = params_re.captures(sig_source) { for pair in param_pair_re.captures_iter(&p_caps[1]) { @@ -422,7 +474,10 @@ impl Report { fn findings(&self) -> Vec { let mut groups: BTreeMap<(String, String), Vec> = BTreeMap::new(); for d in &self.decisions { - groups.entry((d.file.clone(), d.defn.clone())).or_default().push(d.clone()); + groups + .entry((d.file.clone(), d.defn.clone())) + .or_default() + .push(d.clone()); } let mut rows = Vec::new(); diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs index 0e92e93b7..6c958932c 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs @@ -200,7 +200,14 @@ impl StateMesh { } } - fn walk_writes(&self, node: &Node, lines: &[String], defstack: &[String], file: &str, out: &mut Vec) { + fn walk_writes( + &self, + node: &Node, + lines: &[String], + defstack: &[String], + file: &str, + out: &mut Vec, + ) { 
let mut next_defstack = defstack.to_vec(); match node.r#type.as_str() { "CLASS" | "MODULE" | "DEFN" => { @@ -214,7 +221,10 @@ impl StateMesh { } } "ATTRASGN" => { - if let (Some(recv), Some(Child::Symbol(msg))) = (node.children.get(0).and_then(ast::node), node.children.get(1)) { + if let (Some(recv), Some(Child::Symbol(msg))) = ( + node.children.get(0).and_then(ast::node), + node.children.get(1), + ) { if msg != "[]=" { let attr = msg.trim_end_matches('=').to_string(); let norm = self.normalize(&attr); @@ -223,9 +233,17 @@ impl StateMesh { norm, recv: self.recv_slice(Some(recv), lines), file: file.to_string(), - defn: next_defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + defn: next_defstack + .last() + .cloned() + .unwrap_or_else(|| "(top-level)".to_string()), line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], }); } } @@ -238,9 +256,17 @@ impl StateMesh { norm, recv: "self".to_string(), file: file.to_string(), - defn: next_defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + defn: next_defstack + .last() + .cloned() + .unwrap_or_else(|| "(top-level)".to_string()), line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], }); } } @@ -263,7 +289,15 @@ impl StateMesh { } } - fn walk_reads(&self, node: &Node, lines: &[String], defstack: &[String], file: &str, field_norms: &BTreeSet, out: &mut Vec) { + fn walk_reads( + &self, + node: &Node, + lines: &[String], + defstack: &[String], + file: &str, + field_norms: &BTreeSet, + out: &mut Vec, + ) { let mut next_defstack = defstack.to_vec(); match node.r#type.as_str() { "CLASS" | "MODULE" | "DEFN" => { @@ -277,21 +311,44 @@ impl StateMesh { } } "CALL" | "OPCALL" | "FCALL" | "VCALL" => 
{ - let recv = if node.r#type == "CALL" || node.r#type == "OPCALL" { node.children.get(0).and_then(ast::node) } else { None }; - let mid = if node.r#type == "CALL" || node.r#type == "OPCALL" { node.children.get(1) } else { node.children.get(0) }; - let args = if node.r#type == "CALL" || node.r#type == "OPCALL" { node.children.get(2) } else { node.children.get(1) }; + let recv = if node.r#type == "CALL" || node.r#type == "OPCALL" { + node.children.get(0).and_then(ast::node) + } else { + None + }; + let mid = if node.r#type == "CALL" || node.r#type == "OPCALL" { + node.children.get(1) + } else { + node.children.get(0) + }; + let args = if node.r#type == "CALL" || node.r#type == "OPCALL" { + node.children.get(2) + } else { + node.children.get(1) + }; if let Some(Child::Symbol(name)) = mid { - if args.is_none() || matches!(args, Some(Child::Nil)) || self.is_empty_list(args) { + if args.is_none() + || matches!(args, Some(Child::Nil)) + || self.is_empty_list(args) + { if field_norms.contains(name) { out.push(Read { attr: name.clone(), norm: name.clone(), recv: self.recv_slice(recv, lines), file: file.to_string(), - defn: next_defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + defn: next_defstack + .last() + .cloned() + .unwrap_or_else(|| "(top-level)".to_string()), line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], }); } } @@ -306,9 +363,17 @@ impl StateMesh { norm, recv: "self".to_string(), file: file.to_string(), - defn: next_defstack.last().cloned().unwrap_or_else(|| "(top-level)".to_string()), + defn: next_defstack + .last() + .cloned() + .unwrap_or_else(|| "(top-level)".to_string()), line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], }); } 
} @@ -329,16 +394,21 @@ impl StateMesh { let files: Vec<_> = self.src_map.keys().map(PathBuf::from).collect(); let sa = semantic_alias::scan_files(&files, language)?; - + for m in sa.reification_misses { let loc = m.at.clone(); let parts: Vec<&str> = loc.split(':').collect(); - if parts.len() < 3 { continue; } + if parts.len() < 3 { + continue; + } let line = parts.last().unwrap().parse::().unwrap_or(0); let defn = parts[parts.len() - 2].to_string(); let file = parts[..parts.len() - 2].join(":"); - if let Some(matched) = field_norms.iter().find(|fnorm| m.raw.contains(*fnorm) || m.canon.contains(*fnorm)) { + if let Some(matched) = field_norms + .iter() + .find(|fnorm| m.raw.contains(*fnorm) || m.canon.contains(*fnorm)) + { self.re_derivations.push(ReDerivation { field: matched.clone(), file, @@ -360,25 +430,43 @@ impl StateMesh { for fnorm in &field_norms { let ws: Vec<_> = self.writes.iter().filter(|w| &w.norm == fnorm).collect(); let rs: Vec<_> = self.reads.iter().filter(|r| &r.norm == fnorm).collect(); - let ds: Vec<_> = self.re_derivations.iter().filter(|d| &d.field == fnorm).collect(); + let ds: Vec<_> = self + .re_derivations + .iter() + .filter(|d| &d.field == fnorm) + .collect(); let mut all_sites = BTreeSet::new(); - for w in &ws { all_sites.insert((w.file.clone(), w.defn.clone())); } - for r in &rs { all_sites.insert((r.file.clone(), r.defn.clone())); } - for d in &ds { all_sites.insert((d.file.clone(), d.defn.clone())); } + for w in &ws { + all_sites.insert((w.file.clone(), w.defn.clone())); + } + for r in &rs { + all_sites.insert((r.file.clone(), r.defn.clone())); + } + for d in &ds { + all_sites.insert((d.file.clone(), d.defn.clone())); + } let scatter = all_sites.len(); let mut write_sites = BTreeSet::new(); - for w in &ws { write_sites.insert((w.file.clone(), w.defn.clone())); } + for w in &ws { + write_sites.insert((w.file.clone(), w.defn.clone())); + } let write_scatter = write_sites.len(); let mut read_sites = BTreeSet::new(); - for r in &rs { 
read_sites.insert((r.file.clone(), r.defn.clone())); } + for r in &rs { + read_sites.insert((r.file.clone(), r.defn.clone())); + } let read_scatter = read_sites.len(); let mut receivers = BTreeSet::new(); - for w in &ws { receivers.insert(w.recv.clone()); } - for r in &rs { receivers.insert(r.recv.clone()); } + for w in &ws { + receivers.insert(w.recv.clone()); + } + for r in &rs { + receivers.insert(r.recv.clone()); + } let receiver_types = receivers.len(); let n_writes = ws.len(); @@ -404,24 +492,39 @@ impl StateMesh { }); } - metrics_vec.sort_by(|a, b| b.messiness.partial_cmp(&a.messiness).unwrap_or(std::cmp::Ordering::Equal).then_with(|| a.name.cmp(&b.name))); + metrics_vec.sort_by(|a, b| { + b.messiness + .partial_cmp(&a.messiness) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| a.name.cmp(&b.name)) + }); for (i, m) in metrics_vec.iter_mut().enumerate() { m.rank = i + 1; } let total = metrics_vec.len(); if total > 1 { - let attrs = ["writes", "reads", "re_derivations", "scatter", "messiness", "pressure"]; + let attrs = [ + "writes", + "reads", + "re_derivations", + "scatter", + "messiness", + "pressure", + ]; for attr in &attrs { - let mut vals: Vec = metrics_vec.iter().map(|m| match *attr { - "writes" => m.writes as f64, - "reads" => m.reads as f64, - "re_derivations" => m.re_derivations as f64, - "scatter" => m.scatter as f64, - "messiness" => m.messiness, - "pressure" => m.pressure as f64, - _ => 0.0, - }).collect(); + let mut vals: Vec = metrics_vec + .iter() + .map(|m| match *attr { + "writes" => m.writes as f64, + "reads" => m.reads as f64, + "re_derivations" => m.re_derivations as f64, + "scatter" => m.scatter as f64, + "messiness" => m.messiness, + "pressure" => m.pressure as f64, + _ => 0.0, + }) + .collect(); vals.sort_by(|a, b| a.partial_cmp(b).unwrap()); for m in metrics_vec.iter_mut() { @@ -445,71 +548,120 @@ impl StateMesh { fn to_json_graph(&self) -> StateMeshReport { let fm = self.metrics(); - let fm_index: BTreeMap = fm.iter().map(|m| 
(m.name.clone(), m)).collect(); + let fm_index: BTreeMap = + fm.iter().map(|m| (m.name.clone(), m)).collect(); let field_norms = self.known_field_norms(); let mut fields_obj = BTreeMap::new(); for fnorm in &field_norms { let m = fm_index.get(fnorm).unwrap(); - let ws: Vec<_> = self.writes.iter().filter(|w| &w.norm == fnorm).map(|w| SiteInfo { - file: w.file.clone(), defn: w.defn.clone(), line: w.line, recv: w.recv.clone(), span: w.span, - }).collect(); - let rs: Vec<_> = self.reads.iter().filter(|r| &r.norm == fnorm).map(|r| SiteInfo { - file: r.file.clone(), defn: r.defn.clone(), line: r.line, recv: r.recv.clone(), span: r.span, - }).collect(); - let ds: Vec<_> = self.re_derivations.iter().filter(|d| &d.field == fnorm).map(|d| ReDerivationInfo { - file: d.file.clone(), defn: d.defn.clone(), line: d.line, raw: d.raw.clone(), predicate: d.predicate.clone(), canon: d.canon.clone(), - }).collect(); - - fields_obj.insert(fnorm.clone(), StateFieldRow { - messiness: m.messiness, - rank: m.rank, - metrics: FieldMetricsRow { - writes: m.writes, - reads: m.reads, - re_derivations: m.re_derivations, - scatter: m.scatter, - write_scatter: m.write_scatter, - read_scatter: m.read_scatter, - receiver_types: m.receiver_types, - fix_churn: 1.0, - pressure: m.pressure, - percentiles: m.percentiles.clone(), + let ws: Vec<_> = self + .writes + .iter() + .filter(|w| &w.norm == fnorm) + .map(|w| SiteInfo { + file: w.file.clone(), + defn: w.defn.clone(), + line: w.line, + recv: w.recv.clone(), + span: w.span, + }) + .collect(); + let rs: Vec<_> = self + .reads + .iter() + .filter(|r| &r.norm == fnorm) + .map(|r| SiteInfo { + file: r.file.clone(), + defn: r.defn.clone(), + line: r.line, + recv: r.recv.clone(), + span: r.span, + }) + .collect(); + let ds: Vec<_> = self + .re_derivations + .iter() + .filter(|d| &d.field == fnorm) + .map(|d| ReDerivationInfo { + file: d.file.clone(), + defn: d.defn.clone(), + line: d.line, + raw: d.raw.clone(), + predicate: d.predicate.clone(), + canon: 
d.canon.clone(), + }) + .collect(); + + fields_obj.insert( + fnorm.clone(), + StateFieldRow { + messiness: m.messiness, + rank: m.rank, + metrics: FieldMetricsRow { + writes: m.writes, + reads: m.reads, + re_derivations: m.re_derivations, + scatter: m.scatter, + write_scatter: m.write_scatter, + read_scatter: m.read_scatter, + receiver_types: m.receiver_types, + fix_churn: 1.0, + pressure: m.pressure, + percentiles: m.percentiles.clone(), + }, + writers: ws, + readers: rs, + re_derivations: ds, }, - writers: ws, - readers: rs, - re_derivations: ds, - }); + ); } - let mut all_unit_sites: BTreeMap<(String, String), (BTreeSet, BTreeSet)> = BTreeMap::new(); + let mut all_unit_sites: BTreeMap<(String, String), (BTreeSet, BTreeSet)> = + BTreeMap::new(); for w in &self.writes { - let entry = all_unit_sites.entry((w.file.clone(), w.defn.clone())).or_default(); + let entry = all_unit_sites + .entry((w.file.clone(), w.defn.clone())) + .or_default(); entry.0.insert(w.norm.clone()); } for r in &self.reads { - let entry = all_unit_sites.entry((r.file.clone(), r.defn.clone())).or_default(); + let entry = all_unit_sites + .entry((r.file.clone(), r.defn.clone())) + .or_default(); entry.1.insert(r.norm.clone()); } - let mut dirs: BTreeMap>> = BTreeMap::new(); + let mut dirs: BTreeMap>> = + BTreeMap::new(); for ((file, defn), (ws, rs)) in all_unit_sites { let path = Path::new(&file); - let dir = path.parent().map(|p| p.to_string_lossy().to_string()).unwrap_or_else(|| ".".to_string()); + let dir = path + .parent() + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_else(|| ".".to_string()); let dir = if dir.is_empty() { ".".to_string() } else { dir }; - let base = path.file_name().map(|s| s.to_string_lossy().to_string()).unwrap_or_else(|| file.clone()); - - dirs.entry(dir).or_default() - .entry(base).or_default() - .insert(defn.clone(), DefnObj { - name: defn, - writers: ws.len(), - readers: rs.len(), - fields: DefnFields { - written: ws.into_iter().collect(), - read: 
rs.into_iter().collect(), + let base = path + .file_name() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| file.clone()); + + dirs.entry(dir) + .or_default() + .entry(base) + .or_default() + .insert( + defn.clone(), + DefnObj { + name: defn, + writers: ws.len(), + readers: rs.len(), + fields: DefnFields { + written: ws.into_iter().collect(), + read: rs.into_iter().collect(), + }, }, - }); + ); } let mut hierarchy = Vec::new(); @@ -568,7 +720,8 @@ impl StateMesh { for w in &self.writes { *discovered.entry(w.norm.clone()).or_insert(0) += 1; } - let mut norms: BTreeSet = discovered.into_iter() + let mut norms: BTreeSet = discovered + .into_iter() .filter(|(_, count)| *count >= self.min_writes) .map(|(name, _)| name) .collect(); @@ -579,7 +732,9 @@ impl StateMesh { } fn recv_slice(&self, node: Option<&Node>, lines: &[String]) -> String { - let Some(node) = node else { return "?".to_string() }; + let Some(node) = node else { + return "?".to_string(); + }; ast::slice(node, lines) } diff --git a/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs b/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs index b37729187..32e1fc264 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs @@ -74,32 +74,56 @@ pub struct Graph { impl Graph { pub fn new(methods: Vec, edges: Vec) -> Self { let mut method_by_id = BTreeMap::new(); - for m in &methods { method_by_id.insert(m.id.clone(), m.clone()); } + for m in &methods { + method_by_id.insert(m.id.clone(), m.clone()); + } let mut edges_by_caller = BTreeMap::new(); let mut edges_by_callee = BTreeMap::new(); for e in &edges { - edges_by_caller.entry(e.caller.clone()).or_insert_with(Vec::new).push(e.clone()); - edges_by_callee.entry(e.callee.clone()).or_insert_with(Vec::new).push(e.clone()); + edges_by_caller + .entry(e.caller.clone()) + .or_insert_with(Vec::new) + .push(e.clone()); + 
edges_by_callee + .entry(e.callee.clone()) + .or_insert_with(Vec::new) + .push(e.clone()); } - Self { methods, edges, method_by_id, edges_by_caller, edges_by_callee } + Self { + methods, + edges, + method_by_id, + edges_by_caller, + edges_by_callee, + } } - pub fn method(&self, id: &str) -> Option<&Method> { self.method_by_id.get(id) } + pub fn method(&self, id: &str) -> Option<&Method> { + self.method_by_id.get(id) + } - pub fn internal_calls(&self, id: &str) -> Vec { self.edges_by_caller.get(id).cloned().unwrap_or_default() } + pub fn internal_calls(&self, id: &str) -> Vec { + self.edges_by_caller.get(id).cloned().unwrap_or_default() + } - pub fn internal_callers(&self, id: &str) -> Vec { self.edges_by_callee.get(id).cloned().unwrap_or_default() } + pub fn internal_callers(&self, id: &str) -> Vec { + self.edges_by_callee.get(id).cloned().unwrap_or_default() + } pub fn single_internal_caller(&self, id: &str) -> bool { let callers = self.internal_callers(id); let mut unique = BTreeMap::new(); - for c in callers { unique.insert(c.caller, true); } + for c in callers { + unique.insert(c.caller, true); + } unique.len() == 1 } - pub fn visibility(&self, id: &str) -> Option<&str> { self.method(id).map(|m| m.visibility.as_str()) } + pub fn visibility(&self, id: &str) -> Option<&str> { + self.method(id).map(|m| m.visibility.as_str()) + } } struct MethodCollector { @@ -108,11 +132,15 @@ struct MethodCollector { } impl MethodCollector { - fn new(file: String, lines: Vec) -> Self { Self { file, lines } } + fn new(file: String, lines: Vec) -> Self { + Self { file, lines } + } fn scan(&mut self, root: &Node) -> Vec { let mut out = Vec::new(); - out.extend(self.methods_from_statements(&self.top_level_statements(root), &self.top_level_owner())); + out.extend( + self.methods_from_statements(&self.top_level_statements(root), &self.top_level_owner()), + ); self.walk(root, &Vec::new(), &mut out); out } @@ -134,7 +162,9 @@ impl MethodCollector { } fn owner_methods(&self, owner_node: 
&Node, owner: &str) -> Vec { - let Some(body) = self.owner_body(owner_node) else { return Vec::new() }; + let Some(body) = self.owner_body(owner_node) else { + return Vec::new(); + }; self.methods_from_statements(&self.owner_statements(body), owner) } @@ -153,7 +183,13 @@ impl MethodCollector { methods } - fn handle_visibility_call(&self, stmt: &Node, owner: &str, current_visibility: &str, methods: &mut Vec) -> String { + fn handle_visibility_call( + &self, + stmt: &Node, + owner: &str, + current_visibility: &str, + methods: &mut Vec, + ) -> String { let vis = ast::child_to_string(stmt.children.get(0)).unwrap_or_default(); if let Some(args) = stmt.children.get(1).and_then(ast::node) { for arg in args.children.iter().filter_map(ast::node) { @@ -172,30 +208,56 @@ impl MethodCollector { fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; let scope = owner_node.children.get(scope_index).and_then(ast::node)?; - if scope.r#type != "SCOPE" { return None } + if scope.r#type != "SCOPE" { + return None; + } scope.children.get(2).and_then(ast::node) } fn owner_statements<'a>(&self, body: &'a Node) -> Vec<&'a Node> { - if body.r#type == "BLOCK" { body.children.iter().filter_map(ast::node).collect() } else { vec![body] } + if body.r#type == "BLOCK" { + body.children.iter().filter_map(ast::node).collect() + } else { + vec![body] + } } fn top_level_statements<'a>(&self, root: &'a Node) -> Vec<&'a Node> { - root.children.iter().filter_map(ast::node).flat_map(|c| if c.r#type == "BLOCK" { c.children.iter().filter_map(ast::node).collect() } else { vec![c] }).collect() + root.children + .iter() + .filter_map(ast::node) + .flat_map(|c| { + if c.r#type == "BLOCK" { + c.children.iter().filter_map(ast::node).collect() + } else { + vec![c] + } + }) + .collect() } fn bare_visibility_marker(&self, node: &Node) -> bool { - node.r#type == "VCALL" && 
VISIBILITY_MIDS.contains(&ast::child_to_string(node.children.get(0)).unwrap_or_default().as_str()) + node.r#type == "VCALL" + && VISIBILITY_MIDS.contains( + &ast::child_to_string(node.children.get(0)) + .unwrap_or_default() + .as_str(), + ) } fn visibility_call(&self, node: &Node) -> bool { - node.r#type == "FCALL" && VISIBILITY_MIDS.contains(&ast::child_to_string(node.children.get(0)).unwrap_or_default().as_str()) + node.r#type == "FCALL" + && VISIBILITY_MIDS.contains( + &ast::child_to_string(node.children.get(0)) + .unwrap_or_default() + .as_str(), + ) } fn literal_method_name(&self, node: &Node) -> Option { match node.r#type.as_str() { "LIT" | "STR" | "DSTR" => ast::child_to_string(node.children.get(0)), - _ => None + _ => None, } } @@ -207,8 +269,17 @@ impl MethodCollector { name: name.clone(), file: self.file.clone(), line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], - visibility: if node.r#type == "DEFS" { "public".to_string() } else { visibility.to_string() }, + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + visibility: if node.r#type == "DEFS" { + "public".to_string() + } else { + visibility.to_string() + }, } } @@ -216,9 +287,19 @@ impl MethodCollector { if node.r#type == "DEFS" { let receiver = node.children.get(0).and_then(ast::node); let prefix = if let Some(r) = receiver { - if r.r#type == "SELF" { "self".to_string() } else { ast::slice(r, &self.lines) } - } else { "?".to_string() }; - format!("{}.{}", prefix, ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string())) + if r.r#type == "SELF" { + "self".to_string() + } else { + ast::slice(r, &self.lines) + } + } else { + "?".to_string() + }; + format!( + "{}.{}", + prefix, + ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string()) + ) } else { ast::child_to_string(node.children.get(0)).unwrap_or_else(|| "?".to_string()) } @@ -231,11 +312,20 @@ impl 
MethodCollector { } fn owner_segment(&self, node: &Node) -> String { - let text = ast::slice(node.children.first().and_then(ast::node).unwrap_or(node), &self.lines); - if text.is_empty() { "(anonymous)".to_string() } else { text } + let text = ast::slice( + node.children.first().and_then(ast::node).unwrap_or(node), + &self.lines, + ); + if text.is_empty() { + "(anonymous)".to_string() + } else { + text + } } - fn top_level_owner(&self) -> String { format!("(top-level:{})", self.file) } + fn top_level_owner(&self) -> String { + format!("(top-level:{})", self.file) + } } struct EdgeCollector { @@ -247,13 +337,23 @@ struct EdgeCollector { impl EdgeCollector { fn new(file: String, lines: Vec, methods: &[Method]) -> Self { let mut map = BTreeMap::new(); - for m in methods { map.insert(m.id.clone(), m.clone()); } - Self { file, lines, method_by_id: map } + for m in methods { + map.insert(m.id.clone(), m.clone()); + } + Self { + file, + lines, + method_by_id: map, + } } fn scan(&mut self, root: &Node) -> Vec { let mut out = Vec::new(); - let top_level_methods: Vec<_> = self.top_level_statements(root).into_iter().filter(|s| METHOD_TYPES.contains(&s.r#type.as_str())).collect(); + let top_level_methods: Vec<_> = self + .top_level_statements(root) + .into_iter() + .filter(|s| METHOD_TYPES.contains(&s.r#type.as_str())) + .collect(); for m_node in top_level_methods { let id = format!("(top-level:{})#{}", self.file, self.method_name(m_node)); if let Some(m) = self.method_by_id.get(&id) { @@ -285,15 +385,31 @@ impl EdgeCollector { } } - fn collect_calls(&self, node: &Node, caller: &Method, context_stack: &[String], out: &mut Vec) { - if SKIP_NESTED_TYPES.contains(&node.r#type.as_str()) && !METHOD_TYPES.contains(&node.r#type.as_str()) { return } + fn collect_calls( + &self, + node: &Node, + caller: &Method, + context_stack: &[String], + out: &mut Vec, + ) { + if SKIP_NESTED_TYPES.contains(&node.r#type.as_str()) + && !METHOD_TYPES.contains(&node.r#type.as_str()) + { + return; + } 
let mut next_context = context_stack.to_vec(); - if CONDITIONAL_TYPES.contains(&node.r#type.as_str()) { next_context.push("conditional".to_string()) } - if ITERATION_TYPES.contains(&node.r#type.as_str()) { next_context.push("iterates".to_string()) } + if CONDITIONAL_TYPES.contains(&node.r#type.as_str()) { + next_context.push("conditional".to_string()) + } + if ITERATION_TYPES.contains(&node.r#type.as_str()) { + next_context.push("iterates".to_string()) + } if let Some(edge) = self.internal_edge(node, caller, &next_context) { - if edge.caller != edge.callee { out.push(edge) } + if edge.caller != edge.callee { + out.push(edge) + } } for child in node.children.iter().filter_map(ast::node) { @@ -301,7 +417,12 @@ impl EdgeCollector { } } - fn internal_edge(&self, node: &Node, caller: &Method, context_stack: &[String]) -> Option { + fn internal_edge( + &self, + node: &Node, + caller: &Method, + context_stack: &[String], + ) -> Option { let call = self.internal_call_name(node, caller)?; let id = format!("{}#{}", caller.owner, call.name); let callee = self.method_by_id.get(&id)?; @@ -313,8 +434,16 @@ impl EdgeCollector { callee_name: callee.name.clone(), file: self.file.clone(), line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], - r#type: context_stack.last().cloned().unwrap_or_else(|| "always".to_string()), + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + r#type: context_stack + .last() + .cloned() + .unwrap_or_else(|| "always".to_string()), kind: call.kind, confidence: "high".to_string(), }) @@ -322,50 +451,91 @@ impl EdgeCollector { fn internal_call_name(&self, node: &Node, caller: &Method) -> Option { match node.r#type.as_str() { - "FCALL" | "VCALL" => { - Some(InternalCallName { name: self.scoped_name(caller, &ast::child_to_string(node.children.get(0)).unwrap_or_default()), kind: "bare_internal".to_string() }) - } + "FCALL" | "VCALL" => Some(InternalCallName { + 
name: self.scoped_name( + caller, + &ast::child_to_string(node.children.get(0)).unwrap_or_default(), + ), + kind: "bare_internal".to_string(), + }), "CALL" | "OPCALL" => { let recv = node.children.get(0).and_then(ast::node)?; - if recv.r#type != "SELF" { return None } + if recv.r#type != "SELF" { + return None; + } let mid = ast::child_to_string(node.children.get(1))?; - Some(InternalCallName { name: self.scoped_name(caller, &mid), kind: "direct_self".to_string() }) + Some(InternalCallName { + name: self.scoped_name(caller, &mid), + kind: "direct_self".to_string(), + }) } - _ => None + _ => None, } } fn scoped_name(&self, caller: &Method, mid: &str) -> String { - if caller.name.starts_with("self.") { format!("self.{}", mid) } else { mid.to_string() } + if caller.name.starts_with("self.") { + format!("self.{}", mid) + } else { + mid.to_string() + } } // Reuse helpers from MethodCollector fn top_level_statements<'a>(&self, root: &'a Node) -> Vec<&'a Node> { - root.children.iter().filter_map(ast::node).flat_map(|c| if c.r#type == "BLOCK" { c.children.iter().filter_map(ast::node).collect() } else { vec![c] }).collect() + root.children + .iter() + .filter_map(ast::node) + .flat_map(|c| { + if c.r#type == "BLOCK" { + c.children.iter().filter_map(ast::node).collect() + } else { + vec![c] + } + }) + .collect() } fn method_name(&self, node: &Node) -> String { if node.r#type == "DEFS" { let receiver = node.children.get(0).and_then(ast::node); let prefix = if let Some(r) = receiver { - if r.r#type == "SELF" { "self".to_string() } else { ast::slice(r, &self.lines) } - } else { "?".to_string() }; - format!("{}.{}", prefix, ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string())) + if r.r#type == "SELF" { + "self".to_string() + } else { + ast::slice(r, &self.lines) + } + } else { + "?".to_string() + }; + format!( + "{}.{}", + prefix, + ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string()) + ) } else { 
ast::child_to_string(node.children.get(0)).unwrap_or_else(|| "?".to_string()) } } fn owner_methods<'a>(&self, owner_node: &'a Node) -> Vec<&'a Node> { - let Some(body) = self.owner_body(owner_node) else { return Vec::new() }; + let Some(body) = self.owner_body(owner_node) else { + return Vec::new(); + }; self.owner_statements(body) } fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; let scope = owner_node.children.get(scope_index).and_then(ast::node)?; - if scope.r#type != "SCOPE" { return None } + if scope.r#type != "SCOPE" { + return None; + } scope.children.get(2).and_then(ast::node) } fn owner_statements<'a>(&self, body: &'a Node) -> Vec<&'a Node> { - if body.r#type == "BLOCK" { body.children.iter().filter_map(ast::node).collect() } else { vec![body] } + if body.r#type == "BLOCK" { + body.children.iter().filter_map(ast::node).collect() + } else { + vec![body] + } } fn full_owner_name(&self, owners: &[String], node: &Node) -> String { let mut next = owners.to_vec(); @@ -373,8 +543,15 @@ impl EdgeCollector { next.join("::") } fn owner_segment(&self, node: &Node) -> String { - let text = ast::slice(node.children.first().and_then(ast::node).unwrap_or(node), &self.lines); - if text.is_empty() { "(anonymous)".to_string() } else { text } + let text = ast::slice( + node.children.first().and_then(ast::node).unwrap_or(node), + &self.lines, + ); + if text.is_empty() { + "(anonymous)".to_string() + } else { + text + } } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs index 71f3c9356..b56466ba2 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs @@ -32,7 +32,10 @@ struct MethodState { writes: Vec, } -pub fn scan_files(files: &[PathBuf], language: Language) -> 
Result> { +pub fn scan_files( + files: &[PathBuf], + language: Language, +) -> Result> { let mut rows = Vec::new(); for file in files { let (root, lines) = ast::parse_with_language(file, language)?; @@ -65,7 +68,12 @@ impl TemporalOrderingPressure { out } - fn walk_owners(&self, node: &Node, owners: &[String], out: &mut Vec) { + fn walk_owners( + &self, + node: &Node, + owners: &[String], + out: &mut Vec, + ) { if matches!(node.r#type.as_str(), "CLASS" | "MODULE") { let owner = self.owner_name(node); let methods = self.owner_methods(node); @@ -85,7 +93,10 @@ impl TemporalOrderingPressure { } fn owner_name(&self, node: &Node) -> String { - let name = ast::slice(node.children.first().and_then(ast::node).unwrap_or(node), &self.lines); + let name = ast::slice( + node.children.first().and_then(ast::node).unwrap_or(node), + &self.lines, + ); if name.is_empty() { "(anonymous)".to_string() } else { @@ -99,7 +110,10 @@ impl TemporalOrderingPressure { }; let stmts = if body.r#type == "BLOCK" { - body.children.iter().filter_map(ast::node).collect::>() + body.children + .iter() + .filter_map(ast::node) + .collect::>() } else { vec![body] }; @@ -151,8 +165,16 @@ impl TemporalOrderingPressure { }) .unwrap_or_else(|| "(anonymous)".to_string()); - let mut reads: Vec<_> = reads.into_iter().collect::>().into_iter().collect(); - let mut writes: Vec<_> = writes.into_iter().collect::>().into_iter().collect(); + let mut reads: Vec<_> = reads + .into_iter() + .collect::>() + .into_iter() + .collect(); + let mut writes: Vec<_> = writes + .into_iter() + .collect::>() + .into_iter() + .collect(); reads.sort(); writes.sort(); @@ -190,13 +212,23 @@ impl TemporalOrderingPressure { } } - fn pressure_row(&self, owner: &str, methods: &[MethodState]) -> Option { - let public_methods: Vec<_> = methods.iter().filter(|m| m.visibility == "public").collect(); + fn pressure_row( + &self, + owner: &str, + methods: &[MethodState], + ) -> Option { + let public_methods: Vec<_> = methods + .iter() + 
.filter(|m| m.visibility == "public") + .collect(); let state_methods: Vec<_> = public_methods .iter() .filter(|m| !m.reads.is_empty() || !m.writes.is_empty()) .collect(); - let writers: Vec<_> = public_methods.iter().filter(|m| !m.writes.is_empty()).collect(); + let writers: Vec<_> = public_methods + .iter() + .filter(|m| !m.writes.is_empty()) + .collect(); if state_methods.len() < 3 || writers.len() < 2 { return None; diff --git a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs index a615108b8..9958c889d 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs @@ -22,12 +22,18 @@ pub struct WeightedInlinedCognitiveComplexityRow { pub spans: BTreeMap, } -pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { +pub fn scan_files( + files: &[PathBuf], + language: Language, +) -> Result> { let mut parsed = BTreeMap::new(); for file in files { - parsed.insert(file.to_string_lossy().to_string(), ast::parse_with_language(file, language)?); + parsed.insert( + file.to_string_lossy().to_string(), + ast::parse_with_language(file, language)?, + ); } - + let topology_report = structural_topology::scan_files(files, language)?; let topology = structural_topology::Graph::new(topology_report.methods, topology_report.edges); @@ -40,16 +46,19 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result) -> Self { Self { file, lines } } + fn new(file: String, lines: Vec) -> Self { + Self { file, lines } + } fn scan(&mut self, root: &Node) -> Vec { let mut out = Vec::new(); @@ -116,7 +127,10 @@ impl MethodBodyCollector { } fn top_level_methods<'a>(&self, root: &'a Node) -> Vec<&'a Node> { - self.top_level_statements(root).into_iter().filter(|s| METHOD_TYPES.contains(&s.r#type.as_str())).collect() + 
self.top_level_statements(root) + .into_iter() + .filter(|s| METHOD_TYPES.contains(&s.r#type.as_str())) + .collect() } fn walk<'a>(&self, node: &'a Node, owners: &[String], out: &mut Vec) { @@ -138,16 +152,21 @@ impl MethodBodyCollector { } fn owner_methods<'a>(&self, owner_node: &'a Node) -> Vec<&'a Node> { - let Some(body) = self.owner_body(owner_node) else { return Vec::new() }; - self.owner_statements(body).into_iter().flat_map(|stmt| { - if METHOD_TYPES.contains(&stmt.r#type.as_str()) { - vec![stmt] - } else if self.visibility_call(stmt) { - self.inline_methods(stmt) - } else { - vec![] - } - }).collect() + let Some(body) = self.owner_body(owner_node) else { + return Vec::new(); + }; + self.owner_statements(body) + .into_iter() + .flat_map(|stmt| { + if METHOD_TYPES.contains(&stmt.r#type.as_str()) { + vec![stmt] + } else if self.visibility_call(stmt) { + self.inline_methods(stmt) + } else { + vec![] + } + }) + .collect() } fn method_body(&self, node: &Node, owner: &str) -> MethodBody { @@ -158,42 +177,85 @@ impl MethodBodyCollector { name, file: self.file.clone(), line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], node: node.clone(), } } fn inline_methods<'a>(&self, stmt: &'a Node) -> Vec<&'a Node> { - let Some(args) = stmt.children.get(1).and_then(ast::node) else { return Vec::new() }; - args.children.iter().filter_map(ast::node).filter(|arg| METHOD_TYPES.contains(&arg.r#type.as_str())).collect() + let Some(args) = stmt.children.get(1).and_then(ast::node) else { + return Vec::new(); + }; + args.children + .iter() + .filter_map(ast::node) + .filter(|arg| METHOD_TYPES.contains(&arg.r#type.as_str())) + .collect() } fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; let scope = 
owner_node.children.get(scope_index).and_then(ast::node)?; - if scope.r#type != "SCOPE" { return None } + if scope.r#type != "SCOPE" { + return None; + } scope.children.get(2).and_then(ast::node) } fn owner_statements<'a>(&self, body: &'a Node) -> Vec<&'a Node> { - if body.r#type == "BLOCK" { body.children.iter().filter_map(ast::node).collect() } else { vec![body] } + if body.r#type == "BLOCK" { + body.children.iter().filter_map(ast::node).collect() + } else { + vec![body] + } } fn top_level_statements<'a>(&self, root: &'a Node) -> Vec<&'a Node> { - root.children.iter().filter_map(ast::node).flat_map(|c| if c.r#type == "BLOCK" { c.children.iter().filter_map(ast::node).collect() } else { vec![c] }).collect() + root.children + .iter() + .filter_map(ast::node) + .flat_map(|c| { + if c.r#type == "BLOCK" { + c.children.iter().filter_map(ast::node).collect() + } else { + vec![c] + } + }) + .collect() } fn visibility_call(&self, node: &Node) -> bool { - node.r#type == "FCALL" && matches!(ast::child_to_string(node.children.get(0)).unwrap_or_default().as_str(), "public" | "protected" | "private") + node.r#type == "FCALL" + && matches!( + ast::child_to_string(node.children.get(0)) + .unwrap_or_default() + .as_str(), + "public" | "protected" | "private" + ) } fn method_name(&self, node: &Node) -> String { if node.r#type == "DEFS" { let receiver = node.children.get(0).and_then(ast::node); let prefix = if let Some(r) = receiver { - if r.r#type == "SELF" { "self".to_string() } else { ast::slice(r, &self.lines) } - } else { "?".to_string() }; - format!("{}.{}", prefix, ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string())) + if r.r#type == "SELF" { + "self".to_string() + } else { + ast::slice(r, &self.lines) + } + } else { + "?".to_string() + }; + format!( + "{}.{}", + prefix, + ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string()) + ) } else { ast::child_to_string(node.children.get(0)).unwrap_or_else(|| "?".to_string()) } @@ 
-206,11 +268,20 @@ impl MethodBodyCollector { } fn owner_segment(&self, node: &Node) -> String { - let text = ast::slice(node.children.first().and_then(ast::node).unwrap_or(node), &self.lines); - if text.is_empty() { "(anonymous)".to_string() } else { text } + let text = ast::slice( + node.children.first().and_then(ast::node).unwrap_or(node), + &self.lines, + ); + if text.is_empty() { + "(anonymous)".to_string() + } else { + text + } } - fn top_level_owner(&self) -> String { format!("(top-level:{})", self.file) } + fn top_level_owner(&self) -> String { + format!("(top-level:{})", self.file) + } } pub struct LocalScorer {} @@ -221,7 +292,9 @@ pub struct ScoreResult { } impl LocalScorer { - pub fn new() -> Self { Self {} } + pub fn new() -> Self { + Self {} + } pub fn score(&self, method_node: &Node) -> ScoreResult { let mut signals = BTreeMap::new(); @@ -231,8 +304,15 @@ impl LocalScorer { } } - fn score_node(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { - if self.skip_nested(node) { return 0.0 } + fn score_node( + &self, + node: &Node, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { + if self.skip_nested(node) { + return 0.0; + } match node.r#type.as_str() { t if BRANCH_TYPES.contains(&t) => self.score_branch(node, nesting, signals), @@ -246,64 +326,137 @@ impl LocalScorer { } fn skip_nested(&self, node: &Node) -> bool { - SKIP_NESTED_TYPES.contains(&node.r#type.as_str()) && !METHOD_TYPES.contains(&node.r#type.as_str()) + SKIP_NESTED_TYPES.contains(&node.r#type.as_str()) + && !METHOD_TYPES.contains(&node.r#type.as_str()) } - fn score_branch(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + fn score_branch( + &self, + node: &Node, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { *signals.entry("branches".to_string()).or_insert(0) += 1; - if nesting > 0 { *signals.entry("nested".to_string()).or_insert(0) += 1; } + if nesting > 0 { + *signals.entry("nested".to_string()).or_insert(0) += 1; + } let condition = 
node.children.get(0).and_then(ast::node); let positive = node.children.get(1).and_then(ast::node); let negative = node.children.get(2).and_then(ast::node); - - self.branch_cost(nesting) + - self.predicate_cost(condition, signals) + - positive.map(|n| self.score_node(n, nesting + 1, signals)).unwrap_or(0.0) + - negative.map(|n| self.score_node(n, nesting + 1, signals)).unwrap_or(0.0) - } - fn score_loop(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + self.branch_cost(nesting) + + self.predicate_cost(condition, signals) + + positive + .map(|n| self.score_node(n, nesting + 1, signals)) + .unwrap_or(0.0) + + negative + .map(|n| self.score_node(n, nesting + 1, signals)) + .unwrap_or(0.0) + } + + fn score_loop( + &self, + node: &Node, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { *signals.entry("loops".to_string()).or_insert(0) += 1; - if nesting > 0 { *signals.entry("nested".to_string()).or_insert(0) += 1; } + if nesting > 0 { + *signals.entry("nested".to_string()).or_insert(0) += 1; + } self.branch_cost(nesting) + self.score_children(node, nesting + 1, signals) } - fn score_case(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + fn score_case( + &self, + node: &Node, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { *signals.entry("cases".to_string()).or_insert(0) += 1; 0.5 + self.score_case_children(node, nesting, signals) } - fn score_case_children(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { - node.children.iter().filter_map(ast::node).map(|child| { - if child.r#type == "WHEN" { self.score_when(child, nesting, signals) } else { self.score_node(child, nesting, signals) } - }).sum() - } - - fn score_when(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + fn score_case_children( + &self, + node: &Node, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { + node.children + .iter() + .filter_map(ast::node) + .map(|child| { + if child.r#type == "WHEN" { + 
self.score_when(child, nesting, signals) + } else { + self.score_node(child, nesting, signals) + } + }) + .sum() + } + + fn score_when( + &self, + node: &Node, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { let body = node.children.get(1).and_then(ast::node); let next_when = node.children.get(2).and_then(ast::node); - body.map(|n| self.score_node(n, nesting + 1, signals)).unwrap_or(0.0) + - next_when.map(|n| self.score_node(n, nesting, signals)).unwrap_or(0.0) - } - - fn score_rescue(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + body.map(|n| self.score_node(n, nesting + 1, signals)) + .unwrap_or(0.0) + + next_when + .map(|n| self.score_node(n, nesting, signals)) + .unwrap_or(0.0) + } + + fn score_rescue( + &self, + node: &Node, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { *signals.entry("rescues".to_string()).or_insert(0) += 1; self.branch_cost(nesting) + self.score_children(node, nesting + 1, signals) } - fn score_early_exit(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + fn score_early_exit( + &self, + node: &Node, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { *signals.entry("early_exits".to_string()).or_insert(0) += 1; - let exit_cost = if nesting > 0 { 0.5 + (nesting as f64 * 0.25) } else { 0.0 }; + let exit_cost = if nesting > 0 { + 0.5 + (nesting as f64 * 0.25) + } else { + 0.0 + }; exit_cost + self.score_children(node, nesting, signals) } - fn score_boolean_node(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { + fn score_boolean_node( + &self, + node: &Node, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { *signals.entry("boolean_ops".to_string()).or_insert(0) += 1; 0.25 + self.score_children(node, nesting, signals) } - fn score_children(&self, node: &Node, nesting: usize, signals: &mut BTreeMap) -> f64 { - node.children.iter().filter_map(ast::node).map(|child| self.score_node(child, nesting, signals)).sum() + fn score_children( + &self, + node: 
&Node, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { + node.children + .iter() + .filter_map(ast::node) + .map(|child| self.score_node(child, nesting, signals)) + .sum() } fn predicate_cost(&self, node: Option<&Node>, signals: &mut BTreeMap) -> f64 { @@ -314,13 +467,26 @@ impl LocalScorer { } fn boolean_count(&self, node: &Node) -> usize { - let own = if BOOLEAN_TYPES.contains(&node.r#type.as_str()) { 1 } else { 0 }; - own + node.children.iter().filter_map(ast::node).map(|child| self.boolean_count(child)).sum::() + let own = if BOOLEAN_TYPES.contains(&node.r#type.as_str()) { + 1 + } else { + 0 + }; + own + node + .children + .iter() + .filter_map(ast::node) + .map(|child| self.boolean_count(child)) + .sum::() } - fn branch_cost(&self, nesting: usize) -> f64 { 1.0 + (nesting as f64) } + fn branch_cost(&self, nesting: usize) -> f64 { + 1.0 + (nesting as f64) + } - fn round(&self, value: f64) -> f64 { (value * 10.0).round() / 10.0 } + fn round(&self, value: f64) -> f64 { + (value * 10.0).round() / 10.0 + } } struct Analyzer { @@ -332,15 +498,39 @@ struct Analyzer { } impl Analyzer { - fn new(topology: structural_topology::Graph, scores: BTreeMap, min_score: f64, min_hidden: f64, max_depth: usize) -> Self { - Self { topology, scores, min_score, min_hidden, max_depth } + fn new( + topology: structural_topology::Graph, + scores: BTreeMap, + min_score: f64, + min_hidden: f64, + max_depth: usize, + ) -> Self { + Self { + topology, + scores, + min_score, + min_hidden, + max_depth, + } } fn findings(&self) -> Vec { - let mut out: Vec<_> = self.scores.values().filter_map(|s| self.finding_for(s)).collect(); - out.sort_by(|a, b| b.hidden.partial_cmp(&a.hidden).unwrap_or(std::cmp::Ordering::Equal) - .then_with(|| b.inlined.partial_cmp(&a.inlined).unwrap_or(std::cmp::Ordering::Equal)) - .then_with(|| a.at.cmp(&b.at))); + let mut out: Vec<_> = self + .scores + .values() + .filter_map(|s| self.finding_for(s)) + .collect(); + out.sort_by(|a, b| { + b.hidden + 
.partial_cmp(&a.hidden) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| { + b.inlined + .partial_cmp(&a.inlined) + .unwrap_or(std::cmp::Ordering::Equal) + }) + .then_with(|| a.at.cmp(&b.at)) + }); out } @@ -348,10 +538,12 @@ impl Analyzer { let mut visited = BTreeSet::new(); visited.insert(score.id.clone()); let contributions = self.inlined_contributions(&score.id, 1, &mut visited); - + let hidden = self.round(contributions.iter().map(|c| c.score).sum()); let total = self.round(score.score + hidden); - if total < self.min_score || hidden < self.min_hidden { return None } + if total < self.min_score || hidden < self.min_hidden { + return None; + } let direct_single_caller = self.single_caller_callees(&score.id); let at = format!("{}:{}:{}", score.file, score.name, score.line); @@ -374,13 +566,24 @@ impl Analyzer { }) } - fn inlined_contributions(&self, method_id: &str, depth: usize, visited: &mut BTreeSet) -> Vec { - if depth > self.max_depth { return Vec::new() } + fn inlined_contributions( + &self, + method_id: &str, + depth: usize, + visited: &mut BTreeSet, + ) -> Vec { + if depth > self.max_depth { + return Vec::new(); + } let mut out = Vec::new(); for edge in self.grouped_edges(method_id) { - if visited.contains(&edge.callee) { continue; } - let Some(callee) = self.scores.get(&edge.callee) else { continue; }; + if visited.contains(&edge.callee) { + continue; + } + let Some(callee) = self.scores.get(&edge.callee) else { + continue; + }; let weight = self.contribution_weight(&edge, depth); let direct = Contribution { @@ -391,22 +594,25 @@ impl Analyzer { depth, chain: vec![edge.callee_name.clone()], }; - + let mut next_visited = visited.clone(); next_visited.insert(edge.callee.clone()); let nested = self.inlined_contributions(&edge.callee, depth + 1, &mut next_visited); - let nested: Vec<_> = nested.into_iter().map(|c| Contribution { - callee_id: c.callee_id, - callee_name: c.callee_name, - score: self.round(c.score * weight), - weight: 
self.round(c.weight * weight), - depth: c.depth, - chain: { - let mut chain = vec![edge.callee_name.clone()]; - chain.extend(c.chain); - chain - }, - }).collect(); + let nested: Vec<_> = nested + .into_iter() + .map(|c| Contribution { + callee_id: c.callee_id, + callee_name: c.callee_name, + score: self.round(c.score * weight), + weight: self.round(c.weight * weight), + depth: c.depth, + chain: { + let mut chain = vec![edge.callee_name.clone()]; + chain.extend(c.chain); + chain + }, + }) + .collect(); out.push(direct); out.extend(nested); @@ -419,14 +625,32 @@ impl Analyzer { for edge in self.topology.internal_calls(method_id) { by_callee.entry(edge.callee.clone()).or_default().push(edge); } - by_callee.into_iter().map(|(_, edges)| { - edges.into_iter().max_by(|a, b| self.edge_weight(&a.r#type).partial_cmp(&self.edge_weight(&b.r#type)).unwrap()).unwrap() - }).collect() + by_callee + .into_iter() + .map(|(_, edges)| { + edges + .into_iter() + .max_by(|a, b| { + self.edge_weight(&a.r#type) + .partial_cmp(&self.edge_weight(&b.r#type)) + .unwrap() + }) + .unwrap() + }) + .collect() } fn contribution_weight(&self, edge: &structural_topology::Edge, depth: usize) -> f64 { - let caller_factor = if self.topology.single_internal_caller(&edge.callee) { 1.0 } else { 0.35 }; - let visibility_factor = if self.shared_public_step(edge) { 0.6 } else { 1.0 }; + let caller_factor = if self.topology.single_internal_caller(&edge.callee) { + 1.0 + } else { + 0.35 + }; + let visibility_factor = if self.shared_public_step(edge) { + 0.6 + } else { + 1.0 + }; let depth_factor = match depth { 1 => 1.0, 2 => 0.6, @@ -446,17 +670,27 @@ impl Analyzer { } fn shared_public_step(&self, edge: &structural_topology::Edge) -> bool { - self.topology.visibility(&edge.callee) == Some("public") && !self.topology.single_internal_caller(&edge.callee) + self.topology.visibility(&edge.callee) == Some("public") + && !self.topology.single_internal_caller(&edge.callee) } fn single_caller_callees(&self, 
method_id: &str) -> Vec { - let mut out: Vec<_> = self.grouped_edges(method_id).into_iter().filter(|e| self.topology.single_internal_caller(&e.callee)).map(|e| e.callee_name).collect(); + let mut out: Vec<_> = self + .grouped_edges(method_id) + .into_iter() + .filter(|e| self.topology.single_internal_caller(&e.callee)) + .map(|e| e.callee_name) + .collect(); out.sort(); out } fn strongest_chain(&self, score: &LocalScore, contributions: &[Contribution]) -> Vec { - let chain = contributions.iter().max_by(|a, b| a.score.partial_cmp(&b.score).unwrap()).map(|c| c.chain.clone()).unwrap_or_default(); + let chain = contributions + .iter() + .max_by(|a, b| a.score.partial_cmp(&b.score).unwrap()) + .map(|c| c.chain.clone()) + .unwrap_or_default(); let mut out = vec![score.name.clone()]; out.extend(chain); out @@ -464,11 +698,20 @@ impl Analyzer { fn reason(&self, hidden: f64, single_caller_callees: &[String]) -> String { if single_caller_callees.is_empty() { - format!("same-owner call chain adds {} weighted cognitive points", hidden) + format!( + "same-owner call chain adds {} weighted cognitive points", + hidden + ) } else { - format!("{} single-caller helper(s) add {} weighted cognitive points", single_caller_callees.len(), hidden) + format!( + "{} single-caller helper(s) add {} weighted cognitive points", + single_caller_callees.len(), + hidden + ) } } - fn round(&self, value: f64) -> f64 { (value * 10.0).round() / 10.0 } + fn round(&self, value: f64) -> f64 { + (value * 10.0).round() / 10.0 + } } diff --git a/gems/decomplex/rust/src/decomplex/parallel.rs b/gems/decomplex/rust/src/decomplex/parallel.rs index ab1f0b6c9..52b9939da 100644 --- a/gems/decomplex/rust/src/decomplex/parallel.rs +++ b/gems/decomplex/rust/src/decomplex/parallel.rs @@ -1,7 +1,7 @@ use anyhow::{bail, Result}; use std::env; -use std::sync::mpsc; use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::mpsc; use std::thread; static JOBS_OVERRIDE: AtomicUsize = AtomicUsize::new(0); @@ -24,7 
+24,11 @@ pub fn job_count() -> usize { } env_jobs() - .unwrap_or_else(|| thread::available_parallelism().map(usize::from).unwrap_or(1)) + .unwrap_or_else(|| { + thread::available_parallelism() + .map(usize::from) + .unwrap_or(1) + }) .max(1) } diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index 15720e1bf..6860596a2 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -12,7 +12,10 @@ pub enum Language { Ruby, Python, JavaScript, + Java, TypeScript, + Swift, + Kotlin, Go, Rust, Zig, @@ -28,7 +31,10 @@ impl Language { "ruby" => Ok(Self::Ruby), "python" => Ok(Self::Python), "javascript" => Ok(Self::JavaScript), + "java" => Ok(Self::Java), "typescript" => Ok(Self::TypeScript), + "swift" => Ok(Self::Swift), + "kotlin" => Ok(Self::Kotlin), "go" => Ok(Self::Go), "rust" => Ok(Self::Rust), "zig" => Ok(Self::Zig), @@ -133,6 +139,12 @@ mod tests { use std::io::Write; use tempfile::NamedTempFile; + fn document(source: &str, language: Language) -> Document { + let mut file = NamedTempFile::new().expect("tempfile"); + file.write_all(source.as_bytes()).expect("write source"); + parse_file(file.path().to_path_buf(), language).expect("parse file") + } + #[test] fn parallel_parse_files_preserves_input_order() { parallel::set_jobs_for_process(Some(4)).expect("jobs"); @@ -154,4 +166,33 @@ mod tests { assert_eq!(docs[0].function_defs[0].name, "first"); assert_eq!(docs[1].function_defs[0].name, "second"); } + + #[test] + fn parses_java_kotlin_and_swift_function_defs() { + let cases = [ + ( + Language::Java, + "class Billing { int mixed(int price, int tax) { return price + tax; } }", + ), + ( + Language::Kotlin, + "class Billing { fun mixed(price: Int, tax: Int): Int { return price + tax } }", + ), + ( + Language::Swift, + "class Billing { func mixed(price: Int, tax: Int) -> Int { return price + tax } }", + ), + ]; + + for (language, source) in cases { + let doc = 
document(source, language); + let function = doc + .function_defs + .iter() + .find(|function| function.name == "mixed") + .expect("mixed function"); + + assert_eq!(function.owner, "Billing"); + } + } } diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 174d807a4..05e1e4ea8 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -1,4 +1,6 @@ -use super::{ComparisonUse, DecisionSite, Document, FunctionDef, Language, PredicateAlias, StateWrite}; +use super::{ + ComparisonUse, DecisionSite, Document, FunctionDef, Language, PredicateAlias, StateWrite, +}; use crate::decomplex::ast::{line, node_text, normalize_text, span, RawNode}; use anyhow::{Context, Result}; use std::collections::HashSet; @@ -51,7 +53,10 @@ fn language_grammar(language: Language) -> TreeSitterLanguage { Language::Ruby => tree_sitter_ruby::LANGUAGE.into(), Language::Python => tree_sitter_python::LANGUAGE.into(), Language::JavaScript => tree_sitter_javascript::LANGUAGE.into(), + Language::Java => tree_sitter_java::LANGUAGE.into(), Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), + Language::Swift => tree_sitter_swift::LANGUAGE.into(), + Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(), Language::Go => tree_sitter_go::LANGUAGE.into(), Language::Rust => tree_sitter_rust::LANGUAGE.into(), Language::Zig => tree_sitter_zig::LANGUAGE.into(), @@ -128,10 +133,31 @@ fn collect_facts( seen_writes: &mut HashSet, seen_decisions: &mut HashSet, ) { - let next_context = push_function_context(node, push_owner_context(node, source, context, language), source, language); + let next_context = push_function_context( + node, + push_owner_context(node, source, context, language), + source, + language, + ); record_function_def(node, source, file, language, &next_context, function_defs); - 
record_state_write(node, source, file, language, &next_context, state_writes, seen_writes); - record_decision_site(node, source, file, language, &next_context, decision_sites, seen_decisions); + record_state_write( + node, + source, + file, + language, + &next_context, + state_writes, + seen_writes, + ); + record_decision_site( + node, + source, + file, + language, + &next_context, + decision_sites, + seen_decisions, + ); record_predicate_alias(node, source, file, language, predicate_aliases); record_comparison_use(node, source, file, language, &next_context, comparison_uses); @@ -173,11 +199,20 @@ fn record_function_def( span: span(node), body: RawNode::from_tree_sitter(node, source), }; - let key = (function.file.clone(), function.owner.clone(), function.name.clone(), function.line); - if out - .iter() - .any(|existing| (existing.file.clone(), existing.owner.clone(), existing.name.clone(), existing.line) == key) - { + let key = ( + function.file.clone(), + function.owner.clone(), + function.name.clone(), + function.line, + ); + if out.iter().any(|existing| { + ( + existing.file.clone(), + existing.owner.clone(), + existing.name.clone(), + existing.line, + ) == key + }) { return; } out.push(function); @@ -238,7 +273,10 @@ fn record_comparison_use( fn comparison_node(node: Node<'_>, source: &str) -> bool { if matches!(node.kind(), "binary" | "binary_expression") { - return matches!(direct_operator_from_source(node, source).as_str(), "==" | "!="); + return matches!( + direct_operator_from_source(node, source).as_str(), + "==" | "!=" + ); } if node.kind() != "call" { return false; @@ -275,15 +313,19 @@ fn record_decision_site( if patterns.len() < 2 { return; } - push_decision_site(out, seen, DecisionSite { - kind: "case_dispatch".to_string(), - members: patterns, - file: file.to_string_lossy().to_string(), - function: context.current_function(), - line: line(decision_node), - span: span(decision_node), - predicate: decision_predicate(decision_node, source), - }); + 
push_decision_site( + out, + seen, + DecisionSite { + kind: "case_dispatch".to_string(), + members: patterns, + file: file.to_string_lossy().to_string(), + function: context.current_function(), + line: line(decision_node), + span: span(decision_node), + predicate: decision_predicate(decision_node, source), + }, + ); } } @@ -325,7 +367,11 @@ fn record_conjunction_decision( if !from_wrapper && node .parent() - .map(|parent| boolean_container(parent) && boolean_and(parent, source) && span(parent) != span(node)) + .map(|parent| { + boolean_container(parent) + && boolean_and(parent, source) + && span(parent) != span(node) + }) .unwrap_or(false) { return; @@ -341,15 +387,19 @@ fn record_conjunction_decision( return; } - push_decision_site(out, seen, DecisionSite { - kind: "conjunction".to_string(), - members, - file: file.to_string_lossy().to_string(), - function: context.current_function(), - line: conjunction_span(node)[0], - span: conjunction_span(node), - predicate: normalize_text(node_text(node, source)), - }); + push_decision_site( + out, + seen, + DecisionSite { + kind: "conjunction".to_string(), + members, + file: file.to_string_lossy().to_string(), + function: context.current_function(), + line: conjunction_span(node)[0], + span: conjunction_span(node), + predicate: normalize_text(node_text(node, source)), + }, + ); } fn push_decision_site(out: &mut Vec, seen: &mut HashSet, site: DecisionSite) { @@ -374,9 +424,11 @@ fn method_single_expression_body(node: Node<'_>) -> Option> { return named.last().copied(); } - let body = node - .child_by_field_name("body") - .or_else(|| named_children(node).into_iter().find(|child| child.kind() == "body_statement"))?; + let body = node.child_by_field_name("body").or_else(|| { + named_children(node) + .into_iter() + .find(|child| child.kind() == "body_statement") + })?; let statements: Vec> = named_children(body) .into_iter() .filter(|child| !matches!(child.kind(), "comment" | "heredoc_body")) @@ -388,8 +440,15 @@ fn 
method_single_expression_body(node: Node<'_>) -> Option> { } } -fn push_owner_context(node: Node<'_>, source: &str, context: &ContextState, language: Language) -> ContextState { - let Some(owner) = owner_name_from_declaration(node, source).or_else(|| receiver_convention_owner_name(node, source, language)) else { +fn push_owner_context( + node: Node<'_>, + source: &str, + context: &ContextState, + language: Language, +) -> ContextState { + let Some(owner) = owner_name_from_declaration(node, source) + .or_else(|| receiver_convention_owner_name(node, source, language)) + else { return context.clone(); }; let parent_owner = context.owner.clone(); @@ -407,7 +466,12 @@ fn push_owner_context(node: Node<'_>, source: &str, context: &ContextState, lang next } -fn push_function_context(node: Node<'_>, mut context: ContextState, source: &str, language: Language) -> ContextState { +fn push_function_context( + node: Node<'_>, + mut context: ContextState, + source: &str, + language: Language, +) -> ContextState { let Some(function) = function_name(node, source) else { return context; }; @@ -560,11 +624,21 @@ fn state_target(lhs: Node<'_>, source: &str) -> Option { fn function_name(node: Node<'_>, source: &str) -> Option { match node.kind() { - "method" | "function_definition" | "function_declaration" | "method_definition" | "function_item" => node + "method" + | "function_definition" + | "function_declaration" + | "method_definition" + | "function_item" => node .child_by_field_name("name") .map(|name| node_text(name, source).to_string()) .or_else(|| declarator_name(node.child_by_field_name("declarator"), source)) - .or_else(|| first_named_text(node, source, &["identifier", "constant", "property_identifier"])), + .or_else(|| { + first_named_text( + node, + source, + &["identifier", "constant", "property_identifier"], + ) + }), "singleton_method" => { let name = node .child_by_field_name("name") @@ -587,7 +661,9 @@ fn function_name(node: Node<'_>, source: &str) -> Option { 
.child_by_field_name("name") .map(|name| node_text(name, source).to_string()) .or_else(|| first_named_text(node, source, &["field_identifier", "identifier"])), - "body_statement" if first_child_kind(node) == Some("def") => hidden_ruby_method_name(node, source), + "body_statement" if first_child_kind(node) == Some("def") => { + hidden_ruby_method_name(node, source) + } "argument_list" if first_child_kind(node) == Some("def") => inline_def_name(node, source), _ => None, } @@ -615,7 +691,8 @@ fn declarator_name(node: Option>, source: &str) -> Option { } fn owner_name_from_declaration(node: Node<'_>, source: &str) -> Option { - if node.kind() == "body_statement" && matches!(first_child_kind(node), Some("class" | "module")) { + if node.kind() == "body_statement" && matches!(first_child_kind(node), Some("class" | "module")) + { return first_named_text(node, source, &["constant", "identifier", "type_identifier"]); } @@ -623,31 +700,35 @@ fn owner_name_from_declaration(node: Node<'_>, source: &str) -> Option { "class" | "module" | "class_definition" | "class_declaration" | "class_specifier" => node .child_by_field_name("name") .map(|name| node_text(name, source).to_string()) - .or_else(|| first_named_text(node, source, &["constant", "identifier", "type_identifier"])), + .or_else(|| { + first_named_text(node, source, &["constant", "identifier", "type_identifier"]) + }), "impl_item" | "impl_block" => impl_owner_name(node, source), - "struct_item" | "struct_spec" | "struct_specifier" | "type_spec" | "type_declaration" => node - .child_by_field_name("name") - .map(|name| node_text(name, source).to_string()) - .or_else(|| first_named_text(node, source, &["type_identifier", "identifier"])), + "struct_item" | "struct_spec" | "struct_specifier" | "type_spec" | "type_declaration" => { + node.child_by_field_name("name") + .map(|name| node_text(name, source).to_string()) + .or_else(|| first_named_text(node, source, &["type_identifier", "identifier"])) + } _ => None, } } fn 
impl_owner_name(node: Node<'_>, source: &str) -> Option { - let r#type = node - .child_by_field_name("type") - .or_else(|| { - named_children(node) - .into_iter() - .find(|child| child.kind().contains("type") || child.kind().contains("identifier")) - })?; + let r#type = node.child_by_field_name("type").or_else(|| { + named_children(node) + .into_iter() + .find(|child| child.kind().contains("type") || child.kind().contains("identifier")) + })?; Some(normalize_type_owner(node_text(r#type, source))) } fn normalize_type_owner(text: &str) -> String { let value = text.trim(); let value = value.trim_start_matches(['&', '*']); - let value = value.replace("const", "").replace("mut", "").replace("var", ""); + let value = value + .replace("const", "") + .replace("mut", "") + .replace("var", ""); let value = value.trim(); let value = value.split(['(', '{', '<', ' ']).next().unwrap_or(""); value.split('.').last().unwrap_or("").to_string() @@ -665,7 +746,12 @@ fn hidden_ruby_method_name(node: Node<'_>, source: &str) -> Option { }; let name = search .into_iter() - .find(|child| matches!(child.kind(), "identifier" | "field_identifier" | "property_identifier")) + .find(|child| { + matches!( + child.kind(), + "identifier" | "field_identifier" | "property_identifier" + ) + }) .map(|child| node_text(child, source).to_string())?; if receiver_index.is_some() { Some(format!("self.{name}")) @@ -737,7 +823,8 @@ fn previous_sibling_raw_text(node: Node<'_>) -> Option { } fn next_sibling_raw_text(node: Node<'_>) -> Option { - node.next_sibling().map(|sibling| sibling.kind().to_string()) + node.next_sibling() + .map(|sibling| sibling.kind().to_string()) } fn member_field_text(field: Node<'_>, source: &str) -> Option { @@ -745,17 +832,15 @@ fn member_field_text(field: Node<'_>, source: &str) -> Option { let suffix = field .child_by_field_name("suffix") .or_else(|| { - named_children(field) - .into_iter() - .find(|child| { - matches!( - child.kind(), - "identifier" - | "simple_identifier" - | 
"field_identifier" - | "property_identifier" - ) - }) + named_children(field).into_iter().find(|child| { + matches!( + child.kind(), + "identifier" + | "simple_identifier" + | "field_identifier" + | "property_identifier" + ) + }) }) .or_else(|| last_named_child(field))?; let text = node_text(suffix, source) @@ -779,13 +864,20 @@ fn strip_assignment_suffix(text: &str) -> String { fn case_node(node: Node<'_>) -> bool { matches!( node.kind(), - "case" | "when_expression" | "switch_statement" | "switch_expression" | "match_statement" | "match_expression" + "case" + | "when_expression" + | "switch_statement" + | "switch_expression" + | "match_statement" + | "match_expression" ) } fn hidden_case(node: Node<'_>) -> bool { - matches!(node.kind(), "body_statement" | "block_body" | "argument_list") - && first_child_kind(node) == Some("case") + matches!( + node.kind(), + "body_statement" | "block_body" | "argument_list" + ) && first_child_kind(node) == Some("case") } fn case_source_node(node: Node<'_>) -> Node<'_> { @@ -883,8 +975,16 @@ fn case_arm_patterns(child: Node<'_>, source: &str) -> Vec { let value = child .child_by_field_name("value") .or_else(|| child.child_by_field_name("pattern")) - .or_else(|| named_children(child).into_iter().find(|candidate| candidate.kind() == "when_condition")) - .or_else(|| named_children(child).into_iter().find(|candidate| candidate.kind() == "switch_pattern")) + .or_else(|| { + named_children(child) + .into_iter() + .find(|candidate| candidate.kind() == "when_condition") + }) + .or_else(|| { + named_children(child) + .into_iter() + .find(|candidate| candidate.kind() == "switch_pattern") + }) .or_else(|| first_named_child(child)); value .filter(|node| !node.kind().contains("statement") && !node.kind().contains("block")) @@ -936,13 +1036,21 @@ fn default_case_pattern(text: &str) -> bool { fn decision_predicate(node: Node<'_>, source: &str) -> String { let target = decision_subject(node); - normalize_text(target.map(|child| node_text(child, 
source)).unwrap_or_else(|| node_text(node, source))) + normalize_text( + target + .map(|child| node_text(child, source)) + .unwrap_or_else(|| node_text(node, source)), + ) } fn decision_subject(node: Node<'_>) -> Option> { node.child_by_field_name("value") .or_else(|| node.child_by_field_name("subject")) - .or_else(|| named_children(node).into_iter().find(|child| child.kind() == "when_subject")) + .or_else(|| { + named_children(node) + .into_iter() + .find(|child| child.kind() == "when_subject") + }) .or_else(|| node.child_by_field_name("condition")) .or_else(|| { named_children(node).into_iter().find(|child| { @@ -967,13 +1075,21 @@ fn decision_subject(node: Node<'_>) -> Option> { } fn boolean_container(node: Node<'_>) -> bool { - if matches!(node.kind(), "binary" | "binary_expression" | "boolean_operator") { + if matches!( + node.kind(), + "binary" | "binary_expression" | "boolean_operator" + ) { return true; } if parenthesized_wrapper(node) { - return first_named_child(node).map(boolean_container).unwrap_or(false); + return first_named_child(node) + .map(boolean_container) + .unwrap_or(false); } - if !matches!(node.kind(), "body_statement" | "block_body" | "statement" | "pattern" | "argument_list") { + if !matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "pattern" | "argument_list" + ) { return false; } if !matches!(direct_operator(node).as_str(), "&&" | "and") { @@ -995,7 +1111,10 @@ fn boolean_and(node: Node<'_>, source: &str) -> bool { .map(|child| boolean_and(child, source)) .unwrap_or(false); } - matches!(direct_operator_from_source(node, source).as_str(), "&&" | "and") + matches!( + direct_operator_from_source(node, source).as_str(), + "&&" | "and" + ) } fn flatten_boolean_and<'tree>(node: Node<'tree>, source: &str) -> Vec> { @@ -1014,8 +1133,10 @@ fn flatten_boolean_and<'tree>(node: Node<'tree>, source: &str) -> Vec) -> bool { - matches!(node.kind(), "parenthesized_statements" | "parenthesized_expression") - && 
named_children(node).len() == 1 + matches!( + node.kind(), + "parenthesized_statements" | "parenthesized_expression" + ) && named_children(node).len() == 1 } fn conjunction_span(node: Node<'_>) -> [usize; 4] { @@ -1067,8 +1188,7 @@ fn direct_operator(node: Node<'_>) -> String { .children(&mut cursor) .find(|child| !child.is_named() && !matches!(child.kind(), "(" | ")")) .map(|child| child.kind().to_string()) - .unwrap_or_default() - ; + .unwrap_or_default(); result } @@ -1078,8 +1198,7 @@ fn direct_operator_from_source(node: Node<'_>, source: &str) -> String { .children(&mut cursor) .find(|child| !child.is_named() && !matches!(node_text(*child, source), "(" | ")")) .map(|child| node_text(child, source).to_string()) - .unwrap_or_default() - ; + .unwrap_or_default(); result } @@ -1179,7 +1298,8 @@ fn first_argument_receiver_language(language: Language) -> bool { } fn first_argument_receiver_parameter(node: Node<'_>, source: &str) -> Option<(String, String)> { - let params = node.child_by_field_name("declarator") + let params = node + .child_by_field_name("declarator") .and_then(|d| d.child_by_field_name("parameters")) .or_else(|| node.child_by_field_name("parameters")) .or_else(|| first_named_child_with_kind(node, "parameter_list")) @@ -1187,18 +1307,29 @@ fn first_argument_receiver_parameter(node: Node<'_>, source: &str) -> Option<(St node.child_by_field_name("declarator") .and_then(|d| first_named_child_with_kind(d, "parameter_list")) })?; - + let first = first_named_child_with_kind(params, "parameter_declaration")?; - + let type_node = named_children(first).into_iter().find(|child| { - matches!(child.kind(), "type_identifier" | "primitive_type" | "qualified_identifier" | "scoped_type_identifier") + matches!( + child.kind(), + "type_identifier" + | "primitive_type" + | "qualified_identifier" + | "scoped_type_identifier" + ) })?; - - let name_node = named_children(first).into_iter().rev().find(|child| { - matches!(child.kind(), "identifier" | "field_identifier") - 
}).or_else(|| first_named_child(first))?; - - Some((node_text(type_node, source).to_string(), node_text(name_node, source).to_string())) + + let name_node = named_children(first) + .into_iter() + .rev() + .find(|child| matches!(child.kind(), "identifier" | "field_identifier")) + .or_else(|| first_named_child(first))?; + + Some(( + node_text(type_node, source).to_string(), + node_text(name_node, source).to_string(), + )) } fn snake_case_type_name(type_str: &str) -> String { @@ -1209,15 +1340,19 @@ fn snake_case_type_name(type_str: &str) -> String { last } -fn receiver_convention_owner_name(node: Node<'_>, source: &str, language: Language) -> Option { +fn receiver_convention_owner_name( + node: Node<'_>, + source: &str, + language: Language, +) -> Option { if !first_argument_receiver_language(language) || node.kind() != "function_definition" { return None; } - + let (type_name, _) = first_argument_receiver_parameter(node, source)?; let type_name = normalize_type_owner(&type_name); let name = function_name(node, source)?; - + if name.starts_with(&snake_case_type_name(&type_name)) { Some(type_name) } else if type_name.ends_with("_t") && name.starts_with(type_name.strip_suffix("_t").unwrap()) { @@ -1241,8 +1376,17 @@ fn normalize_target_receiver(mut target: Target, context: &ContextState) -> Targ if let Some(current_receiver) = &context.receiver { if &target.receiver == current_receiver { target.receiver = "self".to_string(); - } else if target.receiver.starts_with(&format!("{}.", current_receiver)) { - target.receiver = format!("self.{}", target.receiver.strip_prefix(&format!("{}.", current_receiver)).unwrap()); + } else if target + .receiver + .starts_with(&format!("{}.", current_receiver)) + { + target.receiver = format!( + "self.{}", + target + .receiver + .strip_prefix(&format!("{}.", current_receiver)) + .unwrap() + ); } } target diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs index 61ae41a0f..7e5f7dc15 100644 --- 
a/gems/decomplex/rust/src/main.rs +++ b/gems/decomplex/rust/src/main.rs @@ -16,109 +16,145 @@ fn main() -> Result<()> { let command = parse_args(std::env::args().skip(1).collect())?; parallel::set_jobs_for_process(command.jobs())?; match command { - Command::StateWrites { language, files, .. } => { + Command::StateWrites { + language, files, .. + } => { let language = Language::parse(&language)?; let facts = co_update::state_writes_for_files(&files, language) .with_context(|| "failed to extract state-write facts")?; println!("{}", serde_json::to_string(&facts)?); } - Command::CoUpdate { language, files, .. } => { + Command::CoUpdate { + language, files, .. + } => { let language = Language::parse(&language)?; let report = co_update::scan_files(&files, language) .with_context(|| "failed to scan co-update facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::PredicateAliases { language, files, .. } => { + Command::PredicateAliases { + language, files, .. + } => { let language = Language::parse(&language)?; let report = predicate_alias::scan_files(&files, language) .with_context(|| "failed to scan predicate-alias facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::Miner { language, files, .. } => { + Command::Miner { + language, files, .. + } => { let language = Language::parse(&language)?; let report = miner::scan_files(&files, language) .with_context(|| "failed to scan decision-site miner facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::SemanticAliases { language, files, .. } => { + Command::SemanticAliases { + language, files, .. + } => { let language = Language::parse(&language)?; let report = semantic_alias::scan_files(&files, language) .with_context(|| "failed to scan semantic-alias facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::DecisionPressure { language, files, .. } => { + Command::DecisionPressure { + language, files, .. 
+ } => { let language = Language::parse(&language)?; let report = decision_pressure::scan_files(&files, language) .with_context(|| "failed to scan decision-pressure facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::StateBranchDensity { language, files, .. } => { + Command::StateBranchDensity { + language, files, .. + } => { let language = Language::parse(&language)?; let report = state_branch_density::scan_files(&files, language) .with_context(|| "failed to scan state-branch-density facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::TemporalOrderingPressure { language, files, .. } => { + Command::TemporalOrderingPressure { + language, files, .. + } => { let language = Language::parse(&language)?; let report = temporal_ordering_pressure::scan_files(&files, language) .with_context(|| "failed to scan temporal-ordering-pressure facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::RedundantNilGuard { language, files, .. } => { + Command::RedundantNilGuard { + language, files, .. + } => { let language = Language::parse(&language)?; let report = redundant_nil_guard::scan_files(&files, language) .with_context(|| "failed to scan redundant-nil-guard facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::StateMesh { language, files, .. } => { + Command::StateMesh { + language, files, .. + } => { let language = Language::parse(&language)?; let report = state_mesh::scan_files(&files, language) .with_context(|| "failed to scan state-mesh facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::InconsistentRenameClone { language, files, .. } => { + Command::InconsistentRenameClone { + language, files, .. + } => { let language = Language::parse(&language)?; let report = inconsistent_rename_clone::scan_files(&files, language) .with_context(|| "failed to scan inconsistent-rename-clone facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::DerivedState { language, files, .. 
} => { + Command::DerivedState { + language, files, .. + } => { let language = Language::parse(&language)?; let report = derived_state::scan_files(&files, language) .with_context(|| "failed to scan derived-state facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::ImplicitControlFlow { language, files, .. } => { + Command::ImplicitControlFlow { + language, files, .. + } => { let language = Language::parse(&language)?; let report = implicit_control_flow::scan_files(&files, language) .with_context(|| "failed to scan implicit-control-flow facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::WeightedInlinedComplexity { language, files, .. } => { + Command::WeightedInlinedComplexity { + language, files, .. + } => { let language = Language::parse(&language)?; let report = weighted_inlined_cognitive_complexity::scan_files(&files, language) .with_context(|| "failed to scan weighted-inlined-complexity facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::LocalityDrag { language, files, .. } => { + Command::LocalityDrag { + language, files, .. + } => { let language = Language::parse(&language)?; let report = locality_drag::scan_files(&files, language) .with_context(|| "failed to scan locality-drag facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::OperationalDiscontinuity { language, files, .. } => { + Command::OperationalDiscontinuity { + language, files, .. + } => { let language = Language::parse(&language)?; let report = operational_discontinuity::scan_files(&files, language) .with_context(|| "failed to scan operational-discontinuity facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::StructuralTopology { language, files, .. } => { + Command::StructuralTopology { + language, files, .. 
+ } => { let language = Language::parse(&language)?; let report = structural_topology::scan_files(&files, language) .with_context(|| "failed to scan structural-topology facts")?; println!("{}", serde_json::to_string(&report)?); } - Command::LocalFlow { language, files, .. } => { + Command::LocalFlow { + language, files, .. + } => { let language = Language::parse(&language)?; let report = local_flow::scan_files(&files, language) .with_context(|| "failed to scan local-flow facts")?; @@ -136,37 +172,49 @@ fn main() -> Result<()> { .with_context(|| "failed to scan structural similarity")?; println!("{}", serde_json::to_string(&findings)?); } - Command::OversizedPredicate { language, files, .. } => { + Command::OversizedPredicate { + language, files, .. + } => { let language = Language::parse(&language)?; let findings = oversized_predicate::scan_files(&files, language) .with_context(|| "failed to scan oversized-predicate facts")?; println!("{}", serde_json::to_string(&findings)?); } - Command::PathCondition { language, files, .. } => { + Command::PathCondition { + language, files, .. + } => { let language = Language::parse(&language)?; let findings = path_condition::scan_files(&files, language) .with_context(|| "failed to scan path-condition facts")?; println!("{}", serde_json::to_string(&findings)?); } - Command::SequenceMine { language, files, .. } => { + Command::SequenceMine { + language, files, .. + } => { let language = Language::parse(&language)?; let findings = sequence_mine::scan_files(&files, language) .with_context(|| "failed to scan sequence-mine facts")?; println!("{}", serde_json::to_string(&findings)?); } - Command::FunctionLcom { language, files, .. } => { + Command::FunctionLcom { + language, files, .. 
+ } => { let language = Language::parse(&language)?; let findings = function_lcom::scan_files(&files, language) .with_context(|| "failed to scan function-lcom facts")?; println!("{}", serde_json::to_string(&findings)?); } - Command::FalseSimplicity { language, files, .. } => { + Command::FalseSimplicity { + language, files, .. + } => { let language = Language::parse(&language)?; let findings = false_simplicity::scan_files(&files, language) .with_context(|| "failed to scan false-simplicity facts")?; println!("{}", serde_json::to_string(&findings)?); } - Command::FatUnion { language, files, .. } => { + Command::FatUnion { + language, files, .. + } => { let language = Language::parse(&language)?; let findings = fat_union::scan_files(&files, language) .with_context(|| "failed to scan fat-union facts")?; @@ -617,9 +665,7 @@ fn parse_args(args: Vec) -> Result { let mut rest = cursor.collect::>().into_iter(); while let Some(arg) = rest.next() { if arg == "--language" { - language = rest - .next() - .with_context(|| "--language requires a value")?; + language = rest.next().with_context(|| "--language requires a value")?; } else if let Some(value) = arg.strip_prefix("--language=") { language = value.to_string(); } else if arg == "--mass" { @@ -637,7 +683,9 @@ fn parse_args(args: Vec) -> Result { .parse() .with_context(|| "--fuzzy must be an integer")?; } else if let Some(value) = arg.strip_prefix("--fuzzy=") { - fuzzy = value.parse().with_context(|| "--fuzzy must be an integer")?; + fuzzy = value + .parse() + .with_context(|| "--fuzzy must be an integer")?; } else if arg == "--jobs" { jobs = Some(parse_jobs( rest.next().with_context(|| "--jobs requires a value")?, @@ -663,7 +711,9 @@ fn parse_args(args: Vec) -> Result { } } -fn parse_language_files_and_jobs(args: Vec) -> Result<(String, Vec, Option)> { +fn parse_language_files_and_jobs( + args: Vec, +) -> Result<(String, Vec, Option)> { let mut language = String::from("ruby"); let mut jobs = None; let mut files = 
Vec::new(); diff --git a/gems/decomplex/test/detector_runner_test.rb b/gems/decomplex/test/detector_runner_test.rb index ba735ab47..d34c3581b 100644 --- a/gems/decomplex/test/detector_runner_test.rb +++ b/gems/decomplex/test/detector_runner_test.rb @@ -25,6 +25,13 @@ def test_co_update_rust_engine_matches_ruby_engine_byte_for_byte assert_equal ruby_json, rust_json end + def test_native_command_language_for_recognizes_jvm_and_swift_extensions + assert_equal "java", Decomplex::Native::Command.language_for("Example.java") + assert_equal "kotlin", Decomplex::Native::Command.language_for("Example.kt") + assert_equal "kotlin", Decomplex::Native::Command.language_for("Example.kts") + assert_equal "swift", Decomplex::Native::Command.language_for("Example.swift") + end + def test_miner_rust_engine_matches_ruby_engine_byte_for_byte skip "cargo is not available" unless cargo_available? @@ -55,6 +62,32 @@ def broken(a, b) end end + def test_flay_similarity_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-flay", ".rb"]) do |file| + file.write(<<~RUBY) + def one(a, b) + total = a + b + puts total + total * 2 + end + + def two(x, y) + total = x + y + puts total + total * 2 + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("flay-similarity", [file.path], mass: 4, fuzzy: 1) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + def test_semantic_alias_rust_engine_matches_ruby_engine_byte_for_byte skip "cargo is not available" unless cargo_available? 
From 062ee4ab2f15633f3ab33683027db88dd584b280 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Thu, 18 Jun 2026 12:47:57 +0000 Subject: [PATCH 17/52] WIP improve decomplex native parity --- .../decomplex/lib/decomplex/native/command.rb | 2 +- gems/decomplex/lib/decomplex/syntax.rb | 5 +- gems/decomplex/rust/src/decomplex/ast.rs | 828 ++++++++++- .../decomplex/detectors/decision_pressure.rs | 11 +- .../src/decomplex/detectors/derived_state.rs | 21 +- .../decomplex/detectors/false_simplicity.rs | 1225 +++++++++++++++-- .../decomplex/detectors/flay_similarity.rs | 17 +- .../src/decomplex/detectors/function_lcom.rs | 286 ++-- .../detectors/implicit_control_flow.rs | 7 +- .../src/decomplex/detectors/local_flow.rs | 11 +- .../src/decomplex/detectors/locality_drag.rs | 2 +- .../rust/src/decomplex/detectors/miner.rs | 14 +- .../src/decomplex/detectors/path_condition.rs | 2 +- .../src/decomplex/detectors/sequence_mine.rs | 392 ++++-- .../detectors/temporal_ordering_pressure.rs | 4 +- gems/decomplex/rust/src/main.rs | 13 + 16 files changed, 2437 insertions(+), 403 deletions(-) diff --git a/gems/decomplex/lib/decomplex/native/command.rb b/gems/decomplex/lib/decomplex/native/command.rb index 23c6f797e..64219a2fa 100644 --- a/gems/decomplex/lib/decomplex/native/command.rb +++ b/gems/decomplex/lib/decomplex/native/command.rb @@ -42,7 +42,7 @@ def language_for(path) case File.extname(path) when ".rb" then "ruby" when ".py" then "python" - when ".js" then "javascript" + when ".js", ".jsx", ".mjs", ".cjs" then "javascript" when ".ts", ".tsx" then "typescript" when ".java" then "java" when ".swift" then "swift" diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 207c204e9..aa4f579cc 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -694,8 +694,11 @@ def grammar_candidates(language) File.expand_path("../../vendor/tree-sitter", __dir__), 
File.expand_path("../../vendor/tree-sitter/#{language}", __dir__), File.expand_path("../../node_modules/#{pkg}", __dir__), + File.expand_path("../../node_modules/#{pkg}/build/Release", __dir__), File.expand_path("../../../../node_modules/#{pkg}", __dir__), - File.expand_path("../../../../../node_modules/#{pkg}", __dir__) + File.expand_path("../../../../node_modules/#{pkg}/build/Release", __dir__), + File.expand_path("../../../../../node_modules/#{pkg}", __dir__), + File.expand_path("../../../../../node_modules/#{pkg}/build/Release", __dir__) ] all_prebuilds = roots.flat_map do |root| stems.flat_map do |stem| diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs index 832012861..f3357b233 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -210,7 +210,7 @@ pub fn parse_with_language(file: &Path, language: Language) -> Result<(Node, Vec let tree = parser .parse(&source, None) .with_context(|| format!("tree-sitter produced no tree for {}", file.display()))?; - let root = TreeSitterNormalizer::new(&source).normalize(tree.root_node()); + let root = TreeSitterNormalizer::new(&source, language).normalize(tree.root_node()); let lines = source.lines().map(ToString::to_string).collect(); Ok((root, lines)) } @@ -291,20 +291,25 @@ pub fn flatten_and(node: &Node) -> Vec<&Node> { struct TreeSitterNormalizer<'source> { source: &'source str, + language: Language, local_stack: Vec>, } impl<'source> TreeSitterNormalizer<'source> { - fn new(source: &'source str) -> Self { + fn new(source: &'source str, language: Language) -> Self { Self { source, + language, local_stack: Vec::new(), } } fn normalize(mut self, root: TreeSitterNode<'_>) -> Node { - let children = - self.with_ruby_scope(root, true, |normalizer| normalizer.normalize_children(root)); + let children = if self.language == Language::Ruby { + self.with_ruby_scope(root, true, |normalizer| normalizer.normalize_children(root)) + } else 
{ + self.normalize_children(root) + }; self.wrap("ROOT", children, root) } @@ -324,6 +329,12 @@ impl<'source> TreeSitterNormalizer<'source> { if if_kind(node.kind()) { return self.normalize_if(node); } + if let Some(loop_type) = loop_kind(node.kind()) { + return self.normalize_loop(node, loop_type); + } + if self.case_kind(node.kind()) { + return self.normalize_case(node); + } if self.modifier_statement(node) { return self.normalize_modifier_statement(node); } @@ -333,12 +344,18 @@ impl<'source> TreeSitterNormalizer<'source> { if self.command_call_statement(node) { return self.normalize_command_call_statement(node); } + if self.unary_not_statement(node) { + return self.normalize_unary_not(node); + } if self.interpolated_statement(node) { return Some(self.normalize_interpolated_statement(node)); } if self.dotted_expression(node) { return self.normalize_dotted_expression(node); } + if self.unary_not_expression(node) { + return self.normalize_unary_not(node); + } if self.boolean_expression(node) { return self.normalize_boolean(node); } @@ -369,16 +386,29 @@ impl<'source> TreeSitterNormalizer<'source> { "body_statement" | "block_body" | "block" => self.normalize_body(node), "ensure" => self.normalize_ensure_clause(node), "begin" => self.normalize_begin(node), + "subshell" => Some(self.normalize_subshell(node)), + "block_argument" => self.normalize_block_argument(node), + "singleton_class" => self.normalize_singleton_class(node), + "yield" => Some(self.normalize_yield(node)), + "operator_assignment" => self.normalize_operator_assignment(node), "assignment" | "assignment_expression" | "assignment_statement" => { self.normalize_assignment(node) } - "local_variable_declaration" - | "variable_declarator" - | "variable_declaration" - | "property_declaration" => self.normalize_declaration(node), + "variable_declarator" if !self.has_assignment_operator_child(node) => { + Some(self.wrap(kind_type(node.kind()), Vec::new(), node)) + } + "expression_list" if 
self.single_short_var_lhs(node) => { + Some(self.wrap(kind_type(node.kind()), Vec::new(), node)) + } "call" | "call_expression" | "method_call" | "method_call_expression" => { self.normalize_call(node) } + _ if self.member_read_node(node) => self.normalize_member_read(node), + _ if self.unwrap_node(node) => self + .named_children(node) + .into_iter() + .next() + .and_then(|child| self.normalize_node(child)), "element_reference" => self.normalize_element_reference(node), "rescue_modifier" => self.normalize_rescue_modifier(node), "super" => Some(self.normalize_super(node)), @@ -392,9 +422,11 @@ impl<'source> TreeSitterNormalizer<'source> { vec![Child::String(node_text(node, self.source).to_string())], node, )), - "identifier" | "simple_identifier" | "property_identifier" | "field_identifier" => { - Some(self.normalize_identifier(node)) - } + "identifier" + | "simple_identifier" + | "property_identifier" + | "field_identifier" + | "shorthand_property_identifier" => Some(self.normalize_identifier(node)), "constant" | "scope_resolution" | "type_identifier" | "scoped_type_identifier" => { Some(self.normalize_const(node)) } @@ -403,6 +435,7 @@ impl<'source> TreeSitterNormalizer<'source> { "array" => Some(self.normalize_array_literal(node)), "interpolation" => self.normalize_interpolation(node), "heredoc_beginning" => Some(self.normalize_heredoc_beginning(node)), + "chained_string" => Some(self.normalize_chained_string(node)), "string" | "string_content" | "string_literal" @@ -438,11 +471,7 @@ impl<'source> TreeSitterNormalizer<'source> { )), _ => { let children = self.normalize_children(node); - if children.is_empty() { - None - } else { - Some(self.wrap(kind_type(node.kind()), children, node)) - } + Some(self.wrap(kind_type(node.kind()), children, node)) } } } @@ -536,6 +565,22 @@ impl<'source> TreeSitterNormalizer<'source> { )) } + fn normalize_singleton_class(&mut self, node: TreeSitterNode<'_>) -> Option { + let named = self.named_children(node); + let receiver = 
named + .first() + .and_then(|receiver| self.normalize_node(*receiver)); + let body = named.get(1).and_then(|body| self.normalize_body(*body)); + Some(self.wrap( + "SCLASS", + vec![ + optional_node(receiver), + Child::Node(Box::new(self.scope(body, None, node))), + ], + node, + )) + } + fn normalize_lambda(&mut self, node: TreeSitterNode<'_>) -> Option { let body_node = self .named_field(node, "body") @@ -548,6 +593,32 @@ impl<'source> TreeSitterNormalizer<'source> { Some(self.wrap("LAMBDA", vec![Child::Node(Box::new(scope))], node)) } + fn normalize_yield(&mut self, node: TreeSitterNode<'_>) -> Node { + let args_node = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list"); + let args = args_node + .map(|args| { + self.named_children(args) + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>() + }) + .unwrap_or_else(|| { + self.named_children(node) + .into_iter() + .filter(|child| child.kind() != "yield") + .filter_map(|child| self.normalize_node(child)) + .collect() + }); + self.wrap( + "YIELD", + vec![list_or_nil(args, args_node.unwrap_or(node), self)], + node, + ) + } + fn normalize_body(&mut self, node: TreeSitterNode<'_>) -> Option { if self.leading_if_statement(node) { return self.normalize_leading_if_statement(node); @@ -567,6 +638,9 @@ impl<'source> TreeSitterNormalizer<'source> { if self.command_call_statement(node) { return self.normalize_command_call_statement(node); } + if self.unary_not_statement(node) { + return self.normalize_unary_not(node); + } if self.dotted_expression(node) { return self.normalize_dotted_expression(node); } @@ -635,6 +709,33 @@ impl<'source> TreeSitterNormalizer<'source> { Some(self.wrap(node_type, vec![condition, positive, negative], node)) } + fn normalize_loop(&mut self, node: TreeSitterNode<'_>, node_type: &str) -> Option { + if matches!(node.kind(), "while_modifier" | "until_modifier") { + let named = self.named_children(node); + let action = *named.first()?; + 
let condition = *named.get(1)?; + let condition = optional_node(self.normalize_node(condition)); + let action = optional_node(self.normalize_modifier_action(action)); + return Some(self.wrap( + node_type, + vec![condition, action, Child::String("true".to_string())], + node, + )); + } + + let condition = self + .named_field(node, "condition") + .or_else(|| self.first_named(node)); + let body = self + .named_field(node, "body") + .or_else(|| self.named_field(node, "consequence")) + .or_else(|| self.block_child(node)); + let condition = + optional_node(condition.and_then(|condition| self.normalize_node(condition))); + let body = optional_node(body.and_then(|body| self.normalize_body(body))); + Some(self.wrap(node_type, vec![condition, body], node)) + } + fn normalize_else_or_branch(&mut self, node: TreeSitterNode<'_>) -> Option { if node.kind() != "else" { return self.normalize_body(node); @@ -652,6 +753,80 @@ impl<'source> TreeSitterNormalizer<'source> { self.normalize_body_nodes(self.named_children(node), node) } + fn normalize_case(&mut self, node: TreeSitterNode<'_>) -> Option { + let value_raw = self.case_value(node); + let value = value_raw.and_then(|value| self.normalize_node(value)); + let whens = self + .case_arms(node) + .into_iter() + .filter_map(|arm| self.normalize_when(arm)) + .collect::>(); + let fallback = self.case_else_body(node); + let chain = self.link_when_chain(whens, fallback); + if value_raw.is_none() { + Some(self.wrap("CASE2", vec![optional_node(chain)], node)) + } else { + Some(self.wrap( + "CASE", + vec![optional_node(value), optional_node(chain)], + node, + )) + } + } + + fn normalize_when(&mut self, node: TreeSitterNode<'_>) -> Option { + let patterns = self.normalize_patterns(node); + let body = self + .when_body(node) + .and_then(|body| self.normalize_body(body)); + Some(self.wrap( + "WHEN", + vec![ + list_or_nil(patterns, node, self), + optional_node(body), + Child::Nil, + ], + node, + )) + } + + fn normalize_patterns(&mut self, node: 
TreeSitterNode<'_>) -> Vec { + let body = self.when_body(node); + let mut patterns = Vec::new(); + for child in self.named_children(node) { + if Some(child) == body + || self.block_kind(child.kind()) + || self.statement_node(child.kind()) + || self.when_kind(child.kind()) + { + continue; + } + if let Some(pattern) = self.normalize_node(child) { + patterns.push(pattern); + } + } + patterns + } + + fn link_when_chain(&self, whens: Vec, fallback: Option) -> Option { + whens + .into_iter() + .rev() + .fold(fallback, |next_when, mut current| { + if current.children.len() > 2 { + current.children[2] = optional_node(next_when); + } + Some(current) + }) + } + + fn case_else_body(&mut self, node: TreeSitterNode<'_>) -> Option { + self.named_children(node) + .into_iter() + .find(|child| child.kind() == "else") + .and_then(|else_node| self.normalize_else_or_branch(else_node)) + } + fn normalize_body_nodes( &mut self, nodes: Vec>, @@ -784,7 +959,7 @@ impl<'source> TreeSitterNormalizer<'source> { fn normalize_comparison(&mut self, node: TreeSitterNode<'_>) -> Option { let operands = self.named_children(node); let left = operands.first().and_then(|left| self.normalize_node(*left)); - let right_raw = operands.get(1).copied().unwrap_or(node); + let right_raw = operands.get(1).copied()?; let right = self.normalize_node(right_raw); Some(self.wrap( "OPCALL", @@ -800,7 +975,7 @@ impl<'source> TreeSitterNormalizer<'source> { fn normalize_operator_call(&mut self, node: TreeSitterNode<'_>) -> Option { let operands = self.named_children(node); let left = operands.first().and_then(|left| self.normalize_node(*left)); - let right_raw = operands.get(1).copied().unwrap_or(node); + let right_raw = operands.get(1).copied()?; let right = self.normalize_node(right_raw); Some(self.wrap( "OPCALL", @@ -828,6 +1003,16 @@ impl<'source> TreeSitterNormalizer<'source> { )) } + fn normalize_unary_not(&mut self, node: TreeSitterNode<'_>) -> Option { + let operand = 
self.named_children(node).into_iter().next()?; + let operand = optional_node(self.normalize_node(operand)); + Some(self.wrap( + "OPCALL", + vec![operand, Child::Symbol("!".to_string()), Child::Nil], + node, + )) + } + fn normalize_ternary_branch(&mut self, nodes: &[TreeSitterNode<'_>]) -> Option { if nodes.is_empty() { return None; @@ -889,6 +1074,9 @@ impl<'source> TreeSitterNormalizer<'source> { let right = self .assignment_right(node) .and_then(|right| self.normalize_node(right)); + if left.kind() == "left_assignment_list" { + return Some(self.normalize_multiple_assignment(left, right, node)); + } if let Some(target) = self.assignment_target(left, right.clone(), node) { return Some(target); } @@ -899,6 +1087,193 @@ impl<'source> TreeSitterNormalizer<'source> { )) } + fn normalize_operator_assignment(&mut self, node: TreeSitterNode<'_>) -> Option { + let left = self.assignment_left(node)?; + let right_raw = self.assignment_right(node); + let right = right_raw.and_then(|right| self.normalize_node(right)); + let operator = self.operator_assignment_operator(node); + + if left.kind() == "element_reference" { + let named = self.named_children(left); + let receiver = *named.first()?; + let args = named + .iter() + .skip(1) + .filter_map(|arg| self.normalize_node(*arg)) + .collect::>(); + let receiver = optional_node(self.normalize_node(receiver)); + return Some(self.wrap( + "OP_ASGN1", + vec![ + receiver, + Child::Symbol(operator), + list_or_nil(args, left, self), + optional_node(right), + ], + node, + )); + } + + if self.member_read_node(left) { + let (receiver, method) = self.member_parts(left)?; + let receiver = optional_node(self.normalize_node(receiver)); + return Some(self.wrap( + "OP_ASGN2", + vec![ + receiver, + Child::Nil, + Child::Symbol(method), + Child::Symbol(operator), + optional_node(right), + ], + node, + )); + } + + if let Some(logical) = + self.normalize_logical_operator_assignment(left, &operator, right.clone(), node) + { + return Some(logical); + } 
+ + if left.kind() == "instance_variable" + || left.kind() == "global_variable" + || node_text(left, self.source).starts_with('@') + || node_text(left, self.source).starts_with('$') + { + let value = self.augmented_assignment_value(left, &operator, right_raw, node); + return self.assignment_target(left, Some(value), node); + } + + let value = self.augmented_assignment_value(left, &operator, right_raw, node); + self.assignment_target(left, Some(value.clone()), node) + .or_else(|| { + Some(self.wrap( + "LASGN", + vec![ + Child::String(self.target_name(left)), + Child::Node(Box::new(value)), + ], + node, + )) + }) + } + + fn normalize_logical_operator_assignment( + &mut self, + left: TreeSitterNode<'_>, + operator: &str, + right: Option, + source: TreeSitterNode<'_>, + ) -> Option { + if self.language != Language::Ruby || !matches!(operator, "||" | "&&") { + return None; + } + if !self.identifier_kind(left.kind()) { + return None; + } + let name = self.target_name(left); + let node_type = if operator == "||" { + "OP_ASGN_OR" + } else { + "OP_ASGN_AND" + }; + let receiver = self.wrap("LVAR", vec![Child::String(name.clone())], left); + let assignment = self.wrap( + "LASGN", + vec![Child::String(name), optional_node(right)], + source, + ); + Some(self.wrap( + node_type, + vec![ + Child::Node(Box::new(receiver)), + Child::Symbol(operator.to_string()), + Child::Node(Box::new(assignment)), + ], + source, + )) + } + + fn augmented_assignment_value( + &mut self, + left: TreeSitterNode<'_>, + operator: &str, + right_raw: Option>, + source: TreeSitterNode<'_>, + ) -> Node { + let receiver = optional_node(self.assignment_receiver(left)); + let right = right_raw.and_then(|right| self.normalize_node(right)); + self.wrap( + "CALL", + vec![ + receiver, + Child::Symbol(operator.to_string()), + list_or_nil(right.into_iter().collect(), right_raw.unwrap_or(left), self), + ], + source, + ) + } + + fn assignment_receiver(&mut self, left: TreeSitterNode<'_>) -> Option { + if 
self.identifier_kind(left.kind()) { + return Some(self.wrap( + "LVAR", + vec![Child::String(node_text(left, self.source).to_string())], + left, + )); + } + if left.kind() == "instance_variable" || node_text(left, self.source).starts_with('@') { + return Some(self.wrap( + "IVAR", + vec![Child::String(node_text(left, self.source).to_string())], + left, + )); + } + if left.kind() == "global_variable" || node_text(left, self.source).starts_with('$') { + return Some(self.wrap( + "GVAR", + vec![Child::String(node_text(left, self.source).to_string())], + left, + )); + } + if self.const_kind(left.kind()) { + return Some(self.normalize_const(left)); + } + self.normalize_node(left) + } + + fn normalize_multiple_assignment( + &self, + left: TreeSitterNode<'_>, + right: Option, + source: TreeSitterNode<'_>, + ) -> Node { + let targets = self + .named_children(left) + .into_iter() + .map(|child| { + let node_type = if child.kind() == "global_variable" + || node_text(child, self.source).starts_with('$') + { + "GASGN" + } else { + "LASGN" + }; + self.wrap( + node_type, + vec![Child::String(self.target_name(child)), Child::Nil], + child, + ) + }) + .collect::>(); + self.wrap( + "MASGN", + vec![optional_node(right), list_or_nil(targets, left, self)], + source, + ) + } + fn normalize_declaration(&mut self, node: TreeSitterNode<'_>) -> Option { let mut assignments = Vec::new(); for entry in self.declaration_entries(node) { @@ -943,6 +1318,19 @@ impl<'source> TreeSitterNormalizer<'source> { self.normalize_call_without_block(node, None) } + fn normalize_member_read(&mut self, node: TreeSitterNode<'_>) -> Option { + let Some((receiver, method)) = self.member_parts(node) else { + let children = self.normalize_children(node); + return Some(self.wrap(kind_type(node.kind()), children, node)); + }; + let receiver = optional_node(self.normalize_node(receiver)); + Some(self.wrap( + "CALL", + vec![receiver, Child::Symbol(method), Child::Nil], + node, + )) + } + fn normalize_call_with_block(&mut 
self, node: TreeSitterNode<'_>) -> Option { let block = self.call_block(node); let call = self.normalize_call_without_block(node, block)?; @@ -1061,6 +1449,16 @@ impl<'source> TreeSitterNormalizer<'source> { node, )); } + if self.language == Language::Ruby && self.const_kind(function.kind()) { + return Some(self.wrap( + "FCALL", + vec![ + Child::Symbol(node_text(function, self.source).to_string()), + list_or_nil(args, node, self), + ], + node, + )); + } if self.member_read_node(function) { let (receiver, method) = self.member_parts(function)?; let receiver = optional_node(self.normalize_node(receiver)); @@ -1498,11 +1896,78 @@ impl<'source> TreeSitterNormalizer<'source> { )) } + fn normalize_block_argument(&mut self, node: TreeSitterNode<'_>) -> Option { + let value = self + .named_children(node) + .into_iter() + .next() + .and_then(|child| self.normalize_node(child)); + Some(self.wrap("BLOCK_PASS", vec![Child::Nil, optional_node(value)], node)) + } + fn normalize_interpolated_string(&mut self, node: TreeSitterNode<'_>) -> Node { let children = self.normalize_children(node); self.wrap("DSTR", children, node) } + fn normalize_subshell(&mut self, node: TreeSitterNode<'_>) -> Node { + let children = self + .named_children(node) + .into_iter() + .filter_map(|child| match child.kind() { + "interpolation" => self + .normalize_interpolation(child) + .map(|node| Child::Node(Box::new(node))), + "string_content" => Some(Child::Node(Box::new(self.wrap( + "STR", + vec![Child::String(node_text(child, self.source).to_string())], + child, + )))), + _ => None, + }) + .collect::>(); + let node_type = if children + .iter() + .any(|child| matches!(child, Child::Node(node) if node.r#type == "EVSTR")) + { + "DXSTR" + } else { + "XSTR" + }; + self.wrap(node_type, children, node) + } + + fn normalize_chained_string(&mut self, node: TreeSitterNode<'_>) -> Node { + let mut parts = Vec::new(); + let mut dynamic_source = None; + let mut first_child = None; + for child in 
self.named_children(node) { + first_child.get_or_insert(child); + let Some(normalized) = self.normalize_node(child) else { + continue; + }; + if normalized.r#type == "DSTR" { + if dynamic_source.is_none() + && normalized + .children + .iter() + .filter_map(self::node) + .any(|part| part.r#type == "EVSTR") + { + dynamic_source = Some(child); + } + parts.extend(normalized.children); + } else { + parts.push(Child::Node(Box::new(normalized))); + } + } + self.wrap( + "DSTR", + parts, + dynamic_source.or(first_child).unwrap_or(node), + ) + } + fn normalize_interpolated_statement(&mut self, node: TreeSitterNode<'_>) -> Node { let children = self.normalize_children(node); self.wrap("DSTR", children, node) @@ -1568,7 +2033,7 @@ impl<'source> TreeSitterNormalizer<'source> { fn normalize_identifier(&mut self, node: TreeSitterNode<'_>) -> Node { let name = node_text(node, self.source).to_string(); - if self.ruby_vcall_identifier(node, &name) { + if self.ruby_vcall_identifier(node, &name) || self.vcall_identifier(node, &name) { self.wrap("VCALL", vec![Child::Symbol(name)], node) } else { self.wrap("LVAR", vec![Child::String(name)], node) @@ -1576,6 +2041,9 @@ impl<'source> TreeSitterNormalizer<'source> { } fn normalize_parameters(&mut self, node: Option>) -> Option { + if self.language != Language::Ruby { + return None; + } let node = node?; let defaults = self .named_children(node) @@ -1602,8 +2070,70 @@ impl<'source> TreeSitterNormalizer<'source> { } } - fn normalize_block_parameters(&mut self, _block: Option>) -> Option { - None + fn normalize_block_parameters(&mut self, block: Option>) -> Option { + if self.language != Language::Ruby { + return None; + } + let block = block?; + let params = self + .named_children(block) + .into_iter() + .find(|child| child.kind() == "block_parameters")?; + let pre_init = self + .named_children(params) + .into_iter() + .filter(|param| param.kind() == "destructured_parameter") + .filter_map(|param| 
self.normalize_destructured_block_parameter(param)) + .map(|node| Child::Node(Box::new(node))) + .collect::>(); + if pre_init.is_empty() { + None + } else { + Some(self.wrap("ARGS", pre_init, params)) + } + } + + fn normalize_destructured_block_parameter( + &mut self, + param: TreeSitterNode<'_>, + ) -> Option { + let mut targets = Vec::new(); + self.collect_destructured_parameter_targets(param, &mut targets); + if targets.is_empty() { + return None; + } + let dvar = self.wrap("DVAR", vec![Child::Nil], param); + Some(self.wrap( + "MASGN", + vec![ + Child::Node(Box::new(dvar)), + list_or_nil(targets, param, self), + Child::Nil, + ], + param, + )) + } + + fn collect_destructured_parameter_targets( + &mut self, + node: TreeSitterNode<'_>, + targets: &mut Vec, + ) { + if self.identifier_kind(node.kind()) { + targets.push(self.wrap( + "DASGN", + vec![ + Child::String(node_text(node, self.source).to_string()), + Child::Nil, + ], + node, + )); + return; + } + + for child in self.named_children(node) { + self.collect_destructured_parameter_targets(child, targets); + } } fn normalize_children(&mut self, node: TreeSitterNode<'_>) -> Vec { @@ -1697,6 +2227,9 @@ impl<'source> TreeSitterNormalizer<'source> { reset: bool, f: impl FnOnce(&mut Self) -> T, ) -> T { + if self.language != Language::Ruby { + return f(self); + } let previous = self.local_stack.clone(); if reset { self.local_stack.clear(); @@ -1866,7 +2399,8 @@ impl<'source> TreeSitterNormalizer<'source> { } fn ruby_vcall_identifier(&self, node: TreeSitterNode<'_>, name: &str) -> bool { - !self.assignment_lhs(node) + self.language == Language::Ruby + && !self.assignment_lhs(node) && !self.ruby_definition_identifier(node) && !self .local_stack @@ -1875,6 +2409,54 @@ impl<'source> TreeSitterNormalizer<'source> { .any(|scope| scope.contains(name)) } + fn vcall_identifier(&self, node: TreeSitterNode<'_>, name: &str) -> bool { + if self.language == Language::Ruby + && self + .local_stack + .iter() + .rev() + .any(|scope| 
scope.contains(name)) + { + return false; + } + let Some(parent) = node.parent() else { + return false; + }; + if matches!( + parent.kind(), + "method" | "method_parameters" | "parameter_list" | "argument_list" | "arguments" + ) { + return false; + } + if self.member_read_node(parent) { + return false; + } + if self.assignment_lhs(node) || self.assignment_rhs(node) { + return false; + } + + if matches!(parent.kind(), "body_statement" | "block_body" | "then") + && self + .named_children(parent) + .into_iter() + .any(|child| child == node) + { + return true; + } + if matches!(parent.kind(), "if_modifier" | "unless_modifier") + && self + .named_children(parent) + .into_iter() + .next() + .map(|child| child == node) + .unwrap_or(false) + { + return true; + } + + false + } + fn ruby_definition_identifier(&self, node: TreeSitterNode<'_>) -> bool { let Some(parent) = node.parent() else { return false; @@ -1920,6 +2502,28 @@ impl<'source> TreeSitterNormalizer<'source> { .unwrap_or(false) } + fn has_assignment_operator_child(&self, node: TreeSitterNode<'_>) -> bool { + node.children(&mut node.walk()) + .any(|child| !child.is_named() && assignment_operator(node_text(child, self.source))) + } + + fn single_short_var_lhs(&self, node: TreeSitterNode<'_>) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "short_var_declaration" { + return false; + } + if self.named_children(node).len() != 1 { + return false; + } + self.named_children(parent) + .into_iter() + .next() + .map(|child| child == node) + .unwrap_or(false) + } + fn modifier_statement(&self, node: TreeSitterNode<'_>) -> bool { let named = self.named_children(node); matches!(node.kind(), "body_statement" | "block_body" | "statement") @@ -2184,6 +2788,7 @@ impl<'source> TreeSitterNormalizer<'source> { fn operator_call_expression(&self, node: TreeSitterNode<'_>) -> bool { matches!(node.kind(), "binary" | "binary_expression") + && self.named_children(node).len() >= 2 && self 
.binary_operator(node) .map(|operator| OPERATOR_CALL_OPERATORS.contains(&operator.as_str())) @@ -2194,16 +2799,34 @@ impl<'source> TreeSitterNormalizer<'source> { matches!( node.kind(), "binary" | "binary_expression" | "comparison_operator" - ) && self - .comparison_operator(node) - .map(|operator| COMPARISON_OPERATORS.contains(&operator.as_str())) - .unwrap_or(false) + ) && self.named_children(node).len() >= 2 + && self + .comparison_operator(node) + .map(|operator| COMPARISON_OPERATORS.contains(&operator.as_str())) + .unwrap_or(false) } fn infix_statement(&self, node: TreeSitterNode<'_>) -> bool { self.infix_statement_parts(node).is_some() } + fn unary_not_statement(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) && node + .children(&mut node.walk()) + .next() + .map(|child| node_text(child, self.source) == "!") + .unwrap_or(false) + && self.named_children(node).len() == 1 + } + + fn unary_not_expression(&self, node: TreeSitterNode<'_>) -> bool { + matches!(node.kind(), "unary" | "unary_expression") + && node_text(node, self.source).trim_start().starts_with('!') + } + fn infix_statement_parts<'tree>( &self, node: TreeSitterNode<'tree>, @@ -2489,6 +3112,20 @@ impl<'source> TreeSitterNormalizer<'source> { .or_else(|| self.named_children(node).into_iter().nth(1)) } + fn operator_assignment_operator(&self, node: TreeSitterNode<'_>) -> String { + let mut cursor = node.walk(); + let raw = node + .children(&mut cursor) + .find(|child| !child.is_named() && node_text(*child, self.source).ends_with('=')) + .map(|child| node_text(child, self.source)) + .unwrap_or(""); + match raw { + "||=" => "||".to_string(), + "&&=" => "&&".to_string(), + _ => raw.trim_end_matches('=').to_string(), + } + } + fn parameters_child<'tree>( &self, node: TreeSitterNode<'tree>, @@ -2669,19 +3306,27 @@ impl<'source> TreeSitterNormalizer<'source> { } fn target_name(&self, node: TreeSitterNode<'_>) -> String 
{ - node_text(node, self.source) - .trim_start_matches('*') - .to_string() + let text = node_text(node, self.source); + if self.identifier_kind(node.kind()) + || matches!(node.kind(), "splat" | "splat_parameter" | "rest_assignment") + { + text.trim_start_matches('*').to_string() + } else { + text.to_string() + } } fn function_name(&self, node: TreeSitterNode<'_>) -> Option { - self.named_field(node, "name") - .or_else(|| { - self.named_children(node) - .into_iter() - .find(|child| self.identifier_kind(child.kind()) || child.kind() == "constant") - }) - .map(|name| node_text(name, self.source).to_string()) + Some( + self.named_field(node, "name") + .or_else(|| { + self.named_children(node).into_iter().find(|child| { + self.identifier_kind(child.kind()) || child.kind() == "constant" + }) + }) + .map(|name| node_text(name, self.source).to_string()) + .unwrap_or_default(), + ) } fn block_child<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { @@ -2774,10 +3419,59 @@ impl<'source> TreeSitterNormalizer<'source> { .find(|child| matches!(child.kind(), "else" | "elsif")) } + fn case_value<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_field(node, "value") + .or_else(|| self.named_field(node, "subject")) + .or_else(|| self.named_field(node, "condition")) + .or_else(|| { + self.named_children(node).into_iter().find(|child| { + !self.when_kind(child.kind()) + && !self.block_kind(child.kind()) + && child.kind() != "else" + }) + }) + } + + fn case_arms<'tree>(&self, node: TreeSitterNode<'tree>) -> Vec> { + let mut arms = Vec::new(); + let mut stack = self.named_children(node); + while !stack.is_empty() { + let child = stack.remove(0); + if self.when_kind(child.kind()) { + arms.push(child); + } else if !function_kind(child.kind()) { + stack.extend(self.named_children(child)); + } + } + arms + } + + fn when_body<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_field(node, "body") + .or_else(|| self.named_field(node, "consequence")) + 
.or_else(|| self.named_field(node, "value")) + .or_else(|| { + self.named_children(node).into_iter().rev().find(|child| { + self.block_kind(child.kind()) || self.statement_node(child.kind()) + }) + }) + } + fn identifier_kind(&self, kind: &str) -> bool { matches!( kind, - "identifier" | "simple_identifier" | "property_identifier" | "field_identifier" + "identifier" + | "simple_identifier" + | "property_identifier" + | "field_identifier" + | "shorthand_property_identifier" + ) + } + + fn const_kind(&self, kind: &str) -> bool { + matches!( + kind, + "constant" | "scope_resolution" | "type_identifier" | "scoped_type_identifier" ) } @@ -2806,6 +3500,54 @@ impl<'source> TreeSitterNormalizer<'source> { ) } + fn case_kind(&self, kind: &str) -> bool { + matches!( + kind, + "case" + | "switch_statement" + | "expression_switch_statement" + | "switch_expression" + | "match_statement" + | "match_expression" + | "when_expression" + ) + } + + fn when_kind(&self, kind: &str) -> bool { + matches!( + kind, + "when" + | "switch_case" + | "case_clause" + | "expression_case" + | "case_statement" + | "switch_section" + | "switch_block_statement_group" + | "switch_entry" + | "when_entry" + | "match_arm" + ) + } + + fn statement_node(&self, kind: &str) -> bool { + kind.ends_with("_statement") + || kind.ends_with("_expression") + || matches!(kind, "return" | "break" | "next") + } + + fn unwrap_node(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "parenthesized_expression" + | "parenthesized_statements" + | "expression_statement" + | "statement" + | "case_pattern" + | "match_pattern" + | "pattern" + ) && self.named_children(node).len() == 1 + } + fn first_dotted_call_descendant<'tree>( &self, node: TreeSitterNode<'tree>, @@ -2822,6 +3564,9 @@ impl<'source> TreeSitterNormalizer<'source> { } fn elide_tail_returns(&self, node: Option) -> Option { + if self.language != Language::Ruby { + return node; + } let mut node = node?; if matches!( node.r#type.as_str(), @@ 
-3028,6 +3773,15 @@ fn if_kind(kind: &str) -> bool { ) } +fn loop_kind(kind: &str) -> Option<&'static str> { + match kind { + "while" | "while_statement" | "while_modifier" => Some("WHILE"), + "until_modifier" => Some("UNTIL"), + "for" | "for_statement" | "for_in_clause" => Some("FOR"), + _ => None, + } +} + fn function_kind(kind: &str) -> bool { matches!( kind, diff --git a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs index 1c7ee35bb..cfa3177c7 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs @@ -299,9 +299,16 @@ impl Report { *ess.entry(&h.contract).or_insert(0) += 1; } - let mut rows_map: BTreeMap> = BTreeMap::new(); + let mut rows_map: Vec<(String, Vec<&Hit>)> = Vec::new(); for h in &self.guard { - rows_map.entry(h.contract.clone()).or_default().push(h); + if let Some((_, hits)) = rows_map + .iter_mut() + .find(|(contract, _)| contract == &h.contract) + { + hits.push(h); + } else { + rows_map.push((h.contract.clone(), vec![h])); + } } let rows: Vec<_> = rows_map diff --git a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs index 24a576849..04f9514af 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs @@ -95,6 +95,12 @@ fn walk_lasgns<'a>(n: &'a Node, acc: &mut Vec<&'a Node>) { } fn lvars(node: &Node, acc: &mut Vec) { + if matches!( + node.r#type.as_str(), + "BRACKETED_ARGUMENT_LIST" | "bracketed_argument_list" + ) { + return; + } if node.r#type == "LVAR" { if let Some(Child::String(name)) = node.children.first() { acc.push(name.clone()); @@ -108,7 +114,11 @@ fn lvars(node: &Node, acc: &mut Vec) { fn analyze(file: &str, defn: &str, stmts: &[&Node]) -> Vec { let asgns: Vec<_> = lasgns(stmts) .iter() - .map(|n| { + 
.filter_map(|n| { + let name = match n.children.first() { + Some(Child::String(name)) => name.clone(), + _ => return None, + }; let mut deps = Vec::new(); if let Some(val) = n.children.get(1).and_then(ast::node) { lvars(val, &mut deps); @@ -119,15 +129,12 @@ fn analyze(file: &str, defn: &str, stmts: &[&Node]) -> Vec { .into_iter() .collect(); deps.sort(); - Asgn { - name: match n.children.first().unwrap() { - Child::String(s) => s.clone(), - _ => panic!("LASGN without name"), - }, + Some(Asgn { + name, deps, line: n.first_lineno, span: [n.first_lineno, n.first_column, n.last_lineno, n.last_column], - } + }) }) .collect(); diff --git a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs index 7ae5a5062..db9d9b52c 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs @@ -17,7 +17,7 @@ pub struct FalseSimplicityRow { } #[derive(Clone, Debug)] -struct Site { +struct Hit { kind: String, detail: String, file: String, @@ -26,212 +26,1169 @@ struct Site { span: Span, } +#[derive(Clone, Debug)] +struct ClassRec { + name: String, + file: String, + line: usize, + core: bool, + span: Span, +} + +#[derive(Clone, Copy)] +struct Lexicon { + dispatch_mids: &'static [&'static str], + meta_mids: &'static [&'static str], + method_obj_mids: &'static [&'static str], + io_consts: &'static [&'static str], + io_bare: &'static [&'static str], + dir_context: &'static [&'static str], + context_pairs: &'static [(&'static str, &'static [&'static str])], + context_bare: &'static [&'static str], + callback_set: &'static [&'static str], + core_consts: &'static [&'static str], +} + pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { - let mut sites = Vec::new(); + let mut hits = Vec::new(); + let mut classrecs = Vec::new(); for file in files { let (root, lines) = ast::parse_with_language(file, language)?; - let 
mut detector = FalseSimplicity::new(file.to_string_lossy().to_string(), lines); - detector.walk(&root, &Vec::new()); - sites.extend(detector.sites); + let mut detector = + FalseSimplicity::new(file.to_string_lossy().to_string(), lines, language); + detector.walk(&root, &[], &[]); + hits.extend(detector.hits); + classrecs.extend(detector.classrecs); } - Ok(Report::new(sites).findings()) + Ok(Report::new(hits, classrecs).findings()) } -const DISPATCH_MIDS: &[&str] = &["send", "public_send", "method", "public_method", "__send__"]; -const IO_MIDS: &[&str] = &[ - "puts", - "print", - "p", - "open", +const EMPTY: &[&str] = &[]; +const EMPTY_PAIRS: &[(&str, &[&str])] = &[]; +const COMMON_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", +]; + +const RUBY_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("Time", &["now", "current"]), + ("Date", &["today", "current"]), + ("DateTime", &["now", "current"]), + ("Process", &["pid", "ppid", "uid", "gid", "euid"]), + ("Thread", &["current", "list", "main"]), + ("Fiber", &["current"]), + ("Random", &["rand", "bytes"]), + ("GC", &["stat", "count"]), + ("ObjectSpace", &["each_object", "count_objects"]), +]; +const PYTHON_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("time", &["time", "monotonic", "perf_counter"]), + ("datetime", &["now", "today", "utcnow"]), + ("random", &["random", "randint", "randrange", "choice"]), +]; +const JS_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("Date", &["now"]), + ("Math", &["random"]), + ("performance", &["now"]), +]; +const GO_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("time", &["Now", "Since", "Until"]), + ("rand", &["Int", "Intn", "Float64", "Read"]), +]; +const RUST_CONTEXT_PAIRS: &[(&str, &[&str])] = &[("SystemTime", &["now"]), ("Instant", &["now"])]; +const ZIG_CONTEXT_PAIRS: &[(&str, &[&str])] = + &[("time", &["timestamp", "nanoTimestamp", "milliTimestamp"])]; +const LUA_CONTEXT_PAIRS: &[(&str, 
&[&str])] = &[ + ("os", &["time", "clock", "date", "getenv"]), + ("math", &["random"]), +]; +const CPP_CONTEXT_PAIRS: &[(&str, &[&str])] = + &[("chrono", &["now"]), ("random_device", &["operator()"])]; +const CSHARP_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("DateTime", &["Now", "UtcNow", "Today"]), + ("Guid", &["NewGuid"]), + ("Random", &["Next", "NextDouble"]), +]; +const JAVA_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ( + "System", + &["currentTimeMillis", "nanoTime", "getenv", "getProperty"], + ), + ("Instant", &["now"]), + ("UUID", &["randomUUID"]), + ("Math", &["random"]), +]; +const SWIFT_CONTEXT_PAIRS: &[(&str, &[&str])] = &[("Date", &["now"]), ("UUID", &["init"])]; +const KOTLIN_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ( + "System", + &["currentTimeMillis", "nanoTime", "getenv", "getProperty"], + ), + ("Instant", &["now"]), + ("UUID", &["randomUUID"]), + ("Random", &["nextInt", "nextLong", "nextDouble"]), +]; + +const RUBY_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "reentrant", + "subscribe", + "callback", + "hook", +]; +const GO_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "Lock", + "Unlock", + "RLock", + "RUnlock", + "Do", + "Go", + "Add", + "Done", + "Wait", +]; +const RUST_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", "read", "write", - "sysread", - "syswrite", - "recv", - "send", - "gets", - "read_nonblock", - "write_nonblock", -]; -const REFLECTION_MIDS: &[&str] = &[ - "instance_eval", - "class_eval", - "module_eval", - "instance_exec", - "class_exec", - "module_exec", - "define_method", - "define_singleton_method", - "const_get", - "const_set", - "const_missing", - "method_missing", - "respond_to_missing?", + "spawn", + "await", ]; +const 
ZIG_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "spawn", + "wait", + "signal", +]; +const C_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "pthread_mutex_lock", + "pthread_mutex_unlock", +]; +const CPP_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "try_lock", + "wait", + "notify_one", + "notify_all", +]; +const CSHARP_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "Lock", + "Monitor", + "Enter", + "Exit", + "Wait", + "Pulse", +]; +const JAVA_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "wait", + "notify", + "notifyAll", + "submit", + "execute", +]; +const SWIFT_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "async", + "sync", +]; +const KOTLIN_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "synchronized", + "launch", + "async", + "await", +]; + +const RUBY_CORE_CONSTS: &[&str] = &[ + "String", + "Symbol", + "Integer", + "Float", + "Numeric", + "Rational", + "Complex", + "Array", + "Hash", + "Set", + "Range", + "Struct", + "Object", + "BasicObject", + "Kernel", + "Module", + "Class", + "Comparable", + "Enumerable", + "Enumerator", + "Proc", + "Method", + "UnboundMethod", + "NilClass", + "TrueClass", + "FalseClass", + "Exception", + "StandardError", + "RuntimeError", + 
"ArgumentError", + "TypeError", + "NameError", + "NoMethodError", + "IO", + "File", + "Dir", + "Time", + "Date", + "DateTime", + "Regexp", + "MatchData", + "Thread", + "Mutex", + "Fiber", + "Process", + "Math", + "GC", + "ObjectSpace", + "Marshal", + "Random", + "Encoding", +]; + +fn lexicon_for(language: Language) -> Lexicon { + match language { + Language::Ruby => Lexicon { + dispatch_mids: &[ + "send", + "__send__", + "public_send", + "const_get", + "constantize", + "instance_variable_get", + ], + meta_mids: &[ + "define_method", + "define_singleton_method", + "alias_method", + "class_eval", + "module_eval", + "instance_eval", + "class_exec", + "module_exec", + "instance_exec", + "eval", + "const_set", + "instance_variable_set", + "remove_method", + "undef_method", + "prepend", + "singleton_class", + "binding", + ], + method_obj_mids: &["method", "public_method", "instance_method"], + io_consts: &[ + "File", + "IO", + "Dir", + "FileUtils", + "Open3", + "Socket", + "TCPSocket", + "UDPSocket", + "TCPServer", + "UNIXSocket", + "Tempfile", + "Pathname", + "Marshal", + ], + io_bare: &[ + "puts", + "print", + "warn", + "gets", + "readline", + "readlines", + "system", + "exec", + "spawn", + "fork", + "sleep", + "open", + "abort", + "exit", + "exit!", + ], + dir_context: &["pwd", "getwd", "home"], + context_pairs: RUBY_CONTEXT_PAIRS, + context_bare: &["rand", "srand"], + callback_set: RUBY_CALLBACK_SET, + core_consts: RUBY_CORE_CONSTS, + }, + Language::Python => Lexicon { + dispatch_mids: &[ + "getattr", + "setattr", + "hasattr", + "__getattr__", + "__setattr__", + "import_module", + ], + meta_mids: &[ + "eval", "exec", "compile", "type", "globals", "locals", "vars", "setattr", + "delattr", + ], + method_obj_mids: &["method"], + io_consts: &[ + "Path", + "pathlib", + "os", + "sys", + "subprocess", + "socket", + "shutil", + ], + io_bare: &["print", "input", "open", "exec", "eval"], + dir_context: &["getcwd", "home"], + context_pairs: PYTHON_CONTEXT_PAIRS, + context_bare: 
&["random", "randint", "randrange"], + callback_set: COMMON_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::JavaScript | Language::TypeScript => Lexicon { + dispatch_mids: &["eval", "Function", "call", "apply", "bind"], + meta_mids: &[ + "eval", + "Function", + "defineProperty", + "defineProperties", + "setPrototypeOf", + ], + method_obj_mids: &["method"], + io_consts: &["console", "Console", "fs", "process", "Deno", "Bun"], + io_bare: &["setTimeout", "setInterval", "fetch", "require", "import"], + dir_context: EMPTY, + context_pairs: JS_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: COMMON_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Go => Lexicon { + dispatch_mids: &[ + "Call", + "CallSlice", + "Method", + "MethodByName", + "ValueOf", + "TypeOf", + ], + meta_mids: &["Call", "CallSlice", "MethodByName", "New", "MakeFunc"], + method_obj_mids: &["method"], + io_consts: &["os", "io", "ioutil", "fs", "net", "http", "exec", "syscall"], + io_bare: &["panic", "print", "println", "recover"], + dir_context: &["Getwd", "UserHomeDir"], + context_pairs: GO_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: GO_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Rust => Lexicon { + dispatch_mids: &[ + "downcast", + "downcast_ref", + "downcast_mut", + "call", + "call_mut", + "call_once", + ], + meta_mids: &["transmute", "from_raw_parts", "from_raw_parts_mut"], + method_obj_mids: &["method"], + io_consts: &["std", "tokio", "fs", "env", "process", "net", "io"], + io_bare: &["panic", "todo", "unimplemented", "unreachable"], + dir_context: &["current_dir", "home_dir"], + context_pairs: RUST_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: RUST_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Zig => Lexicon { + dispatch_mids: &["field", "fieldParentPtr", "ptrCast", "alignCast", "call"], + meta_mids: &[ + "typeInfo", + "TypeOf", + "ptrCast", + "intFromPtr", + "ptrFromInt", + "eval", + ], + method_obj_mids: &["method"], + io_consts: &[ + "std", "os", 
"fs", "process", "net", "Thread", "Mutex", "Atomic", + ], + io_bare: &["panic", "unreachable"], + dir_context: EMPTY, + context_pairs: ZIG_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: ZIG_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Lua => Lexicon { + dispatch_mids: &["load", "loadfile", "dofile", "require", "rawget", "rawset"], + meta_mids: &[ + "setmetatable", + "getmetatable", + "debug", + "eval", + "load", + "loadfile", + ], + method_obj_mids: &["method"], + io_consts: &["io", "os", "debug", "package"], + io_bare: &["print", "error", "assert", "require", "collectgarbage"], + dir_context: EMPTY, + context_pairs: LUA_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: COMMON_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::C => Lexicon { + dispatch_mids: &["dlsym", "dlopen", "GetProcAddress"], + meta_mids: &["setjmp", "longjmp", "va_start", "va_arg"], + method_obj_mids: &["method"], + io_consts: &["FILE", "DIR", "pthread", "mutex", "atomic"], + io_bare: &[ + "printf", "fprintf", "fopen", "open", "read", "write", "close", "system", "exec", + "abort", "exit", "assert", + ], + dir_context: &["getcwd", "getenv"], + context_pairs: EMPTY_PAIRS, + context_bare: &["rand", "time", "clock"], + callback_set: C_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Cpp => Lexicon { + dispatch_mids: &[ + "dynamic_cast", + "typeid", + "any_cast", + "get_if", + "visit", + "invoke", + ], + meta_mids: &["reinterpret_cast", "const_cast", "dlsym", "dlopen"], + method_obj_mids: &["method"], + io_consts: &[ + "std", + "filesystem", + "fstream", + "iostream", + "thread", + "mutex", + "atomic", + ], + io_bare: &["throw", "abort", "exit", "assert", "system"], + dir_context: &["current_path"], + context_pairs: CPP_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: CPP_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::CSharp => Lexicon { + dispatch_mids: &[ + "Invoke", + "GetMethod", + "GetProperty", + "GetField", + "Activator", + "CreateInstance", + ], + 
meta_mids: &["Invoke", "GetType", "Reflection", "Emit", "DynamicMethod"], + method_obj_mids: &["method"], + io_consts: &[ + "Console", + "File", + "Directory", + "Path", + "Process", + "Socket", + "HttpClient", + "Environment", + ], + io_bare: &["throw"], + dir_context: &["CurrentDirectory", "GetEnvironmentVariable"], + context_pairs: CSHARP_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: CSHARP_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Java => Lexicon { + dispatch_mids: &[ + "invoke", + "getMethod", + "getDeclaredMethod", + "getField", + "getDeclaredField", + "forName", + ], + meta_mids: &["invoke", "setAccessible", "newInstance", "Proxy"], + method_obj_mids: &["method"], + io_consts: &[ + "System", + "File", + "Files", + "Paths", + "ProcessBuilder", + "Socket", + "HttpClient", + "Thread", + "Lock", + "AtomicReference", + ], + io_bare: &["throw"], + dir_context: &["getProperty", "getenv"], + context_pairs: JAVA_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: JAVA_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Swift => Lexicon { + dispatch_mids: &[ + "perform", + "value", + "setValue", + "selector", + "NSClassFromString", + ], + meta_mids: &[ + "Mirror", + "unsafeBitCast", + "withUnsafePointer", + "withUnsafeBytes", + ], + method_obj_mids: &["method"], + io_consts: &[ + "FileManager", + "Process", + "URLSession", + "DispatchQueue", + "Thread", + "Lock", + "NSLock", + ], + io_bare: &[ + "print", + "fatalError", + "preconditionFailure", + "assertionFailure", + ], + dir_context: &["currentDirectoryPath", "homeDirectoryForCurrentUser"], + context_pairs: SWIFT_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: SWIFT_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Kotlin => Lexicon { + dispatch_mids: &[ + "invoke", + "call", + "callBy", + "memberProperties", + "declaredMemberFunctions", + ], + meta_mids: &[ + "reflection", + "javaClass", + "Class", + "forName", + "setAccessible", + ], + method_obj_mids: &["method"], + io_consts: &[ + 
"System", + "File", + "Files", + "Paths", + "ProcessBuilder", + "Socket", + "HttpClient", + "Thread", + "Mutex", + "AtomicReference", + ], + io_bare: &["println", "print", "error", "check", "require", "TODO"], + dir_context: &["getProperty", "getenv"], + context_pairs: KOTLIN_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: KOTLIN_CALLBACK_SET, + core_consts: EMPTY, + }, + } +} struct FalseSimplicity { file: String, lines: Vec, - sites: Vec, + language: Language, + lexicon: Lexicon, + hits: Vec, + classrecs: Vec, } impl FalseSimplicity { - fn new(file: String, lines: Vec) -> Self { + fn new(file: String, lines: Vec, language: Language) -> Self { Self { file, lines, - sites: Vec::new(), - } - } - - fn walk(&mut self, node: &Node, defstack: &[String]) { - let mut next_defstack = defstack.to_vec(); - if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; - if let Some(Child::Symbol(name)) = node.children.get(name_index) { - next_defstack.push(name.clone()); - } - } - - self.inspect_node(node, &next_defstack); - - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, &next_defstack); + language, + lexicon: lexicon_for(language), + hits: Vec::new(), + classrecs: Vec::new(), } } - fn inspect_node(&mut self, node: &Node, defstack: &[String]) { + fn walk(&mut self, node: &Node, defs: &[String], cls: &[String]) { match node.r#type.as_str() { - "CALL" | "OPCALL" | "FCALL" | "VCALL" => { - let mid = self.call_mid(node); - if let Some(mid) = mid { - if DISPATCH_MIDS.contains(&mid.as_str()) { - self.add_site("dynamic_dispatch", &mid, node, defstack); - } else if IO_MIDS.contains(&mid.as_str()) && !self.receiver_is_explicit(node) { - self.add_site("hidden_io", &mid, node, defstack); - } else if REFLECTION_MIDS.contains(&mid.as_str()) { - self.add_site("runtime_reflection", &mid, node, defstack); + "CLASS" | "MODULE" => { + self.walk_class(node, defs, cls); + return; + } + "SCLASS" => { + if 
self.language == Language::Ruby { + if let Some(recv) = node.children.first().and_then(ast::node) { + if recv.r#type != "SELF" { + self.emit( + "metaprogramming", + &format!("class << {}", ast::slice(recv, &self.lines)), + self.defn_name(defs), + node, + ); + } } } } + "DEFN" | "DEFS" => { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + let name = ast::child_to_string(node.children.get(name_index)); + if self.language == Language::Ruby { + if let Some(name) = name.as_deref() { + if matches!(name, "method_missing" | "respond_to_missing?") { + self.emit( + "metaprogramming", + &format!("def {name}"), + self.defn_name(defs), + node, + ); + } + } + } + let mut next_defs = defs.to_vec(); + if let Some(name) = name { + next_defs.push(name); + } + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_defs, cls); + } + return; + } + "CALL" | "FCALL" | "VCALL" | "OPCALL" => self.classify_call(node, defs), "ATTRASGN" => { - let mid = self.call_mid(node).unwrap_or_default(); - if mid.ends_with("eval=") { - self.add_site("runtime_reflection", &mid, node, defstack); + if let Some(mid) = ast::child_to_string(node.children.get(1)) { + self.emit("hidden_mutation", &mid, self.defn_name(defs), node); } } - "SUPER" | "ZSUPER" => { - self.add_site("context_dependency", "super", node, defstack); + "OP_ASGN1" | "OP_ASGN2" => { + self.emit("hidden_mutation", "op-assign", self.defn_name(defs), node); } "GVAR" | "GASGN" => { - if let Some(name) = ast::child_to_string(node.children.get(0)) { - if !name.starts_with("$PREMATCH") - && !name.starts_with("$POSTMATCH") - && !name.starts_with("$MATCH") - && !name.starts_with("$&") - && !name.starts_with("$'") - && !name.starts_with("$`") - { - self.add_site("context_dependency", &name, node, defstack); + if self.language == Language::Ruby { + if let Some(name) = ast::child_to_string(node.children.first()) { + self.emit("context_dependency", &name, self.defn_name(defs), node); } } } - "CVAR" | "CVDASGN" 
| "CVDECL" => { - self.add_site("hidden_mutation", "class_var", node, defstack); - } - "CLASS" | "MODULE" => { - if !defstack.is_empty() { - self.add_site("monkeypatch", "nested_reopen", node, defstack); + "XSTR" | "DXSTR" => { + if self.language == Language::Ruby { + self.emit("hidden_io", "backtick", self.defn_name(defs), node); } } - "ALIAS" => { - self.add_site("runtime_reflection", "alias", node, defstack); + "YIELD" => { + if self.language == Language::Ruby { + self.emit("dynamic_dispatch", "yield", self.defn_name(defs), node); + } } - "UNDEF" => { - self.add_site("runtime_reflection", "undef", node, defstack); + "ITER" => { + if let Some(call) = node.children.first().and_then(ast::node) { + if let Some(mid) = self.callee_mid(call) { + if self.callback(&mid) && !self.lexicon.meta_mids.contains(&mid.as_str()) { + self.emit("callback_inversion", &mid, self.defn_name(defs), node); + } + } + } } _ => {} } + + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, defs, cls); + } } - fn call_mid(&self, node: &Node) -> Option { - match node.r#type.as_str() { - "CALL" | "OPCALL" | "ATTRASGN" => ast::child_to_string(node.children.get(1)), - "FCALL" | "VCALL" => ast::child_to_string(node.children.get(0)), - _ => None, + fn walk_class(&mut self, node: &Node, defs: &[String], cls: &[String]) { + let Some(cpath) = node.children.first().and_then(ast::node) else { + return; + }; + let body = if node.r#type == "CLASS" { + node.children.get(2).and_then(ast::node) + } else { + node.children.get(1).and_then(ast::node) + }; + let simple = self.const_simple(cpath); + let based = cpath.r#type == "COLON2" + && !matches!(cpath.children.first(), None | Some(Child::Nil)) + && !cpath.text.starts_with("::"); + let mut name_parts = cls.to_vec(); + name_parts.push(self.const_text(cpath)); + let fqn = name_parts.join("::"); + if body.is_some_and(|body| self.has_def(body)) { + let core = + cls.is_empty() && !based && 
self.lexicon.core_consts.contains(&simple.as_str()); + self.classrecs.push(ClassRec { + name: fqn.clone(), + file: self.file.clone(), + line: node.first_lineno, + core, + span: self.span(node), + }); + if core { + self.emit("monkeypatch", &simple, &simple, node); + } + } + let mut next_cls = cls.to_vec(); + next_cls.push(self.const_text(cpath)); + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, defs, &next_cls); } } - fn receiver_is_explicit(&self, node: &Node) -> bool { - if matches!(node.r#type.as_str(), "FCALL" | "VCALL") { - return false; + fn classify_call(&mut self, call: &Node, defs: &[String]) { + let (recv, mid) = match call.r#type.as_str() { + "CALL" | "OPCALL" => ( + call.children.first().and_then(ast::node), + ast::child_to_string(call.children.get(1)), + ), + _ => (None, ast::child_to_string(call.children.first())), + }; + let Some(mid) = mid else { + return; + }; + + if self.block_pass(call) + && self.callback(&mid) + && !self.lexicon.meta_mids.contains(&mid.as_str()) + { + self.emit("callback_inversion", &mid, self.defn_name(defs), call); + return; } - if let Some(recv) = node.children.get(0).and_then(ast::node) { - recv.r#type != "SELF" - } else { - false + if self.lexicon.meta_mids.contains(&mid.as_str()) { + self.emit("metaprogramming", &mid, self.defn_name(defs), call); + return; + } + if self.lexicon.dispatch_mids.contains(&mid.as_str()) { + self.emit("dynamic_dispatch", &mid, self.defn_name(defs), call); + return; + } + + if mid == "call" { + if let Some(recv) = recv { + if self.method_obj(recv) { + self.emit( + "dynamic_dispatch", + "method(...).call", + self.defn_name(defs), + call, + ); + return; + } + if self.var_recv(recv) { + self.emit( + "dynamic_dispatch", + &format!("{}.call", ast::slice(recv, &self.lines)), + self.defn_name(defs), + call, + ); + return; + } + } + } + + if let Some(cp) = recv.and_then(|recv| self.const_recv(recv)) { + let base = cp + .trim_start_matches("::") + .split("::") + .next() + 
.unwrap_or("") + .to_string(); + if base == "Dir" && self.lexicon.dir_context.contains(&mid.as_str()) { + self.emit( + "context_dependency", + &format!("Dir.{mid}"), + self.defn_name(defs), + call, + ); + return; + } + if self.lexicon.io_consts.contains(&base.as_str()) + || (self.language == Language::Ruby && cp.starts_with("Net::")) + { + self.emit( + "hidden_io", + &format!("{cp}.{mid}"), + self.defn_name(defs), + call, + ); + return; + } + if self.language == Language::Ruby { + if base == "URI" && mid == "open" { + self.emit("hidden_io", "URI.open", self.defn_name(defs), call); + return; + } + if cp == "ENV" { + self.emit("context_dependency", "ENV", self.defn_name(defs), call); + return; + } + } + if self.context_pair(&base, &mid) { + self.emit( + "context_dependency", + &format!("{base}.{mid}"), + self.defn_name(defs), + call, + ); + return; + } + } + + if recv.is_none() { + if self.lexicon.io_bare.contains(&mid.as_str()) { + self.emit("hidden_io", &mid, self.defn_name(defs), call); + return; + } + if self.lexicon.context_bare.contains(&mid.as_str()) { + self.emit("context_dependency", &mid, self.defn_name(defs), call); + return; + } + } + + if mid.len() > 1 && mid.ends_with('!') && !matches!(mid.as_str(), "!=" | "!~") { + self.emit("hidden_mutation", &mid, self.defn_name(defs), call); + return; + } + if call.r#type == "OPCALL" && mid == "<<" { + self.emit("hidden_mutation", "<<", self.defn_name(defs), call); } } - fn add_site(&mut self, kind: &str, detail: &str, node: &Node, defstack: &[String]) { - self.sites.push(Site { + fn emit(&mut self, kind: &str, detail: &str, defn: &str, node: &Node) { + self.hits.push(Hit { kind: kind.to_string(), detail: detail.to_string(), file: self.file.clone(), - defn: defstack - .last() - .cloned() - .unwrap_or_else(|| "(top-level)".to_string()), + defn: defn.to_string(), line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], + span: self.span(node), }); } + + 
fn defn_name<'a>(&self, defs: &'a [String]) -> &'a str { + defs.last().map(String::as_str).unwrap_or("(top-level)") + } + + fn span(&self, node: &Node) -> Span { + [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ] + } + + fn callback(&self, mid: &str) -> bool { + self.lexicon.callback_set.contains(&mid) + || ["with_", "around_", "on_", "before_", "after_"] + .iter() + .any(|prefix| mid.starts_with(prefix)) + || mid.ends_with("_hook") + } + + fn callee_mid(&self, call: &Node) -> Option { + match call.r#type.as_str() { + "CALL" | "OPCALL" => ast::child_to_string(call.children.get(1)), + "FCALL" | "VCALL" => ast::child_to_string(call.children.first()), + _ => None, + } + } + + fn block_pass(&self, call: &Node) -> bool { + let args = match call.r#type.as_str() { + "CALL" | "OPCALL" => call.children.get(2), + "FCALL" => call.children.get(1), + _ => None, + }; + let Some(args) = args.and_then(ast::node) else { + return false; + }; + args.r#type == "BLOCK_PASS" + || (args.r#type == "LIST" + && args + .children + .iter() + .filter_map(ast::node) + .any(|child| child.r#type == "BLOCK_PASS")) + } + + fn method_obj(&self, recv: &Node) -> bool { + let mid = match recv.r#type.as_str() { + "CALL" => ast::child_to_string(recv.children.get(1)), + "FCALL" => ast::child_to_string(recv.children.first()), + _ => None, + }; + mid.is_some_and(|mid| self.lexicon.method_obj_mids.contains(&mid.as_str())) + } + + fn var_recv(&self, recv: &Node) -> bool { + matches!( + recv.r#type.as_str(), + "VCALL" | "LVAR" | "DVAR" | "IVAR" | "CVAR" | "GVAR" + ) + } + + fn const_recv(&self, recv: &Node) -> Option { + if matches!(recv.r#type.as_str(), "CONST" | "COLON2" | "COLON3") { + Some(self.const_text(recv)) + } else { + None + } + } + + fn const_text(&self, node: &Node) -> String { + match node.r#type.as_str() { + "CONST" => ast::child_to_string(node.children.first()).unwrap_or_default(), + "COLON3" => format!( + "::{}", + 
ast::child_to_string(node.children.first()).unwrap_or_default() + ), + "COLON2" => { + let name = ast::child_to_string(node.children.get(1)).unwrap_or_default(); + if node.text.starts_with("::") { + format!("::{name}") + } else if let Some(base) = node.children.first().and_then(ast::node) { + format!("{}::{name}", self.const_text(base)) + } else { + name + } + } + _ => ast::slice(node, &self.lines), + } + } + + fn const_simple(&self, node: &Node) -> String { + match node.r#type.as_str() { + "CONST" | "COLON3" => ast::child_to_string(node.children.first()).unwrap_or_default(), + "COLON2" => ast::child_to_string(node.children.get(1)).unwrap_or_default(), + _ => self.const_text(node), + } + } + + fn has_def(&self, node: &Node) -> bool { + let _ = self.language; + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + return true; + } + if matches!(node.r#type.as_str(), "CLASS" | "MODULE") { + return false; + } + node.children + .iter() + .filter_map(ast::node) + .any(|child| self.has_def(child)) + } + + fn context_pair(&self, base: &str, mid: &str) -> bool { + self.lexicon + .context_pairs + .iter() + .any(|(key, mids)| *key == base && mids.contains(&mid)) + } } struct Report { - sites: Vec, + hits: Vec, } impl Report { - fn new(sites: Vec) -> Self { - Self { sites } + fn new(mut hits: Vec, classrecs: Vec) -> Self { + let mut grouped: Vec<(String, Vec)> = Vec::new(); + for rec in classrecs { + if let Some((_, recs)) = grouped.iter_mut().find(|(name, _)| name == &rec.name) { + recs.push(rec); + } else { + grouped.push((rec.name.clone(), vec![rec])); + } + } + for (_name, recs) in grouped { + if recs.first().is_some_and(|rec| rec.core) { + continue; + } + let file_count = recs + .iter() + .map(|rec| rec.file.clone()) + .collect::>() + .len(); + if file_count < 2 { + continue; + } + for rec in recs { + hits.push(Hit { + kind: "monkeypatch".to_string(), + detail: format!("reopen {}", rec.name), + file: rec.file.clone(), + defn: rec.name.clone(), + line: rec.line, + span: 
rec.span, + }); + } + } + Self { hits } } fn findings(&self) -> Vec { - let mut groups: BTreeMap<(String, String), Vec<&Site>> = BTreeMap::new(); - for s in &self.sites { - groups - .entry((s.kind.clone(), s.detail.clone())) - .or_default() - .push(s); + let mut groups: Vec<((String, String), Vec<&Hit>)> = Vec::new(); + for hit in &self.hits { + let key = (hit.kind.clone(), hit.detail.clone()); + if let Some((_, hits)) = groups.iter_mut().find(|(existing, _)| existing == &key) { + hits.push(hit); + } else { + groups.push((key, vec![hit])); + } } let mut out = Vec::new(); - for ((kind, detail), sts) in groups { - let mut defns = BTreeSet::new(); - for s in &sts { - defns.insert((s.file.clone(), s.defn.clone())); - } - let scatter = defns.len(); - + for ((kind, detail), hits) in groups { + let units = hits + .iter() + .map(|hit| (hit.file.clone(), hit.defn.clone())) + .collect::>(); let mut sites = Vec::new(); let mut spans = BTreeMap::new(); - for s in &sts { - let loc = format!("{}:{}:{}", s.file, s.defn, s.line); - sites.push(loc.clone()); - spans.insert(loc, s.span); + for hit in &hits { + let loc = format!("{}:{}:{}", hit.file, hit.defn, hit.line); + if !sites.contains(&loc) { + sites.push(loc.clone()); + } + spans.entry(loc).or_insert(hit.span); } - out.push(FalseSimplicityRow { kind, detail, - support: sts.len(), - scatter, + support: hits.len(), + scatter: units.len(), at: sites.first().cloned().unwrap_or_default(), sites, spans, diff --git a/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs b/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs index 307dccfbb..9fec9466f 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs @@ -254,7 +254,7 @@ impl Scanner { child_masses.push(child_mass); } } - Some(Candidate { + let candidate = Candidate { file: file.to_string(), line, span: node.span, @@ -267,7 +267,8 @@ impl Scanner { raw: 
normalize_text(&node.text), child_fingerprints, child_masses, - }) + }; + Some(candidate) } fn type2_findings(&self, candidates: &[Candidate]) -> Vec { @@ -525,7 +526,17 @@ fn fingerprint(node: &RawNode, active: &mut HashSet) -> (String, usize) return (String::new(), 0); } active.insert(key.clone()); - let out = if CALL_KINDS.contains(&node.kind.as_str()) && call_message(node).is_some() { + let out = if matches!( + node.kind.as_str(), + "predefined_type" | "abstract_pointer_declarator" | "storage_class_specifier" | "ERROR" + ) { + let token = terminal_token(node); + if token.is_empty() { + (String::new(), 0) + } else { + (token, 1) + } + } else if CALL_KINDS.contains(&node.kind.as_str()) && call_message(node).is_some() { fingerprint_call(node, active) } else if node.children.is_empty() { let token = terminal_token(node); diff --git a/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs b/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs index b391d54d9..5134c7ff9 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs @@ -1,4 +1,4 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::ast::Span; use crate::decomplex::detectors::local_flow; use crate::decomplex::syntax::Language; use anyhow::Result; @@ -8,21 +8,32 @@ use std::path::PathBuf; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct FunctionLcomRow { - pub at: String, - pub owner: String, + pub file: String, pub defn: String, + pub owner: String, + pub method: String, + pub line: usize, + pub at: String, pub score: usize, - pub components: usize, pub mode: String, + pub components: usize, pub locals: usize, pub statements: usize, + pub terminal_join: bool, + pub component_vars: Vec>, + pub component_lines: Vec>, pub spans: BTreeMap, } +#[derive(Clone, Debug)] +struct Component { + vars: BTreeSet, + statements: Vec, +} + pub fn scan_files(files: &[PathBuf], language: 
Language) -> Result> { let summaries = local_flow::scan_files(files, language)?; - let mut detector = FunctionLcom::new(summaries); - Ok(detector.findings()) + Ok(FunctionLcom::new(summaries).findings()) } struct FunctionLcom { @@ -44,151 +55,232 @@ impl FunctionLcom { } } - fn findings(&mut self) -> Vec { - let mut out: Vec<_> = self + fn findings(&self) -> Vec { + let mut out = self .summaries .iter() - .filter_map(|s| self.finding_for(s)) - .collect(); - out.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.at.cmp(&b.at))); + .filter_map(|summary| self.finding_for(summary)) + .collect::>(); + out.sort_by(|a, b| { + b.score + .cmp(&a.score) + .then_with(|| a.file.cmp(&b.file)) + .then_with(|| a.line.cmp(&b.line)) + }); out } fn finding_for(&self, summary: &local_flow::MethodSummary) -> Option { - let all_locals = self.all_locals(summary); - if all_locals.len() < self.min_locals { + if summary.statements.len() < self.min_statements { return None; } - if summary.statements.len() < self.min_statements { + + let full_components = + self.substantial_components(self.components(&summary.statements), &summary.statements); + let pre_terminal = self.pre_terminal_statements(summary); + let pre_components = + self.substantial_components(self.components(pre_terminal), pre_terminal); + let local_count = self.local_names(&summary.statements).len(); + if local_count < self.min_locals { return None; } - let components = self.connected_components(summary, &all_locals); - if components.len() < self.min_components { + let terminal_join = self.terminal_join(summary, &pre_components); + let mut report_components = full_components; + let mut mode = "disjoint".to_string(); + if report_components.len() < self.min_components + && terminal_join + && pre_components.len() >= self.min_components + { + report_components = pre_components; + mode = "late_join".to_string(); + } + if report_components.len() < self.min_components { return None; } - let score = (components.len() * 10) + 
all_locals.len() + summary.statements.len(); + let score = self.score_for( + &report_components, + local_count, + summary.statements.len(), + terminal_join, + ); if score < self.min_score { return None; } - let mode = if self.late_join(summary, &components) { - "late_join".to_string() - } else { - "disjoint".to_string() - }; let at = format!("{}:{}:{}", summary.file, summary.name, summary.line); let mut spans = BTreeMap::new(); spans.insert(at.clone(), summary.span); - Some(FunctionLcomRow { - at, - owner: summary.owner.clone(), + file: summary.file.clone(), defn: summary.name.clone(), + owner: summary.owner.clone(), + method: summary.name.clone(), + line: summary.line, + at, score, - components: components.len(), mode, - locals: all_locals.len(), + components: report_components.len(), + locals: local_count, statements: summary.statements.len(), + terminal_join, + component_vars: report_components + .iter() + .map(|component| component.vars.iter().cloned().collect()) + .collect(), + component_lines: report_components + .iter() + .map(|component| { + component + .statements + .iter() + .map(|index| summary.statements[*index].line) + .collect::>() + .into_iter() + .collect() + }) + .collect(), spans, }) } - fn all_locals(&self, summary: &local_flow::MethodSummary) -> BTreeSet { - let mut locals = BTreeSet::new(); - for s in &summary.statements { - locals.extend(s.reads.clone()); - locals.extend(s.writes.clone()); + fn pre_terminal_statements<'a>( + &self, + summary: &'a local_flow::MethodSummary, + ) -> &'a [local_flow::Statement] { + if summary.statements.len() <= 1 { + &[] + } else { + &summary.statements[..summary.statements.len() - 1] } - locals } - fn connected_components( + fn terminal_join( &self, summary: &local_flow::MethodSummary, - locals: &BTreeSet, - ) -> Vec> { - let mut adj: BTreeMap> = BTreeMap::new(); - for s in &summary.statements { - let mut touched: Vec<_> = s.reads.union(&s.writes).cloned().collect(); - for (lhs, rhs) in &s.dependencies { - 
touched.push(lhs.clone()); - touched.push(rhs.clone()); + pre_components: &[Component], + ) -> bool { + let Some(terminal) = summary.statements.last() else { + return false; + }; + let mut component_index = BTreeMap::new(); + for (index, component) in pre_components.iter().enumerate() { + for name in &component.vars { + component_index.insert(name.clone(), index); } - for i in 0..touched.len() { - for j in i + 1..touched.len() { - adj.entry(touched[i].clone()) - .or_default() - .insert(touched[j].clone()); - adj.entry(touched[j].clone()) - .or_default() - .insert(touched[i].clone()); + } + self.touched_vars(terminal) + .into_iter() + .filter_map(|name| component_index.get(&name).copied()) + .collect::>() + .len() + >= self.min_components + } + + fn score_for( + &self, + components: &[Component], + local_count: usize, + statement_count: usize, + terminal_join: bool, + ) -> usize { + (components.len() * 10) + local_count + statement_count + if terminal_join { 5 } else { 0 } + } + + fn substantial_components( + &self, + raw_components: Vec>, + statements: &[local_flow::Statement], + ) -> Vec { + raw_components + .into_iter() + .filter_map(|vars| { + let touched = statements + .iter() + .enumerate() + .filter_map(|(index, statement)| { + if !self.touched_vars(statement).is_disjoint(&vars) { + Some(index) + } else { + None + } + }) + .collect::>(); + if vars.len() < 2 || touched.len() < 2 { + return None; } + Some(Component { + vars, + statements: touched, + }) + }) + .collect() + } + + fn components(&self, statements: &[local_flow::Statement]) -> Vec> { + let vars = self.local_names(statements); + let edges = self.graph_edges(statements); + let mut adjacency = vars + .iter() + .map(|name| (name.clone(), BTreeSet::new())) + .collect::>(); + for (left, right) in edges { + if left == right { + continue; } + adjacency + .entry(left.clone()) + .or_default() + .insert(right.clone()); + adjacency.entry(right).or_default().insert(left); } + let mut visited = BTreeSet::new(); 
let mut components = Vec::new(); - let mut unvisited = locals.clone(); - - while let Some(start) = unvisited.iter().next().cloned() { + for name in vars { + if visited.contains(&name) { + continue; + } let mut component = BTreeSet::new(); - let mut queue = vec![start]; - while let Some(node) = queue.pop() { - if !unvisited.contains(&node) { + let mut stack = vec![name]; + while let Some(current) = stack.pop() { + if visited.contains(¤t) { continue; } - unvisited.remove(&node); - component.insert(node.clone()); - if let Some(neighbors) = adj.get(&node) { - for n in neighbors { - if unvisited.contains(n) { - queue.push(n.clone()); + visited.insert(current.clone()); + component.insert(current.clone()); + if let Some(neighbors) = adjacency.get(¤t) { + for neighbor in neighbors { + if !visited.contains(neighbor) { + stack.push(neighbor.clone()); } } } } - if component.len() > 0 { - components.push(component); - } + components.push(component); } - - components.retain(|c| { - c.len() > 1 || self.standalone_state_usage(summary, c.iter().next().unwrap()) - }); components } - fn standalone_state_usage(&self, summary: &local_flow::MethodSummary, local: &str) -> bool { - let reads: usize = summary - .statements - .iter() - .map(|s| s.reads.contains(local) as usize) - .sum(); - let writes: usize = summary - .statements - .iter() - .map(|s| s.writes.contains(local) as usize) - .sum(); - reads + writes > 1 + fn graph_edges(&self, statements: &[local_flow::Statement]) -> Vec<(String, String)> { + let mut edges = Vec::new(); + for statement in statements { + edges.extend(statement.dependencies.iter().cloned()); + edges.extend(statement.co_uses.iter().cloned()); + } + edges } - fn late_join( - &self, - summary: &local_flow::MethodSummary, - components: &[BTreeSet], - ) -> bool { - let Some(last) = summary.statements.last() else { - return false; - }; - let mut joined = 0; - for c in components { - if last.reads.intersection(c).next().is_some() - || 
last.writes.intersection(c).next().is_some() - { - joined += 1; - } + fn local_names(&self, statements: &[local_flow::Statement]) -> BTreeSet { + let mut names = BTreeSet::new(); + for statement in statements { + names.extend(self.touched_vars(statement)); } - joined >= 2 + names + } + + fn touched_vars(&self, statement: &local_flow::Statement) -> BTreeSet { + statement.reads.union(&statement.writes).cloned().collect() } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs index 0c72b8deb..7c3a52718 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs @@ -692,7 +692,12 @@ impl EffectCollector { return; } if node.r#type == "CALL" - && !self.self_receiver(node.children.get(0).and_then(ast::node).unwrap()) + && !node + .children + .get(0) + .and_then(ast::node) + .map(|receiver| self.self_receiver(receiver)) + .unwrap_or(false) { return; } diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index 6b6e93d4f..2bc476eb2 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -319,11 +319,9 @@ impl LocalFlow { fn local_reads(&self, node: &Node) -> BTreeSet { let mut reads = Vec::new(); self.walk_local(node, &mut |child| { - if LOCAL_READ_TYPES.contains(&child.r#type.as_str()) - || (self.language != Language::Ruby && child.r#type == "VCALL") - { + if LOCAL_READ_TYPES.contains(&child.r#type.as_str()) { if let Some(name) = local_read_name(child) { - reads.push(name.clone()); + reads.push(name); } } }); @@ -384,9 +382,10 @@ impl LocalFlow { } } -fn local_read_name(node: &Node) -> Option<&String> { +fn local_read_name(node: &Node) -> Option { match node.children.first() { - Some(Child::String(name)) | 
Some(Child::Symbol(name)) => Some(name), + Some(Child::String(name)) | Some(Child::Symbol(name)) => Some(name.clone()), + Some(Child::Nil) => Some(String::new()), _ => None, } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs index 479633269..d8769054f 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs @@ -442,7 +442,7 @@ impl LocalityDrag { )); } parts.push(format!( - "method local complexity {}", + "method local complexity {:.1}", self.round(local_complexity) )); parts.join("; ") diff --git a/gems/decomplex/rust/src/decomplex/detectors/miner.rs b/gems/decomplex/rust/src/decomplex/detectors/miner.rs index 695ffed8e..8653c69e3 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/miner.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/miner.rs @@ -46,17 +46,19 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result struct Miner { sites: Vec, - groups: BTreeMap<(String, Vec), Vec>, + groups: Vec<((String, Vec), Vec)>, } impl Miner { fn new(sites: Vec) -> Self { - let mut groups = BTreeMap::new(); + let mut groups: Vec<((String, Vec), Vec)> = Vec::new(); for s in &sites { - groups - .entry((s.kind.clone(), s.members.clone())) - .or_insert_with(Vec::new) - .push(s.clone()); + let key = (s.kind.clone(), s.members.clone()); + if let Some((_, grouped)) = groups.iter_mut().find(|(existing, _)| existing == &key) { + grouped.push(s.clone()); + } else { + groups.push((key, vec![s.clone()])); + } } Self { sites, groups } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs index f030d819a..c227d7def 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs @@ -189,7 +189,7 @@ impl PathCondition { struct Report 
{ sites: Vec, - groups: BTreeMap, Vec>, + groups: Vec<(Vec, Vec)>, } impl Report { diff --git a/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs b/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs index 99643c01c..56e538d40 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs @@ -12,17 +12,18 @@ pub struct BrokenProtocolReport { #[derive(Clone, Debug, PartialEq, Serialize)] pub struct BrokenProtocol { - pub has: String, - pub missing: String, + pub pair: Vec, pub support: usize, pub confidence: f64, + pub has: String, + pub missing: String, pub at: String, pub spans: BTreeMap, } #[derive(Clone, Debug)] -struct Site { - calls: Vec, +struct Call { + mid: String, file: String, defn: String, line: usize, @@ -30,20 +31,156 @@ struct Site { } pub fn scan_files(files: &[PathBuf], language: Language) -> Result { - let mut sites = Vec::new(); + let mut calls = Vec::new(); for file in files { let (root, lines) = ast::parse_with_language(file, language)?; let mut sm = SequenceMine::new(file.to_string_lossy().to_string(), lines); sm.walk(&root, &Vec::new()); - sites.extend(sm.sites); + calls.extend(sm.calls); } - Ok(Report::new(sites).findings()) + Ok(Report::new(calls).findings()) } +const DECLARATIVE_MIDS: &[&str] = &[ + "abstract!", + "alias_method", + "any", + "attr_accessor", + "attr_reader", + "attr_writer", + "bind", + "cast", + "checked", + "enum", + "extend", + "final", + "include", + "interface!", + "let", + "must", + "must_because", + "nilable", + "override", + "overridable", + "params", + "prepend", + "private", + "private_class_method", + "public", + "require", + "require_relative", + "requires_ancestor", + "sealed!", + "sig", + "type_member", + "type_template", + "untyped", + "unsafe", + "void", +]; +const TEST_DSL_MIDS: &[&str] = &[ + "a_kind_of", + "after", + "around", + "before", + "be", + "be_a", + "be_an", + "be_empty", + "be_falsey", + "be_nil", + 
"be_truthy", + "change", + "contain_exactly", + "context", + "describe", + "eq", + "eql", + "equal", + "expect", + "have_attributes", + "have_key", + "have_received", + "it", + "match", + "not_to", + "raise_error", + "receive", + "subject", + "to", +]; +const ZERO_ARG_ACTION_MIDS: &[&str] = &[ + "acquire", + "begin", + "close", + "commit", + "connect", + "deinit", + "disconnect", + "drain", + "finish", + "flush", + "lock", + "open", + "release", + "rollback", + "start", + "stop", + "unlock", + "wait", +]; +const ZERO_ARG_ACTION_PREFIXES: &[&str] = &[ + "analyze", + "append", + "apply", + "build", + "call", + "check", + "classify", + "collect", + "compile", + "compute", + "consume", + "create", + "declare", + "emit", + "enforce", + "finalize", + "find", + "flush", + "handle", + "initialize", + "lower", + "mark", + "normalize", + "parse", + "perform", + "process", + "push", + "record", + "register", + "render", + "resolve", + "rewrite", + "run", + "scan", + "set", + "stamp", + "sync", + "transform", + "validate", + "verify", + "visit", + "walk", + "warn", + "write", +]; + struct SequenceMine { file: String, + #[allow(dead_code)] lines: Vec, - sites: Vec, + calls: Vec, } impl SequenceMine { @@ -51,7 +188,7 @@ impl SequenceMine { Self { file, lines, - sites: Vec::new(), + calls: Vec::new(), } } @@ -64,24 +201,25 @@ impl SequenceMine { } } - if node.r#type == "BLOCK" { - let calls = self.collect_calls(node); - if calls.len() >= 2 { - self.sites.push(Site { - calls, - file: self.file.clone(), - defn: next_defstack - .last() - .cloned() - .unwrap_or_else(|| "(top-level)".to_string()), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - }); + if matches!(node.r#type.as_str(), "CALL" | "FCALL" | "VCALL") { + if let Some(mid) = self.call_mid(node) { + if self.protocol_event(node, &mid) { + self.calls.push(Call { + mid, + file: self.file.clone(), + defn: next_defstack + .last() + .cloned() + 
.unwrap_or_else(|| "(top-level)".to_string()), + line: node.first_lineno, + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + }); + } } } @@ -90,120 +228,166 @@ impl SequenceMine { } } - fn collect_calls(&self, block_node: &Node) -> Vec { - let mut calls = Vec::new(); - for stmt in block_node.children.iter().filter_map(ast::node) { - if let Some(mid) = self.call_mid(stmt) { - calls.push(mid); - } + fn protocol_event(&self, node: &Node, mid: &str) -> bool { + !ignored_mid(mid) && !self.passive_reader_call(node, mid) + } + + fn passive_reader_call(&self, node: &Node, mid: &str) -> bool { + if zero_arg_action_name(mid) { + return false; + } + + match node.r#type.as_str() { + "CALL" => no_args(node.children.get(2)), + "VCALL" => true, + "FCALL" => no_args(node.children.get(1)), + _ => false, } - calls } fn call_mid(&self, node: &Node) -> Option { match node.r#type.as_str() { - "CALL" | "OPCALL" | "ATTRASGN" => ast::child_to_string(node.children.get(1)), + "CALL" => ast::child_to_string(node.children.get(1)), "FCALL" | "VCALL" => ast::child_to_string(node.children.get(0)), _ => None, } } } +struct PairSupport { + pair: Vec, + support: usize, + sites: Vec<(String, String)>, +} + struct Report { - sites: Vec, - counts: BTreeMap, - co_counts: BTreeMap<(String, String), usize>, + by_unit: Vec<((String, String), Vec)>, + support: BTreeMap, } impl Report { - fn new(sites: Vec) -> Self { - let mut counts = BTreeMap::new(); - let mut co_counts = BTreeMap::new(); - - for s in &sites { - let unique_calls: BTreeSet<_> = s.calls.iter().cloned().collect(); - let unique_calls: Vec<_> = unique_calls.into_iter().collect(); - - for c in &unique_calls { - *counts.entry(c.clone()).or_insert(0) += 1; + fn new(calls: Vec) -> Self { + let mut by_unit: Vec<((String, String), Vec)> = Vec::new(); + for call in calls { + let key = (call.file.clone(), call.defn.clone()); + if let Some((_, unit_calls)) = by_unit.iter_mut().find(|(existing, _)| 
existing == &key) + { + unit_calls.push(call); + } else { + by_unit.push((key, vec![call])); } + } - for i in 0..unique_calls.len() { - for j in i + 1..unique_calls.len() { - let mut pair = vec![unique_calls[i].clone(), unique_calls[j].clone()]; - pair.sort(); - *co_counts - .entry((pair[0].clone(), pair[1].clone())) - .or_insert(0) += 1; - } + let mut support = BTreeMap::new(); + for (_, calls) in &by_unit { + for mid in unique_mids(calls) { + *support.entry(mid).or_insert(0) += 1; } } - Self { - sites, - counts, - co_counts, - } + Self { by_unit, support } } fn findings(&self) -> BrokenProtocolReport { BrokenProtocolReport { - broken: self.broken_protocols(4, 0.75), + broken: self.broken_protocol(4, 0.75), } } - fn broken_protocols(&self, min_support: usize, min_confidence: f64) -> Vec { - let mut rules = Vec::new(); - for ((a, b), &co_count) in &self.co_counts { - let count_a = *self.counts.get(a).unwrap_or(&0); - let count_b = *self.counts.get(b).unwrap_or(&0); - - let conf_a = co_count as f64 / count_a as f64; - let conf_b = co_count as f64 / count_b as f64; - - if conf_a >= min_confidence && co_count >= min_support && count_a > co_count { - rules.push((a.clone(), b.clone(), co_count, conf_a)); - } - if conf_b >= min_confidence && co_count >= min_support && count_b > co_count { - rules.push((b.clone(), a.clone(), co_count, conf_b)); - } - } - + fn broken_protocol(&self, min_support: usize, min_confidence: f64) -> Vec { + let pairs = self.co_called_pairs(min_support); let mut out = Vec::new(); - let mut seen = BTreeSet::new(); - - for s in &self.sites { - let unique_calls: BTreeSet<_> = s.calls.iter().cloned().collect(); - - for (has, missing, sup, conf) in &rules { - if unique_calls.contains(has) && !unique_calls.contains(missing) { - let at = format!("{}:{}:{}", s.file, s.defn, s.line); - - let key = (has.clone(), missing.clone(), at.clone()); - if seen.insert(key) { - let mut spans = BTreeMap::new(); - spans.insert(at.clone(), s.span); - - 
out.push(BrokenProtocol { - has: has.clone(), - missing: missing.clone(), - support: *sup, - confidence: (conf * 100.0).round() / 100.0, - at, - spans, - }); - } + for ((file, defn), calls) in &self.by_unit { + let mids = unique_mids(calls); + for pair in &pairs { + let (has, missing) = + if mids.contains(&pair.pair[0]) && !mids.contains(&pair.pair[1]) { + (pair.pair[0].clone(), pair.pair[1].clone()) + } else if mids.contains(&pair.pair[1]) && !mids.contains(&pair.pair[0]) { + (pair.pair[1].clone(), pair.pair[0].clone()) + } else { + continue; + }; + let denominator = *self.support.get(&has).unwrap_or(&0); + if denominator == 0 { + continue; + } + let confidence = pair.support as f64 / denominator as f64; + if confidence < min_confidence { + continue; } + let Some(has_call) = calls.iter().find(|call| call.mid == has) else { + continue; + }; + let loc = format!("{}:{}:{}", file, defn, has_call.line); + let mut spans = BTreeMap::new(); + spans.insert(loc.clone(), has_call.span); + out.push(BrokenProtocol { + pair: pair.pair.clone(), + support: pair.support, + confidence: (confidence * 100.0).round() / 100.0, + has, + missing, + at: loc, + spans, + }); } } - out.sort_by(|a, b| { b.confidence .partial_cmp(&a.confidence) .unwrap() .then_with(|| b.support.cmp(&a.support)) - .then_with(|| a.at.cmp(&b.at)) }); out } + + fn co_called_pairs(&self, min_support: usize) -> Vec { + let mut counts: Vec = Vec::new(); + for (unit, calls) in &self.by_unit { + let mids = unique_mids(calls); + for i in 0..mids.len() { + for j in i + 1..mids.len() { + let pair = vec![mids[i].clone(), mids[j].clone()]; + if let Some(existing) = counts.iter_mut().find(|row| row.pair == pair) { + existing.support += 1; + existing.sites.push(unit.clone()); + } else { + counts.push(PairSupport { + pair, + support: 1, + sites: vec![unit.clone()], + }); + } + } + } + } + let mut out: Vec<_> = counts + .into_iter() + .filter(|row| row.support >= min_support) + .collect(); + out.sort_by(|a, b| 
b.support.cmp(&a.support)); + out + } +} + +fn ignored_mid(mid: &str) -> bool { + DECLARATIVE_MIDS.contains(&mid) || TEST_DSL_MIDS.contains(&mid) +} + +fn zero_arg_action_name(mid: &str) -> bool { + ZERO_ARG_ACTION_MIDS.contains(&mid) + || mid.ends_with('!') + || ZERO_ARG_ACTION_PREFIXES + .iter() + .any(|prefix| mid == *prefix || mid.starts_with(&format!("{prefix}_"))) +} + +fn no_args(child: Option<&Child>) -> bool { + child.is_none() || matches!(child, Some(Child::Nil)) +} + +fn unique_mids(calls: &[Call]) -> Vec { + let set: BTreeSet<_> = calls.iter().map(|call| call.mid.clone()).collect(); + set.into_iter().collect() } diff --git a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs index b56466ba2..1a3247e36 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs @@ -262,8 +262,8 @@ impl TemporalOrderingPressure { } let n = state_methods.len(); - let state_space_exp = fields.len().min(12); - let state_space = 2usize.pow(state_space_exp as u32); + let state_space_exp = fields.len(); + let state_space = 2usize.pow(state_space_exp.min(12) as u32); let score = (n * writers.len() * shared_fields.len().max(1)) + state_space; let first_line = state_methods.first()?.line; diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs index 7e5f7dc15..eb4304dca 100644 --- a/gems/decomplex/rust/src/main.rs +++ b/gems/decomplex/rust/src/main.rs @@ -13,6 +13,19 @@ use decomplex::syntax::Language; use std::path::PathBuf; fn main() -> Result<()> { + let worker = std::thread::Builder::new() + .name("decomplex-rust".to_string()) + .stack_size(64 * 1024 * 1024) + .spawn(run) + .with_context(|| "failed to start decomplex worker thread")?; + + match worker.join() { + Ok(result) => result, + Err(payload) => std::panic::resume_unwind(payload), + } +} + +fn 
run() -> Result<()> { let command = parse_args(std::env::args().skip(1).collect())?; parallel::set_jobs_for_process(command.jobs())?; match command { From 01927f1998356872b167a5acb2dae01d446e056f Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Thu, 18 Jun 2026 19:29:52 +0000 Subject: [PATCH 18/52] WIP: add tree-sitter normalization adapters --- gems/decomplex/lib/decomplex/ast.rb | 194 +- gems/decomplex/rust/Cargo.lock | 62 +- gems/decomplex/rust/Cargo.toml | 20 +- gems/decomplex/rust/src/bin/dump_ast.rs | 101 + gems/decomplex/rust/src/decomplex/ast.rs | 2839 ++++++++++++++++- .../decomplex/syntax/tree_sitter_adapter.rs | 2 +- gems/decomplex/test/ast_test.rb | 494 +++ gems/espalier/exe/espalier | 6 +- gems/espalier/lib/espalier.rb | 2 + .../espalier/lib/espalier/dependency_graph.rb | 486 +++ gems/espalier/lib/espalier/formatter.rb | 6 + .../lib/espalier/graphviz_formatter.rb | 223 ++ gems/espalier/test/dependency_graph_test.rb | 193 ++ 13 files changed, 4524 insertions(+), 104 deletions(-) create mode 100644 gems/decomplex/rust/src/bin/dump_ast.rs create mode 100644 gems/decomplex/test/ast_test.rb create mode 100644 gems/espalier/lib/espalier/dependency_graph.rb create mode 100644 gems/espalier/lib/espalier/graphviz_formatter.rb create mode 100644 gems/espalier/test/dependency_graph_test.rb diff --git a/gems/decomplex/lib/decomplex/ast.rb b/gems/decomplex/lib/decomplex/ast.rb index 11c4af045..e2b9974ec 100644 --- a/gems/decomplex/lib/decomplex/ast.rb +++ b/gems/decomplex/lib/decomplex/ast.rb @@ -40,9 +40,160 @@ def slice(node, _lines) node.text.to_s.strip.gsub(/\s+/, " ") end + # Language-specific syntax-shape decisions live here, before nodes + # are converted into Decomplex's shared AST vocabulary. 
+ class TreeSitterNormalizationAdapter + BINARY_WRAPPER_KINDS = %w[ + binary binary_expression binary_operator boolean_operator comparison_operator + ].freeze + + class << self + def for(document) + case document&.language&.to_sym + when :ruby then RubyTreeSitterNormalizationAdapter.new(document) + when :python then PythonTreeSitterNormalizationAdapter.new(document) + when :lua then LuaTreeSitterNormalizationAdapter.new(document) + when :typescript, :javascript then TypeScriptTreeSitterNormalizationAdapter.new(document) + else new(document) + end + end + end + + attr_reader :document + + def initialize(document) + @document = document + end + + def ruby? + false + end + + def yield_statement?(node) + %w[body_statement block block_body statement].include?(node.kind) && + node.children.first&.text == "yield" + rescue StandardError + false + end + + def super_statement?(_node) + false + end + + def explicit_alternative(node) + node.named_children.find { |child| %w[else else_clause else_statement].include?(child.kind) } + rescue StandardError + nil + end + + def unary_not_expression?(node) + %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("!") + end + + def unary_minus_expression?(node) + %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("-") + end + + def binary_operator(node) + direct_binary_operator(node).to_s + end + + private + + def direct_binary_operator(node) + node.children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) }&.text + rescue StandardError + nil + end + + def exact_single_named_child(node, kinds:) + children = node.named_children + return nil unless children.size == 1 + + child = children.first + return nil unless kinds.include?(child.kind) + return nil unless node.text.to_s == child.text.to_s + + child + rescue StandardError + nil + end + end + + class RubyTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def ruby? 
+ true + end + + def super_statement?(node) + %w[body_statement block block_body statement].include?(node.kind) && + (node.text.to_s.strip == "super" || + (node.named_children.first&.kind == "super" && + node.named_children.drop(1).all? { |child| child.kind == "argument_list" })) + rescue StandardError + false + end + + def explicit_alternative(node) + node.named_children.find { |child| %w[elsif else].include?(child.kind) } + rescue StandardError + nil + end + end + + class PythonTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def yield_statement?(node) + (%w[body_statement block block_body expression_statement statement].include?(node.kind) && + node.children.first&.text == "yield") + rescue StandardError + false + end + + def explicit_alternative(node) + node.named_children.find { |child| %w[elif_clause else else_clause].include?(child.kind) } + rescue StandardError + nil + end + + def unary_minus_expression?(node) + (%w[unary unary_expression unary_operator].include?(node.kind) && node.text.to_s.lstrip.start_with?("-")) + end + end + + class LuaTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def explicit_alternative(node) + node.named_children.find { |child| %w[elseif_statement else else_statement].include?(child.kind) } + rescue StandardError + nil + end + + def unary_minus_expression?(node) + super || + (node.kind == "expression_list" && node.children.first&.text == "-" && node.named_children.size == 1) + rescue StandardError + false + end + + def binary_operator(node) + direct = direct_binary_operator(node) + return direct.to_s if direct + + child = exact_single_named_child(node, kinds: BINARY_WRAPPER_KINDS) + child ? 
binary_operator(child) : "" + end + end + + class TypeScriptTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def explicit_alternative(node) + node.named_children.find { |child| %w[else else_clause].include?(child.kind) } + rescue StandardError + nil + end + end + # Tree-sitter exposes each grammar's native node names. Decomplex's # detectors share a small language-neutral AST vocabulary, so this - # adapter normalizes common syntax categories into that vocabulary: + # normalizer converts common syntax categories into that vocabulary: # DEFN, CLASS, IF, CASE/WHEN, AND/OR, CALL, LASGN, ATTRASGN, IVAR, # LVAR, and friends. The goal is portable structural facts, not # Ruby semantics. @@ -111,6 +262,7 @@ class TreeSitterNormalizer def initialize(document) @document = document + @normalization_adapter = TreeSitterNormalizationAdapter.for(document) @local_stack = [] @normalizing = Set.new end @@ -142,6 +294,8 @@ def normalize_node(node) if leading_function_statement?(node) normalize_leading_function_statement(node) + elsif leading_if_statement?(node) + normalize_leading_if_statement(node) elsif modifier_statement?(node) normalize_modifier_statement(node) elsif ternary_statement?(node) @@ -190,6 +344,8 @@ def normalize_node(node) normalize_lambda(node) elsif node.kind == "yield" normalize_yield(node) + elsif yield_statement?(node) + normalize_yield_statement(node) elsif yield_argument_list?(node) normalize_yield_argument_list(node) elsif node.kind == "heredoc_beginning" @@ -537,10 +693,7 @@ def normalize_yield(node) end def yield_statement?(node) - %w[body_statement block block_body statement].include?(node.kind) && - node.children.first&.text == "yield" - rescue StandardError - false + normalization_adapter.yield_statement?(node) end def normalize_yield_statement(node) @@ -572,16 +725,19 @@ def yield_argument_nodes(node) end def super_statement?(node) - %w[body_statement block block_body statement].include?(node.kind) && - node.named_children.first&.kind 
== "super" && - node.named_children.drop(1).all? { |child| child.kind == "argument_list" } - rescue StandardError - false + normalization_adapter.super_statement?(node) end def normalize_super_statement(node) args_node = node.named_children.find { |child| child.kind == "argument_list" } - args = args_node ? args_node.named_children.map { |child| normalize_node(child) }.compact : [] + args = + if args_node && args_node.named_children.empty? + [scalar_argument_list_value(args_node)].compact + elsif args_node + args_node.named_children.map { |child| normalize_node(child) }.compact + else + [] + end wrap(:SUPER, children: [list(args, source: args_node || node)], source: node) end @@ -1139,11 +1295,11 @@ def regex_literal?(node) end def unary_not_expression?(node) - %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("!") + normalization_adapter.unary_not_expression?(node) end def unary_minus_expression?(node) - %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("-") + normalization_adapter.unary_minus_expression?(node) end def boolean_operator(node) @@ -1164,7 +1320,7 @@ def comparison_operator(node) end def binary_operator(node) - node.children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) }&.text.to_s + normalization_adapter.binary_operator(node) end def spaced_text(node) @@ -1225,7 +1381,11 @@ def ruby_local_name?(name) end def ruby? - @document.language == :ruby + normalization_adapter.ruby? 
+ end + + def normalization_adapter + @normalization_adapter ||= TreeSitterNormalizationAdapter.for(@document) end def interpolated_string?(node) @@ -1281,6 +1441,8 @@ def assignment_lhs?(node) end def assignment_rhs?(node) + return false if literal_fragment_assignment_context?(node) + sibling = prev_sibling(node) sibling && assignment_operator?(sibling.text) end @@ -1517,7 +1679,7 @@ def branch_child(node, cond, index) end def explicit_alternative(node) - node.named_children.find { |child| %w[elsif else].include?(child.kind) } + normalization_adapter.explicit_alternative(node) end def const_for(node) diff --git a/gems/decomplex/rust/Cargo.lock b/gems/decomplex/rust/Cargo.lock index d007a91bd..92cf25461 100644 --- a/gems/decomplex/rust/Cargo.lock +++ b/gems/decomplex/rust/Cargo.lock @@ -66,6 +66,12 @@ dependencies = [ "tree-sitter-zig", ] +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "errno" version = "0.3.14" @@ -88,6 +94,22 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "itoa" version = "1.0.18" @@ -208,6 +230,7 @@ version = "1.0.150" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" dependencies = [ + "indexmap", "itoa", 
"memchr", "serde", @@ -252,22 +275,23 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.24.7" +version = "0.25.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" +checksum = "6d7b8994f367f16e6fa14b5aebbcb350de5d7cbea82dc5b00ae997dd71680dd2" dependencies = [ "cc", "regex", "regex-syntax", + "serde_json", "streaming-iterator", "tree-sitter-language", ] [[package]] name = "tree-sitter-c" -version = "0.23.4" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afd2b1bf1585dc2ef6d69e87d01db8adb059006649dd5f96f31aa789ee6e9c71" +checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707" dependencies = [ "cc", "tree-sitter-language", @@ -275,12 +299,12 @@ dependencies = [ [[package]] name = "tree-sitter-c-sharp" -version = "0.21.3" +version = "0.23.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8997ad04502208449025114e434c9024a33a74e700513c702a9d2cac6522a771" +checksum = "c1aac67f1ad71de1d6d39708d34811081c26dfa495658de6c14c34200849357c" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", ] [[package]] @@ -295,9 +319,9 @@ dependencies = [ [[package]] name = "tree-sitter-go" -version = "0.23.4" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b13d476345220dbe600147dd444165c5791bf85ef53e28acbedd46112ee18431" +checksum = "c8560a4d2f835cc0d4d2c2e03cbd0dde2f6114b43bc491164238d333e28b16ea" dependencies = [ "cc", "tree-sitter-language", @@ -315,9 +339,9 @@ dependencies = [ [[package]] name = "tree-sitter-javascript" -version = "0.23.1" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf40bf599e0416c16c125c3cec10ee5ddc7d1bb8b0c60fa5c4de249ad34dc1b1" +checksum = "68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5" dependencies = [ "cc", 
"tree-sitter-language", @@ -341,9 +365,9 @@ checksum = "c199356c799a8945965bb5f2c55b2ad9d9aa7c4b4f6e587fe9dea0bc715e5f9c" [[package]] name = "tree-sitter-lua" -version = "0.2.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cdb9adf0965fec58e7660cbb3a059dbb12ebeec9459e6dcbae3db004739641e" +checksum = "ea992f4164d83f371ef1239ae178c4d4596c296c09055e9a48bb02a2760403af" dependencies = [ "cc", "tree-sitter-language", @@ -351,9 +375,9 @@ dependencies = [ [[package]] name = "tree-sitter-python" -version = "0.23.6" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" +checksum = "6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c" dependencies = [ "cc", "tree-sitter-language", @@ -371,9 +395,9 @@ dependencies = [ [[package]] name = "tree-sitter-rust" -version = "0.23.3" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca8ccb3e3a3495c8a943f6c3fd24c3804c471fd7f4f16087623c7fa4c0068e8a" +checksum = "4b9b18034c684a2420722be8b2a91c9c44f2546b631c039edf575ccba8c61be1" dependencies = [ "cc", "tree-sitter-language", @@ -381,9 +405,9 @@ dependencies = [ [[package]] name = "tree-sitter-swift" -version = "0.6.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d65aeb41726119416567d0333ec17580ac4abfb96db1f67c4bd638c65f9992fe" +checksum = "4ef216011c3e3df4fa864736f347cb8d509b1066cf0c8549fb1fd81ac9832e59" dependencies = [ "cc", "tree-sitter-language", diff --git a/gems/decomplex/rust/Cargo.toml b/gems/decomplex/rust/Cargo.toml index 88503015b..f1ae79567 100644 --- a/gems/decomplex/rust/Cargo.toml +++ b/gems/decomplex/rust/Cargo.toml @@ -14,21 +14,21 @@ anyhow = "1.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" regex = "1.10" -tree-sitter = "=0.24.7" +tree-sitter = "=0.25.8" tree-sitter-language 
= "=0.1.3" tree-sitter-ruby = "=0.23.1" -tree-sitter-python = "0.23.6" -tree-sitter-javascript = "0.23.1" +tree-sitter-python = "=0.25.0" +tree-sitter-javascript = "=0.25.0" tree-sitter-java = "0.23.5" tree-sitter-typescript = "0.23.2" -tree-sitter-go = "0.23.4" -tree-sitter-rust = "0.23.2" -tree-sitter-zig = "1.0.2" -tree-sitter-lua = "0.2.0" -tree-sitter-c = "0.23.4" +tree-sitter-go = "=0.25.0" +tree-sitter-rust = "=0.24.0" +tree-sitter-zig = "=1.1.2" +tree-sitter-lua = "=0.4.1" +tree-sitter-c = "=0.24.1" tree-sitter-cpp = "0.23.4" -tree-sitter-c-sharp = "0.21.3" -tree-sitter-swift = "=0.6.0" +tree-sitter-c-sharp = "=0.23.5" +tree-sitter-swift = "=0.7.1" tree-sitter-kotlin-ng = "1.1.0" [dev-dependencies] diff --git a/gems/decomplex/rust/src/bin/dump_ast.rs b/gems/decomplex/rust/src/bin/dump_ast.rs new file mode 100644 index 000000000..c1e077865 --- /dev/null +++ b/gems/decomplex/rust/src/bin/dump_ast.rs @@ -0,0 +1,101 @@ +#[path = "../decomplex/mod.rs"] +mod decomplex; + +use anyhow::{bail, Result}; +use decomplex::ast::{self, Child, Node}; +use decomplex::syntax::Language; +use serde_json::{json, Value}; +use std::env; +use std::fs; +use std::path::PathBuf; +use tree_sitter::{Language as TreeSitterLanguage, Parser}; + +fn main() -> Result<()> { + let mut args = env::args().skip(1).collect::>(); + let raw = args.first().map(|arg| arg == "--raw").unwrap_or(false); + if raw { + args.remove(0); + } + let mut args = args.into_iter(); + let language = args + .next() + .ok_or_else(|| anyhow::anyhow!("usage: dump_ast [--raw] LANGUAGE FILE"))?; + let file = args + .next() + .ok_or_else(|| anyhow::anyhow!("usage: dump_ast [--raw] LANGUAGE FILE"))?; + if args.next().is_some() { + bail!("usage: dump_ast [--raw] LANGUAGE FILE"); + } + + let language = Language::parse(&language)?; + let file = PathBuf::from(file); + if raw { + let source = fs::read_to_string(&file)?; + let mut parser = Parser::new(); + parser.set_language(&language_grammar(language))?; + let tree = parser + 
.parse(&source, None) + .ok_or_else(|| anyhow::anyhow!("tree-sitter produced no tree"))?; + println!( + "{}", + serde_json::to_string(&raw_node_value(tree.root_node(), &source))? + ); + } else { + let (root, _lines) = ast::parse_with_language(&file, language)?; + println!("{}", serde_json::to_string(&node_value(&root))?); + } + Ok(()) +} + +fn node_value(node: &Node) -> Value { + json!({ + "type": node.r#type, + "children": node.children.iter().map(child_value).collect::>(), + "first_lineno": node.first_lineno, + "first_column": node.first_column, + "last_lineno": node.last_lineno, + "last_column": node.last_column, + "text": node.text, + }) +} + +fn child_value(child: &Child) -> Value { + match child { + Child::Node(node) => node_value(node), + Child::Symbol(value) | Child::String(value) => Value::String(value.clone()), + Child::Nil => Value::Null, + } +} + +fn raw_node_value(node: tree_sitter::Node<'_>, source: &str) -> Value { + let mut cursor = node.walk(); + json!({ + "kind": node.kind(), + "named": node.is_named(), + "start_byte": node.start_byte(), + "end_byte": node.end_byte(), + "start": {"row": node.start_position().row, "column": node.start_position().column}, + "end": {"row": node.end_position().row, "column": node.end_position().column}, + "text": node.utf8_text(source.as_bytes()).unwrap_or(""), + "children": node.children(&mut cursor).map(|child| raw_node_value(child, source)).collect::>(), + }) +} + +fn language_grammar(language: Language) -> TreeSitterLanguage { + match language { + Language::Ruby => tree_sitter_ruby::LANGUAGE.into(), + Language::Python => tree_sitter_python::LANGUAGE.into(), + Language::JavaScript => tree_sitter_javascript::LANGUAGE.into(), + Language::Java => tree_sitter_java::LANGUAGE.into(), + Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), + Language::Swift => tree_sitter_swift::LANGUAGE.into(), + Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(), + Language::Go => 
tree_sitter_go::LANGUAGE.into(), + Language::Rust => tree_sitter_rust::LANGUAGE.into(), + Language::Zig => tree_sitter_zig::LANGUAGE.into(), + Language::Lua => tree_sitter_lua::LANGUAGE.into(), + Language::C => tree_sitter_c::LANGUAGE.into(), + Language::Cpp => tree_sitter_cpp::LANGUAGE.into(), + Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(), + } +} diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs index f3357b233..7ba90ca27 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -230,7 +230,7 @@ fn language_grammar(language: Language) -> TreeSitterLanguage { Language::Lua => tree_sitter_lua::LANGUAGE.into(), Language::C => tree_sitter_c::LANGUAGE.into(), Language::Cpp => tree_sitter_cpp::LANGUAGE.into(), - Language::CSharp => tree_sitter_c_sharp::language().into(), + Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(), } } @@ -293,6 +293,7 @@ struct TreeSitterNormalizer<'source> { source: &'source str, language: Language, local_stack: Vec>, + root_span: Option, } impl<'source> TreeSitterNormalizer<'source> { @@ -301,10 +302,12 @@ impl<'source> TreeSitterNormalizer<'source> { source, language, local_stack: Vec::new(), + root_span: None, } } fn normalize(mut self, root: TreeSitterNode<'_>) -> Node { + self.root_span = Some(span(root)); let children = if self.language == Language::Ruby { self.with_ruby_scope(root, true, |normalizer| normalizer.normalize_children(root)) } else { @@ -326,6 +329,9 @@ impl<'source> TreeSitterNormalizer<'source> { if self.ternary_statement(node) { return self.normalize_ternary_statement(node); } + if self.leading_if_statement(node) { + return self.normalize_leading_if_statement(node); + } if if_kind(node.kind()) { return self.normalize_if(node); } @@ -341,9 +347,18 @@ impl<'source> TreeSitterNormalizer<'source> { if self.statement_call_with_block(node) { return self.normalize_statement_call_with_block(node); } + if 
self.super_statement(node) { + return Some(self.normalize_super_statement(node)); + } if self.command_call_statement(node) { return self.normalize_command_call_statement(node); } + if self.yield_statement(node) { + return Some(self.normalize_yield_statement(node)); + } + if self.super_statement(node) { + return Some(self.normalize_super_statement(node)); + } if self.unary_not_statement(node) { return self.normalize_unary_not(node); } @@ -365,6 +380,19 @@ impl<'source> TreeSitterNormalizer<'source> { if self.comparison_expression(node) { return self.normalize_comparison(node); } + if self.self_node(node) { + return Some(self.wrap("SELF", Vec::new(), node)); + } + if instance_variable_node(node, self.source) { + return Some(self.wrap( + "IVAR", + vec![Child::String(node_text(node, self.source).to_string())], + node, + )); + } + if global_variable_node(node, self.source) { + return Some(self.normalize_global_variable(node)); + } match node.kind() { "program" => { @@ -383,7 +411,10 @@ impl<'source> TreeSitterNormalizer<'source> { } "module" => self.normalize_module(node), "lambda" => self.normalize_lambda(node), - "body_statement" | "block_body" | "block" => self.normalize_body(node), + _ if self.block_kind(node.kind()) => { + let children = self.normalize_children(node); + Some(self.wrap("BLOCK", children, node)) + } "ensure" => self.normalize_ensure_clause(node), "begin" => self.normalize_begin(node), "subshell" => Some(self.normalize_subshell(node)), @@ -395,10 +426,10 @@ impl<'source> TreeSitterNormalizer<'source> { self.normalize_assignment(node) } "variable_declarator" if !self.has_assignment_operator_child(node) => { - Some(self.wrap(kind_type(node.kind()), Vec::new(), node)) + Some(self.wrap(&kind_type(node.kind()), Vec::new(), node)) } "expression_list" if self.single_short_var_lhs(node) => { - Some(self.wrap(kind_type(node.kind()), Vec::new(), node)) + Some(self.wrap(&kind_type(node.kind()), Vec::new(), node)) } "call" | "call_expression" | "method_call" | 
"method_call_expression" => { self.normalize_call(node) @@ -414,7 +445,7 @@ impl<'source> TreeSitterNormalizer<'source> { "super" => Some(self.normalize_super(node)), "return" | "return_statement" | "return_expression" | "break" | "break_statement" | "break_expression" | "next" | "continue_statement" => self.normalize_return(node), - "nil" => Some(self.wrap("NIL", Vec::new(), node)), + "nil" | "none" | "null" => Some(self.wrap("NIL", Vec::new(), node)), "true" => Some(self.wrap("TRUE", Vec::new(), node)), "false" => Some(self.wrap("FALSE", Vec::new(), node)), "instance_variable" => Some(self.wrap( @@ -443,6 +474,12 @@ impl<'source> TreeSitterNormalizer<'source> { | "raw_string_literal" => { if self.interpolated_string(node) { Some(self.normalize_interpolated_string(node)) + } else if let Some(content) = self.lua_no_paren_string_argument_content(node) { + Some(self.wrap( + "STR", + vec![Child::String(node_text(content, self.source).to_string())], + content, + )) } else { Some(self.wrap( "STR", @@ -451,17 +488,9 @@ impl<'source> TreeSitterNormalizer<'source> { )) } } - "integer" => Some(self.wrap( - "INTEGER", - vec![Child::String(node_text(node, self.source).to_string())], - node, - )), - "float" | "float_literal" => Some(self.wrap( - "FLOAT", - vec![Child::String(node_text(node, self.source).to_string())], - node, - )), - "pair" | "keyword_argument" => self.normalize_pair(node), + "integer" => Some(self.wrap("INTEGER", Vec::new(), node)), + "float" | "float_literal" => Some(self.wrap("FLOAT", Vec::new(), node)), + "pair" => self.normalize_pair(node), "simple_symbol" | "symbol" => Some(self.wrap( "LIT", vec![Child::Symbol( @@ -471,7 +500,7 @@ impl<'source> TreeSitterNormalizer<'source> { )), _ => { let children = self.normalize_children(node); - Some(self.wrap(kind_type(node.kind()), children, node)) + Some(self.wrap(&kind_type(node.kind()), children, node)) } } } @@ -545,6 +574,33 @@ impl<'source> TreeSitterNormalizer<'source> { )) } + fn 
normalize_python_nested_class_as_iter(&mut self, node: TreeSitterNode<'_>) -> Option { + let name_node = self + .named_field(node, "name") + .or_else(|| self.first_named(node))?; + let name = node_text(name_node, self.source).to_string(); + let header_end = node + .children(&mut node.walk()) + .find(|child| !child.is_named() && node_text(*child, self.source) == ":") + .unwrap_or(name_node); + let call = self.wrap_from_nodes( + "VCALL", + vec![Child::Symbol(name), Child::Nil], + node, + header_end, + ); + let body = self + .named_field(node, "body") + .or_else(|| self.block_child(node)) + .and_then(|body| self.normalize_body(body)); + let scope = self.scope(body, None, node); + Some(self.wrap( + "ITER", + vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], + node, + )) + } + fn normalize_module(&mut self, node: TreeSitterNode<'_>) -> Option { let name = self.const_for( self.named_field(node, "name") @@ -619,7 +675,60 @@ impl<'source> TreeSitterNormalizer<'source> { ) } + fn normalize_yield_statement(&mut self, node: TreeSitterNode<'_>) -> Node { + let args_node = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list"); + let args = args_node + .map(|args| self.yield_argument_nodes(args)) + .unwrap_or_else(|| { + self.named_children(node) + .into_iter() + .filter(|child| child.kind() != "yield") + .filter_map(|child| self.normalize_node(child)) + .collect() + }); + self.wrap( + "YIELD", + vec![list_or_nil(args, args_node.unwrap_or(node), self)], + node, + ) + } + + fn normalize_super_statement(&mut self, node: TreeSitterNode<'_>) -> Node { + let raw = self.raw_named_children(node); + let children = if raw.len() == 1 && raw[0].kind() == "call" { + self.raw_named_children(raw[0]) + } else { + raw + }; + let args_node = children + .into_iter() + .find(|child| child.kind() == "argument_list"); + let args = args_node + .map(|args| self.yield_argument_nodes(args)) + .unwrap_or_default(); + self.wrap( + "SUPER", + 
vec![list_or_nil(args, args_node.unwrap_or(node), self)], + node, + ) + } + fn normalize_body(&mut self, node: TreeSitterNode<'_>) -> Option { + if self.language == Language::Python && node.kind() == "block" { + let raw_children = self.raw_named_children(node); + if raw_children.len() == 1 + && raw_children[0].kind() == "class_definition" + && node + .parent() + .map(|parent| parent.kind() == "class_definition") + .unwrap_or(false) + { + return self.normalize_python_nested_class_as_iter(raw_children[0]); + } + } if self.leading_if_statement(node) { return self.normalize_leading_if_statement(node); } @@ -638,6 +747,9 @@ impl<'source> TreeSitterNormalizer<'source> { if self.command_call_statement(node) { return self.normalize_command_call_statement(node); } + if self.yield_statement(node) { + return Some(self.normalize_yield_statement(node)); + } if self.unary_not_statement(node) { return self.normalize_unary_not(node); } @@ -654,6 +766,10 @@ impl<'source> TreeSitterNormalizer<'source> { if self.block_kind(node.kind()) { let children = self.normalize_children(node); if children.is_empty() { + let text = node_text(node, self.source).trim(); + if bare_identifier_text(text) { + return Some(self.wrap("VCALL", vec![Child::Symbol(text.to_string())], node)); + } return None; } if children.len() == 1 { @@ -737,6 +853,21 @@ impl<'source> TreeSitterNormalizer<'source> { } fn normalize_else_or_branch(&mut self, node: TreeSitterNode<'_>) -> Option { + if self.language == Language::Python && node.kind() == "else_clause" { + if let Some(block) = self + .raw_named_children(node) + .into_iter() + .find(|child| child.kind() == "block") + { + if let Some(normalized) = self.normalize_python_else_if_block(block) { + return Some(self.wrap( + "ELSE_CLAUSE", + vec![Child::Node(Box::new(normalized))], + node, + )); + } + } + } if node.kind() != "else" { return self.normalize_body(node); } @@ -753,6 +884,75 @@ impl<'source> TreeSitterNormalizer<'source> { 
self.normalize_body_nodes(self.named_children(node), node) } + fn normalize_python_else_if_block(&mut self, node: TreeSitterNode<'_>) -> Option { + let statements = self + .raw_named_children(node) + .into_iter() + .filter(|child| child.kind() != "comment") + .collect::>(); + if statements.len() != 1 || statements[0].kind() != "if_statement" { + return None; + } + let if_node = statements[0]; + let condition = self + .named_field(if_node, "condition") + .or_else(|| self.named_field(if_node, "predicate")) + .or_else(|| self.first_named(if_node))?; + if self.identifier_kind(condition.kind()) { + return self.normalize_python_if_statement_as_iter(if_node); + } + let consequence = self + .named_field(if_node, "consequence") + .or_else(|| self.named_field(if_node, "body")) + .or_else(|| self.branch_child(if_node, condition, 0)); + let alternative = self.explicit_alternative(if_node); + let mut children = Vec::new(); + if let Some(condition) = self.normalize_node(condition) { + children.push(Child::Node(Box::new(condition))); + } + if let Some(consequence) = consequence.and_then(|child| { + self.normalize_python_else_if_block(child) + .or_else(|| self.normalize_body(child)) + }) { + children.push(Child::Node(Box::new(consequence))); + } + if let Some(alternative) = + alternative.and_then(|child| self.normalize_else_or_branch(child)) + { + children.push(Child::Node(Box::new(alternative))); + } + Some(self.wrap("BLOCK", children, node)) + } + + fn normalize_python_if_statement_as_iter(&mut self, node: TreeSitterNode<'_>) -> Option { + let condition = self + .named_field(node, "condition") + .or_else(|| self.named_field(node, "predicate")) + .or_else(|| self.first_named(node))?; + let body = self + .named_field(node, "consequence") + .or_else(|| self.named_field(node, "body")) + .or_else(|| self.branch_child(node, condition, 0))?; + let call_source = self.source_before_child(node, body); + let call = self.wrap_from_source_node( + "VCALL", + vec![ + 
Child::Symbol(node_text(condition, self.source).to_string()), + Child::Nil, + ], + &call_source, + ); + let body = self.with_ruby_scope(body, false, |normalizer| { + normalizer.normalize_body(body).map(dynamic_scope) + }); + let scope = self.scope(body, None, node); + Some(self.wrap( + "ITER", + vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], + node, + )) + } + fn normalize_case(&mut self, node: TreeSitterNode<'_>) -> Option { let value_raw = self.case_value(node); let value = value_raw.and_then(|value| self.normalize_node(value)); @@ -813,9 +1013,10 @@ impl<'source> TreeSitterNormalizer<'source> { .into_iter() .rev() .fold(fallback, |next_when, mut current| { - if current.children.len() > 2 { - current.children[2] = optional_node(next_when); + while current.children.len() <= 2 { + current.children.push(Child::Nil); } + current.children[2] = optional_node(next_when); Some(current) }) } @@ -1321,7 +1522,7 @@ impl<'source> TreeSitterNormalizer<'source> { fn normalize_member_read(&mut self, node: TreeSitterNode<'_>) -> Option { let Some((receiver, method)) = self.member_parts(node) else { let children = self.normalize_children(node); - return Some(self.wrap(kind_type(node.kind()), children, node)); + return Some(self.wrap(&kind_type(node.kind()), children, node)); }; let receiver = optional_node(self.normalize_node(receiver)); Some(self.wrap( @@ -1652,9 +1853,10 @@ impl<'source> TreeSitterNormalizer<'source> { fn link_rescue_chain(&self, mut resbodies: Vec) -> Option { let mut next = None; while let Some(mut current) = resbodies.pop() { - if current.children.len() > 2 { - current.children[2] = optional_node(next); + while current.children.len() <= 2 { + current.children.push(Child::Nil); } + current.children[2] = optional_node(next); next = Some(current); } next @@ -1757,22 +1959,24 @@ impl<'source> TreeSitterNormalizer<'source> { .map(|args| self.command_arguments(args)) .unwrap_or_default(); let block = self.call_block(node); + let call_source = 
block.map(|block| self.source_before_child(node, block)); if node_text(function, self.source) == "yield" { - return Some(self.wrap( - "YIELD", - vec![list_or_nil(args, args_node.unwrap_or(node), self)], - node, - )); + let children = vec![list_or_nil(args, args_node.unwrap_or(node), self)]; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node("YIELD", children, source)); + } + return Some(self.wrap("YIELD", children, node)); } let call_type = if args.is_empty() { "VCALL" } else { "FCALL" }; - let call = self.wrap( - call_type, - vec![ - Child::Symbol(node_text(function, self.source).to_string()), - list_or_nil(args, args_node.unwrap_or(node), self), - ], - node, - ); + let call_children = vec![ + Child::Symbol(node_text(function, self.source).to_string()), + list_or_nil(args, args_node.unwrap_or(node), self), + ]; + let call = if let Some(source) = call_source.as_ref() { + self.wrap_from_source_node(call_type, call_children, source) + } else { + self.wrap(call_type, call_children, node) + }; let Some(block) = block else { return Some(call); }; @@ -2153,11 +2357,15 @@ impl<'source> TreeSitterNormalizer<'source> { } fn scope(&self, body: Option, args: Option, source: TreeSitterNode<'_>) -> Node { - self.wrap( - "SCOPE", - vec![Child::Nil, optional_node(args), optional_node(body)], - source, - ) + let source_node = body.as_ref().or(args.as_ref()).cloned(); + let children = vec![Child::Nil, optional_node(args), optional_node(body)]; + if let Some(source_node) = source_node { + self.wrap_from_source_node("SCOPE", children, &source_node) + } else if let Some(root_span) = self.root_span { + self.wrap_from_span_text("SCOPE", children, root_span, self.source) + } else { + self.wrap("SCOPE", children, source) + } } fn list(&self, children: Vec, source: TreeSitterNode<'_>) -> Node { @@ -2221,6 +2429,24 @@ impl<'source> TreeSitterNormalizer<'source> { } } + fn wrap_from_span_text( + &self, + node_type: &str, + children: Vec, + node_span: 
Span, + text: &str, + ) -> Node { + Node { + r#type: node_type.to_string(), + children, + first_lineno: node_span[0], + first_column: node_span[1], + last_lineno: node_span[2], + last_column: node_span[3], + text: text.to_string(), + } + } + fn with_ruby_scope( &mut self, node: TreeSitterNode<'_>, @@ -2483,7 +2709,15 @@ impl<'source> TreeSitterNormalizer<'source> { ) } + fn self_node(&self, node: TreeSitterNode<'_>) -> bool { + matches!(node.kind(), "self" | "this") + || matches!(node_text(node, self.source), "self" | "this") + } + fn assignment_lhs(&self, node: TreeSitterNode<'_>) -> bool { + if self.lua_single_assignment_block_child(node) { + return false; + } if node .prev_sibling() .map(|sibling| node_text(sibling, self.source) == ":") @@ -2491,17 +2725,69 @@ impl<'source> TreeSitterNormalizer<'source> { { return false; } + if self.literal_fragment_assignment_context(node) { + return false; + } node.next_sibling() .map(|sibling| assignment_operator(node_text(sibling, self.source))) .unwrap_or(false) } + fn literal_fragment_assignment_context(&self, node: TreeSitterNode<'_>) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if matches!( + parent.kind(), + "string" | "delimited_symbol" | "regex" | "regex_literal" + ) { + return true; + } + + matches!( + node.kind(), + "string_content" | "escape_sequence" | "interpolation" + ) && parent + .parent() + .map(|grandparent| { + matches!( + grandparent.kind(), + "string" | "delimited_symbol" | "regex" | "regex_literal" + ) + }) + .unwrap_or(false) + } + fn assignment_rhs(&self, node: TreeSitterNode<'_>) -> bool { + if self.lua_single_assignment_block_child(node) { + return false; + } + if self.literal_fragment_assignment_context(node) { + return false; + } node.prev_sibling() .map(|sibling| assignment_operator(node_text(sibling, self.source))) .unwrap_or(false) } + fn lua_single_assignment_block_child(&self, node: TreeSitterNode<'_>) -> bool { + if self.language != Language::Lua { + return 
false; + } + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "assignment_statement" { + return false; + } + let Some(grandparent) = parent.parent() else { + return false; + }; + grandparent.kind() == "block" + && node_text(grandparent, self.source) == node_text(parent, self.source) + && self.raw_named_children(grandparent).len() == 1 + } + fn has_assignment_operator_child(&self, node: TreeSitterNode<'_>) -> bool { node.children(&mut node.walk()) .any(|child| !child.is_named() && assignment_operator(node_text(child, self.source))) @@ -2532,14 +2818,22 @@ impl<'source> TreeSitterNormalizer<'source> { } fn leading_if_statement(&self, node: TreeSitterNode<'_>) -> bool { + let first_child = node.children(&mut node.walk()).next(); + let single_named_if_block = matches!(self.language, Language::Python | Language::Lua) + && node.kind() == "block" + && self.raw_named_children(node).len() == 1 + && first_child + .map(|child| child.kind() == "if_statement") + .unwrap_or(false); + if single_named_if_block { + return true; + } matches!( node.kind(), "body_statement" | "block" | "block_body" | "statement" - ) && node - .children(&mut node.walk()) - .next() + ) && (first_child .map(|child| matches!(child.kind(), "if" | "unless")) - .unwrap_or(false) + .unwrap_or(false)) && self.named_children(node).len() >= 2 && self .named_children(node) @@ -2549,6 +2843,35 @@ impl<'source> TreeSitterNormalizer<'source> { } fn normalize_leading_if_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + if self.language == Language::Python && node.kind() == "block" { + if let Some(if_node) = self + .raw_named_children(node) + .into_iter() + .find(|child| child.kind() == "if_statement") + { + let condition = self + .named_field(if_node, "condition") + .or_else(|| self.named_field(if_node, "predicate")) + .or_else(|| self.first_named(if_node))?; + let consequence = self + .named_field(if_node, "consequence") + .or_else(|| self.named_field(if_node, "body")) + 
.or_else(|| self.branch_child(if_node, condition, 0)); + let condition = optional_node(self.normalize_node(condition)); + let consequence = + optional_node(consequence.and_then(|child| self.normalize_body(child))); + return Some(self.wrap("IF", vec![condition, consequence, Child::Nil], if_node)); + } + } + if self.language == Language::Lua && node.kind() == "block" { + if let Some(if_node) = self + .raw_named_children(node) + .into_iter() + .find(|child| child.kind() == "if_statement") + { + return self.normalize_if(if_node); + } + } let keyword = node .children(&mut node.walk()) .next() @@ -2810,16 +3133,54 @@ impl<'source> TreeSitterNormalizer<'source> { self.infix_statement_parts(node).is_some() } + fn argument_list_unary_not(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() != "argument_list" { + return false; + } + let named = self.named_children(node); + if node + .children(&mut node.walk()) + .next() + .map(|child| node_text(child, self.source) == "!") + .unwrap_or(false) + && named.len() == 1 + { + return true; + } + + let raw_named = self.raw_named_children(node); + if raw_named.len() != 1 || raw_named[0].kind() != "unary" { + return false; + } + node_text(node, self.source) == node_text(raw_named[0], self.source) + && self.unary_not_expression(raw_named[0]) + && self.raw_named_children(raw_named[0]).len() == 1 + } + fn unary_not_statement(&self, node: TreeSitterNode<'_>) -> bool { - matches!( + if !matches!( node.kind(), "body_statement" | "block_body" | "statement" | "argument_list" - ) && node + ) { + return false; + } + let named = self.named_children(node); + if node .children(&mut node.walk()) .next() .map(|child| node_text(child, self.source) == "!") .unwrap_or(false) - && self.named_children(node).len() == 1 + && named.len() == 1 + { + return true; + } + + let raw_named = self.raw_named_children(node); + raw_named.len() == 1 + && raw_named[0].kind() == "unary" + && node_text(node, self.source) == node_text(raw_named[0], self.source) + && 
self.unary_not_expression(raw_named[0]) + && self.raw_named_children(raw_named[0]).len() == 1 } fn unary_not_expression(&self, node: TreeSitterNode<'_>) -> bool { @@ -2827,6 +3188,33 @@ impl<'source> TreeSitterNormalizer<'source> { && node_text(node, self.source).trim_start().starts_with('!') } + fn unary_minus_expression(&self, node: TreeSitterNode<'_>) -> bool { + if matches!(node.kind(), "unary" | "unary_expression" | "unary_operator") + && node_text(node, self.source).trim_start().starts_with('-') + { + return true; + } + + if node.kind() != "expression_list" { + return false; + } + let named = self.named_children(node); + if node + .children(&mut node.walk()) + .next() + .map(|child| node_text(child, self.source) == "-") + .unwrap_or(false) + && named.len() == 1 + { + return true; + } + + let raw_named = self.raw_named_children(node); + raw_named.len() == 1 + && node_text(node, self.source) == node_text(raw_named[0], self.source) + && self.unary_minus_expression(raw_named[0]) + } + fn infix_statement_parts<'tree>( &self, node: TreeSitterNode<'tree>, @@ -2837,11 +3225,23 @@ impl<'source> TreeSitterNormalizer<'source> { ) { return None; } + let raw_named = self.raw_named_children(node); + let target = if raw_named.len() == 1 + && matches!( + raw_named[0].kind(), + "binary" | "binary_expression" | "comparison_operator" + ) + && node_text(node, self.source) == node_text(raw_named[0], self.source) + { + raw_named[0] + } else { + node + }; let mut named_index = 0usize; let mut left = None; let mut right = None; let mut operator = None; - for child in node.children(&mut node.walk()) { + for child in target.children(&mut target.walk()) { if child.is_named() { left.get_or_insert(child); if operator.is_some() { @@ -2879,10 +3279,31 @@ impl<'source> TreeSitterNormalizer<'source> { } fn binary_operator(&self, node: TreeSitterNode<'_>) -> Option { - node.children(&mut node.walk()) + if let Some(operator) = node + .children(&mut node.walk()) .find(|child| !child.is_named() 
&& !matches!(node_text(*child, self.source), "(" | ")")) .map(|child| node_text(child, self.source).to_string()) - } + { + return Some(operator); + } + + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && matches!( + raw_named[0].kind(), + "binary" + | "binary_expression" + | "binary_operator" + | "boolean_operator" + | "comparison_operator" + ) + && node_text(node, self.source) == node_text(raw_named[0], self.source) + { + return self.binary_operator(raw_named[0]); + } + + None + } fn interpolated_statement(&self, node: TreeSitterNode<'_>) -> bool { matches!( @@ -2912,6 +3333,115 @@ impl<'source> TreeSitterNormalizer<'source> { .any(|child| self.call_kind(child.kind()) || self.member_read_node(child))) } + fn yield_statement(&self, node: TreeSitterNode<'_>) -> bool { + if !matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "expression_statement" | "statement" + ) { + return false; + } + let Some(first) = node.children(&mut node.walk()).next() else { + return false; + }; + if node_text(first, self.source) == "yield" { + return true; + } + + if matches!( + node.kind(), + "body_statement" | "block_body" | "expression_statement" | "statement" + ) && first.kind() == "yield" + { + let Some(keyword) = first.children(&mut first.walk()).next() else { + return false; + }; + return node_text(keyword, self.source) == "yield"; + } + + false + } + + fn super_statement(&self, node: TreeSitterNode<'_>) -> bool { + if !matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "call" | "statement" + ) { + return false; + } + if node_text(node, self.source).trim() == "super" { + return true; + } + let raw = self.raw_named_children(node); + let named = if raw.len() == 1 && raw[0].kind() == "call" { + self.raw_named_children(raw[0]) + } else { + raw + }; + named + .first() + .map(|child| child.kind() == "super") + .unwrap_or(false) + && named + .iter() + .skip(1) + .all(|child| child.kind() == "argument_list") + } + + fn 
argument_list_element_reference(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() != "argument_list" { + return false; + } + let named = self.named_children(node); + if named + .iter() + .any(|child| matches!(child.kind(), "block" | "do_block")) + { + return false; + } + + let children = node.children(&mut node.walk()).collect::>(); + let direct_bracket_shape = children + .first() + .map(|child| node_text(*child, self.source) != "[") + .unwrap_or(false) + && children + .iter() + .any(|child| !child.is_named() && node_text(*child, self.source) == "[") + && children + .iter() + .any(|child| !child.is_named() && node_text(*child, self.source) == "]") + && named.len() >= 2; + if direct_bracket_shape { + return true; + } + + if named.len() != 1 || named[0].kind() != "element_reference" { + return false; + } + let reference = named[0]; + let reference_named = self.raw_named_children(reference); + if reference_named.len() < 2 + || reference_named + .iter() + .any(|child| matches!(child.kind(), "block" | "do_block")) + { + return false; + } + let reference_children = reference + .children(&mut reference.walk()) + .collect::>(); + reference_children + .first() + .map(|child| node_text(*child, self.source) != "[") + .unwrap_or(false) + && reference_children + .iter() + .any(|child| !child.is_named() && node_text(*child, self.source) == "[") + && reference_children + .iter() + .any(|child| !child.is_named() && node_text(*child, self.source) == "]") + } + fn dotted_expression(&self, node: TreeSitterNode<'_>) -> bool { matches!( node.kind(), @@ -2968,6 +3498,9 @@ impl<'source> TreeSitterNormalizer<'source> { } fn member_read_node(&self, node: TreeSitterNode<'_>) -> bool { + if self.language == Language::Lua && node.kind() == "field" { + return false; + } matches!( node.kind(), "attribute" @@ -2987,6 +3520,12 @@ impl<'source> TreeSitterNormalizer<'source> { &self, node: TreeSitterNode<'tree>, ) -> Option<(TreeSitterNode<'tree>, String)> { + if node.kind() == 
"expression_list" + && !(self.named_field(node, "operand").is_some() + && self.named_field(node, "field").is_some()) + { + return None; + } if self.dotted_call(node) { return self.dotted_call_parts(node, None); } @@ -3064,6 +3603,17 @@ impl<'source> TreeSitterNormalizer<'source> { .collect() } + fn yield_argument_nodes(&mut self, node: TreeSitterNode<'_>) -> Vec { + let children = self.named_children(node); + if children.is_empty() { + return self.scalar_argument_list_value(node).into_iter().collect(); + } + children + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect() + } + fn scalar_argument_list_value(&mut self, node: TreeSitterNode<'_>) -> Option { let text = node_text(node, self.source).trim(); if text == "yield" { @@ -3355,15 +3905,382 @@ impl<'source> TreeSitterNormalizer<'source> { node: TreeSitterNode<'tree>, name: &str, ) -> Option> { + if self.language == Language::Python + && matches!(name, "body" | "consequence") + && matches!( + node.kind(), + "elif_clause" + | "else_clause" + | "for_statement" + | "function_definition" + | "if_statement" + | "try_statement" + | "while_statement" + | "with_statement" + ) + { + if let Some(block) = self + .raw_named_children(node) + .into_iter() + .find(|child| child.kind() == "block") + { + return Some(block); + } + } node.child_by_field_name(name) } fn named_children<'tree>(&self, node: TreeSitterNode<'tree>) -> Vec> { + if node.kind() == "dotted_name" && !node_text(node, self.source).contains('.') { + return Vec::new(); + } + if self.language == Language::Python + && node.kind() == "with_clause" + && bare_identifier_text(node_text(node, self.source)) + { + return Vec::new(); + } + if self.language == Language::Lua + && node.kind() == "variable_list" + && self.raw_named_children(node).len() == 1 + && self + .raw_named_children(node) + .first() + .map(|child| self.identifier_kind(child.kind())) + .unwrap_or(false) + && self.lua_single_assignment_block_child(node) + { + return Vec::new(); + } + 
if self.language == Language::Lua + && node.kind() == "variable_list" + && self.raw_named_children(node).len() == 1 + && node + .parent() + .map(|parent| parent.kind() == "for_generic_clause") + .unwrap_or(false) + { + return Vec::new(); + } + if self.language == Language::Lua + && node.kind() == "variable_list" + && self.raw_named_children(node).len() == 1 + && node + .parent() + .map(|parent| { + parent.kind() == "variable_declaration" + && self.raw_named_children(parent).len() == 1 + }) + .unwrap_or(false) + { + return Vec::new(); + } + + let children = self.raw_named_children(node); + if self.language == Language::Lua + && node.kind() == "expression_list" + && children.len() == 1 + && self.identifier_kind(children[0].kind()) + && node + .parent() + .map(|parent| matches!(parent.kind(), "assignment_statement" | "return_statement")) + .unwrap_or(false) + && node_text(node, self.source) == node_text(children[0], self.source) + { + return Vec::new(); + } + if self.language == Language::Lua + && node.kind() == "expression_list" + && children.len() == 1 + && matches!( + children[0].kind(), + "true" | "false" | "nil" | "number" | "integer" | "float" + ) + && node + .parent() + .map(|parent| matches!(parent.kind(), "assignment_statement" | "return_statement")) + .unwrap_or(false) + && node_text(node, self.source) == node_text(children[0], self.source) + { + return Vec::new(); + } + if self.language == Language::Lua + && node.kind() == "expression_list" + && children.len() == 1 + && matches!( + children[0].kind(), + "binary_expression" + | "function_call" + | "dot_index_expression" + | "function_definition" + | "string" + ) + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if self.language == Language::Lua + && node.kind() == "expression_list" + && children.len() == 1 + && children[0].kind() == "table_constructor" + && node_text(node, self.source) == node_text(children[0], self.source) + { + 
return self.named_children(children[0]); + } + if self.language == Language::Lua + && node.kind() == "field" + && children.len() == 1 + && self.identifier_kind(children[0].kind()) + && node_text(node, self.source) == node_text(children[0], self.source) + { + return Vec::new(); + } + if self.language == Language::Lua + && node.kind() == "field" + && children.len() == 1 + && children[0].kind() == "string" + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if self.language == Language::Lua + && node.kind() == "field" + && children.len() == 1 + && children[0].kind() == "function_call" + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if self.language == Language::Lua + && node.kind() == "block" + && children.len() == 1 + && matches!( + children[0].kind(), + "assignment_statement" + | "function_call" + | "return_statement" + | "variable_declaration" + ) + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "relative_import" + && children.len() == 1 + && children[0].kind() == "import_prefix" + { + return Vec::new(); + } + if self.language == Language::Python && node.kind() == "block" && children.len() == 1 { + if children[0].kind() == "function_definition" { + return self.named_children(children[0]); + } + if children[0].kind() == "decorated_definition" { + return self.named_children(children[0]); + } + if children[0].kind() == "pass_statement" + && node_text(node, self.source).trim() == "pass" + { + return Vec::new(); + } + if matches!(children[0].kind(), "break_statement" | "continue_statement") + && bare_identifier_text(node_text(node, self.source).trim()) + { + return Vec::new(); + } + if children[0].kind() == "return_statement" + && node_text(node, self.source) == node_text(children[0], 
self.source) + { + if self.raw_named_children(children[0]).is_empty() { + return Vec::new(); + } + return self.named_children(children[0]); + } + if children[0].kind() == "delete_statement" { + return self.named_children(children[0]); + } + if children[0].kind() == "if_statement" { + return self.named_children(children[0]); + } + if matches!( + children[0].kind(), + "assert_statement" + | "for_statement" + | "import_from_statement" + | "import_statement" + | "raise_statement" + | "try_statement" + | "while_statement" + | "with_statement" + ) { + return self.named_children(children[0]); + } + if children[0].kind() != "expression_statement" { + return children; + } + let statement_children = self.raw_named_children(children[0]); + if statement_children.len() == 1 + && statement_children[0].kind() == "identifier" + && node_text(node, self.source) == node_text(children[0], self.source) + { + return Vec::new(); + } + if statement_children.len() == 1 && statement_children[0].kind() == "ellipsis" { + return Vec::new(); + } + if statement_children.len() == 1 + && matches!( + statement_children[0].kind(), + "assignment" + | "augmented_assignment" + | "binary_operator" + | "call" + | "string" + | "subscript" + ) + { + return self.named_children(statement_children[0]); + } + } + if self.language == Language::Python + && node.kind() == "expression_statement" + && children.len() == 1 + && children[0].kind() == "yield" + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "expression_statement" + && children.len() == 1 + && children[0].kind() == "identifier" + { + return Vec::new(); + } + if self.language == Language::Python + && node.kind() == "expression_statement" + && children.len() == 1 + && children[0].kind() == "binary_operator" + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "expression_statement" + && children.len() == 1 + && children[0].kind() == 
"comparison_operator" + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "expression_statement" + && children.len() == 1 + && children[0].kind() == "call" + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "expression_statement" + && children.len() == 1 + && children[0].kind() == "attribute" + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "expression_statement" + && children.len() == 1 + && children[0].kind() == "string" + { + return self.named_children(children[0]); + } + if self.language == Language::Python && node.kind() == "as_pattern_target" { + return Vec::new(); + } + if self.language == Language::Python + && matches!(node.kind(), "with_clause" | "with_item") + && children.len() == 1 + && matches!(children[0].kind(), "with_item" | "as_pattern") + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "with_item" + && children.len() == 1 + && children[0].kind() == "call" + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "with_item" + && children.len() == 1 + && children[0].kind() == "attribute" + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if node.kind() == "type" && children.len() == 1 { + if children[0].kind() == "union_type" { + return self.named_children(children[0]); + } + if self.language == Language::Python && children[0].kind() == "binary_operator" { + return self.named_children(children[0]); + } + if children[0].kind() == "generic_type" { + return self.named_children(children[0]); + } + if children[0].kind() == "attribute" { + return self.named_children(children[0]); + } + if children[0].kind() == "string" { + 
return self.named_children(children[0]); + } + if children[0].kind() == "list" { + if self.raw_named_children(children[0]).is_empty() { + return Vec::new(); + } + return self.named_children(children[0]); + } + if matches!( + children[0].kind(), + "ellipsis" | "identifier" | "nil" | "none" | "null" + ) { + return Vec::new(); + } + } + if node.kind() == "expression_statement" + && children.len() == 1 + && matches!(children[0].kind(), "assignment" | "augmented_assignment") + { + return self.named_children(children[0]); + } + + children + } + + fn raw_named_children<'tree>(&self, node: TreeSitterNode<'tree>) -> Vec> { node.children(&mut node.walk()) .filter(|child| child.is_named()) .collect() } + fn lua_no_paren_string_argument_content<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + if self.language != Language::Lua || node.kind() != "string" { + return None; + } + let parent = node.parent()?; + if parent.kind() != "arguments" + || node_text(parent, self.source) != node_text(node, self.source) + { + return None; + } + self.raw_named_children(node) + .into_iter() + .find(|child| child.kind() == "string_content") + } + fn source_before_child(&self, node: TreeSitterNode<'_>, child: TreeSitterNode<'_>) -> Node { let text = self .source @@ -3414,9 +4331,17 @@ impl<'source> TreeSitterNormalizer<'source> { &self, node: TreeSitterNode<'tree>, ) -> Option> { - self.named_children(node) - .into_iter() - .find(|child| matches!(child.kind(), "else" | "elsif")) + self.named_children(node).into_iter().find(|child| { + matches!( + child.kind(), + "elif_clause" + | "else" + | "else_clause" + | "else_statement" + | "elsif" + | "elseif_statement" + ) + }) } fn case_value<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { @@ -3657,6 +4582,9 @@ impl<'source> TreeSitterNormalizer<'source> { } fn elide_implicit_nil_body(&self, node: Option) -> Option { + if self.language != Language::Ruby { + return node; + } let node = self.drop_trailing_nil_statement(node); match 
node { Some(node) if node.r#type == "NIL" => None, @@ -3754,10 +4682,19 @@ fn declaration_metadata_kind(kind: &str) -> bool { ) } -fn kind_type(kind: &str) -> &str { +fn kind_type(kind: &str) -> String { match kind { - "body_statement" | "block_body" | "block" | "statements" => "BLOCK", - other => other, + "body_statement" | "block_body" | "block" | "statements" => "BLOCK".to_string(), + other => other + .chars() + .map(|ch| { + if ch.is_ascii_alphanumeric() { + ch.to_ascii_uppercase() + } else { + '_' + } + }) + .collect(), } } @@ -3822,6 +4759,21 @@ fn bare_identifier_text(text: &str) -> bool { chars.all(|ch| ch == '_' || ch == '!' || ch == '?' || ch == '=' || ch.is_ascii_alphanumeric()) } +fn instance_variable_node(node: TreeSitterNode<'_>, source: &str) -> bool { + let text = node_text(node, source); + node.kind() == "instance_variable" + || text + .strip_prefix('@') + .map(bare_identifier_text) + .unwrap_or(false) +} + +fn global_variable_node(node: TreeSitterNode<'_>, source: &str) -> bool { + node.kind() == "global_variable" + || (!matches!(node.kind(), "string_content" | "escape_sequence") + && node_text(node, source).starts_with('$')) +} + fn comparison_operator_from_text(text: &str) -> Option { for operator in ["===", "!==", "==", "!=", "<=", ">=", "<", ">"] { if text.contains(operator) { @@ -3840,8 +4792,13 @@ pub fn child_to_string(child: Option<&Child>) -> Option { #[cfg(test)] mod tests { - use super::{parse, Child, Node}; + use super::{parse, parse_with_language, Child, Node}; + use crate::decomplex::syntax::Language; + use serde_json::{json, Value}; use std::io::Write; + use std::path::Path; + use std::process::Command; + use tree_sitter::{Node as TreeSitterNode, Parser as TreeSitterParser}; fn parse_source(source: &str) -> Node { let mut file = tempfile::Builder::new() @@ -3853,6 +4810,18 @@ mod tests { parse(file.path()).expect("parse temp ruby file").0 } + fn parse_language_source(source: &str, language: Language, suffix: &str) -> Node { + let 
mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create temp source file"); + file.write_all(source.as_bytes()) + .expect("write temp source file"); + parse_with_language(file.path(), language) + .expect("parse temp source file") + .0 + } + fn nodes_of_type<'a>(node: &'a Node, node_type: &str, out: &mut Vec<&'a Node>) { if node.r#type == node_type { out.push(node); @@ -3862,6 +4831,1762 @@ mod tests { } } + fn first_node<'a>(root: &'a Node, node_type: &str, text: &str) -> &'a Node { + let mut nodes = Vec::new(); + nodes_of_type(root, node_type, &mut nodes); + nodes + .into_iter() + .find(|node| node.text == text) + .unwrap_or_else(|| panic!("expected {node_type} with text {text:?} in {root:#?}")) + } + + fn child_node(node: &Node, index: usize) -> &Node { + node.children + .get(index) + .and_then(super::node) + .unwrap_or_else(|| panic!("expected child node {index} in {node:#?}")) + } + + fn child_types(node: &Node) -> Vec<&str> { + node.children + .iter() + .filter_map(super::node) + .map(|child| child.r#type.as_str()) + .collect() + } + + fn test_node(node_type: &str, children: Vec) -> Node { + Node { + r#type: node_type.to_string(), + children, + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: node_type.to_string(), + } + } + + fn infix_parts_text( + normalizer: &super::TreeSitterNormalizer<'_>, + node: TreeSitterNode<'_>, + source: &str, + ) -> Option<(String, String, String)> { + let (left, operator, right) = normalizer.infix_statement_parts(node)?; + Some(( + super::node_text(left, source).to_string(), + operator, + super::node_text(right, source).to_string(), + )) + } + + fn node_value(node: &Node) -> Value { + json!({ + "type": node.r#type, + "children": node.children.iter().map(child_value).collect::>(), + "first_lineno": node.first_lineno, + "first_column": node.first_column, + "last_lineno": node.last_lineno, + "last_column": node.last_column, + "text": node.text, + }) + } + + fn 
child_value(child: &Child) -> Value { + match child { + Child::Node(node) => node_value(node), + Child::Symbol(value) | Child::String(value) => Value::String(value.clone()), + Child::Nil => Value::Null, + } + } + + fn ruby_language_name(language: Language) -> &'static str { + match language { + Language::Ruby => "ruby", + Language::Python => "python", + Language::JavaScript => "javascript", + Language::Java => "java", + Language::TypeScript => "typescript", + Language::Swift => "swift", + Language::Kotlin => "kotlin", + Language::Go => "go", + Language::Rust => "rust", + Language::Zig => "zig", + Language::Lua => "lua", + Language::C => "c", + Language::Cpp => "cpp", + Language::CSharp => "csharp", + } + } + + fn ruby_normalized_value(path: &Path, language: Language) -> Value { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + root, = Decomplex::Ast.parse(ARGV.fetch(0)) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(root)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "json", + "-e", + script, + ]) + .arg(path) + .output() + .expect("run ruby normalizer"); + assert!( + output.status.success(), + "ruby normalizer failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby normalizer should emit JSON") + } + + fn 
assert_ruby_parity(source: &str, language: Language, suffix: &str) { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create parity temp source file"); + file.write_all(source.as_bytes()) + .expect("write parity temp source file"); + + let rust = node_value( + &parse_with_language(file.path(), language) + .expect("parse parity temp source file") + .0, + ); + let ruby = ruby_normalized_value(file.path(), language); + assert_eq!(rust, ruby); + } + + fn raw_tree(source: &str, language: Language) -> tree_sitter::Tree { + let mut parser = TreeSitterParser::new(); + parser + .set_language(&super::language_grammar(language)) + .expect("set raw parser language"); + parser.parse(source, None).expect("parse raw source") + } + + fn first_raw_node<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kind: &str, + text: &str, + ) -> TreeSitterNode<'tree> { + if node.kind() == kind && super::node_text(node, source) == text { + return node; + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if let Some(found) = first_raw_node_opt(child, source, kind, text) { + return found; + } + } + panic!("expected raw node kind={kind:?} text={text:?}"); + } + + fn first_raw_node_opt<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kind: &str, + text: &str, + ) -> Option> { + if node.kind() == kind && super::node_text(node, source) == text { + return Some(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if let Some(found) = first_raw_node_opt(child, source, kind, text) { + return Some(found); + } + } + None + } + + fn ruby_private_predicate( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, + ) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby predicate temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby predicate temp source file"); + let 
decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(method, target) ? "true" : "false" + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby private predicate"); + assert!( + output.status.success(), + "ruby predicate failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby predicate output should be utf8") + .trim() + == "true" + } + + fn ruby_private_string( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, + ) -> String { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby string temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby string temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + 
method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(method, target).to_s + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby private string helper"); + assert!( + output.status.success(), + "ruby string helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby string helper output should be utf8") + .trim() + .to_string() + } + + #[test] + fn tree_normalizer_new_initializes_empty_state() { + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + assert_eq!(normalizer.source, ""); + assert_eq!(normalizer.language, Language::Ruby); + assert!(normalizer.local_stack.is_empty()); + assert_eq!(normalizer.root_span, None); + } + + #[test] + fn tree_normalizer_yield_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield :item\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield :item", + ), + ( + "def each\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "def gen():\n yield from items\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield from items", + ), + ( 
+ "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "block", + "yield item\n other()", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.yield_statement(node), + ruby_private_predicate(source, language, suffix, "yield_statement?", kind, text), + "yield_statement? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn python_yield_statement_in_multi_statement_block_matches_ruby_ast() { + let source = "def gen():\n yield item\n other()\n"; + assert_ruby_parity(source, Language::Python, ".py"); + + let root = parse_language_source(source, Language::Python, ".py"); + let defn = first_node(&root, "DEFN", "def gen():\n yield item\n other()"); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + + assert_eq!(body.r#type, "BLOCK"); + assert_eq!(child_types(body), vec!["YIELD", "EXPRESSION_STATEMENT"]); + } + + #[test] + fn tree_normalizer_super_statement_matches_ruby_private_predicate() { + for (source, kind, text) in [ + ( + "class Child < Parent\n def call\n super\n end\nend\n", + "body_statement", + "super", + ), + ( + "class Child < Parent\n def call\n super :item\n end\nend\n", + "body_statement", + "super :item", + ), + ( + "class Child < Parent\n def call\n value\n end\nend\n", + "body_statement", + "value", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + + assert_eq!( + normalizer.super_statement(node), + ruby_private_predicate( + source, + Language::Ruby, + ".rb", + "super_statement?", + kind, + text + ), + "super_statement? 
mismatch for {kind} {text:?}" + ); + } + } + + #[test] + fn ruby_super_statement_normalization_matches_ruby_ast() { + let source = "class Child < Parent\n def bare\n super\n end\n def with_arg\n super :item\n end\nend\n"; + assert_ruby_parity(source, Language::Ruby, ".rb"); + + let root = parse_language_source(source, Language::Ruby, ".rb"); + let bare = first_node(&root, "SUPER", "super"); + let with_arg = first_node(&root, "SUPER", "super :item"); + + assert_eq!(bare.children, vec![Child::Nil]); + assert_eq!(child_types(with_arg), vec!["LIST"]); + assert_eq!(child_types(child_node(with_arg, 0)), vec!["LIT"]); + } + + #[test] + fn tree_normalizer_argument_list_element_reference_matches_ruby_private_predicate() { + for (source, text) in [ + ("def indexed\n return items[0]\nend\n", "items[0]"), + ("def indexed\n return obj.foo[0]\nend\n", "obj.foo[0]"), + ("def indexed\n return [0]\nend\n", "[0]"), + ( + "def indexed\n return items[0], other\nend\n", + "items[0], other", + ), + ("def indexed\n return items[]\nend\n", "items[]"), + ( + "def indexed\n return items[0] { nope }\nend\n", + "items[0] { nope }", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "argument_list", text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + + assert_eq!( + normalizer.argument_list_element_reference(node), + ruby_private_predicate( + source, + Language::Ruby, + ".rb", + "argument_list_element_reference?", + "argument_list", + text + ), + "argument_list_element_reference? 
mismatch for {text:?}" + ); + } + } + + #[test] + fn dynamic_scope_rewrites_locals_without_crossing_scope_boundaries() { + let inner_assignment = test_node("LASGN", vec![Child::Symbol("inner".to_string())]); + let node = test_node( + "BLOCK", + vec![ + Child::Node(Box::new(test_node( + "LASGN", + vec![Child::Symbol("value".to_string())], + ))), + Child::Node(Box::new(test_node( + "LVAR", + vec![Child::Symbol("value".to_string())], + ))), + Child::Node(Box::new(test_node( + "DEFN", + vec![ + Child::Symbol("nested".to_string()), + Child::Node(Box::new(test_node( + "SCOPE", + vec![ + Child::Nil, + Child::Nil, + Child::Node(Box::new(inner_assignment)), + ], + ))), + ], + ))), + ], + ); + + let result = super::dynamic_scope(node); + + assert_eq!(child_node(&result, 0).r#type, "DASGN"); + assert_eq!(child_node(&result, 1).r#type, "DVAR"); + let nested = child_node(&result, 2); + assert_eq!(nested.r#type, "DEFN"); + let nested_scope = child_node(nested, 1); + assert_eq!(nested_scope.r#type, "SCOPE"); + assert_eq!(child_node(nested_scope, 2).r#type, "LASGN"); + } + + #[test] + fn link_when_chain_sets_next_arm_and_pads_short_when_nodes() { + let fallback = test_node("ELSE", Vec::new()); + let first = test_node( + "WHEN", + vec![ + Child::Symbol("patterns".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let second = test_node( + "WHEN", + vec![ + Child::Symbol("patterns".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + let result = normalizer + .link_when_chain(vec![first, second], Some(fallback)) + .expect("expected linked when chain"); + + assert_eq!(result.r#type, "WHEN"); + let next = child_node(&result, 2); + assert_eq!(next.r#type, "WHEN"); + assert_eq!(child_node(next, 2).r#type, "ELSE"); + + let short = test_node("WHEN", vec![Child::Symbol("patterns".to_string())]); + let fallback = test_node("ELSE", Vec::new()); + let result = normalizer + .link_when_chain(vec![short], 
Some(fallback)) + .expect("expected padded when chain"); + + assert_eq!(result.children.len(), 3); + assert_eq!(result.children[1], Child::Nil); + assert_eq!(child_node(&result, 2).r#type, "ELSE"); + } + + #[test] + fn link_rescue_chain_sets_next_rescue_and_pads_short_resbody_nodes() { + let first = test_node( + "RESBODY", + vec![ + Child::Symbol("exceptions".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let second = test_node( + "RESBODY", + vec![ + Child::Symbol("exceptions".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + let result = normalizer + .link_rescue_chain(vec![first, second]) + .expect("expected linked rescue chain"); + + assert_eq!(result.r#type, "RESBODY"); + let next = child_node(&result, 2); + assert_eq!(next.r#type, "RESBODY"); + assert_eq!(next.children[2], Child::Nil); + + let short = test_node("RESBODY", vec![Child::Symbol("exceptions".to_string())]); + let result = normalizer + .link_rescue_chain(vec![short]) + .expect("expected padded rescue chain"); + + assert_eq!(result.children.len(), 3); + assert_eq!(result.children[1], Child::Nil); + assert_eq!(result.children[2], Child::Nil); + } + + #[test] + fn infix_statement_parts_extracts_allowed_wrapper_parts() { + let source = "def calc\n left + right\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let body = first_raw_node(tree.root_node(), source, "body_statement", "left + right"); + let binary = first_raw_node(tree.root_node(), source, "binary", "left + right"); + + assert_eq!( + infix_parts_text(&normalizer, body, source), + Some(("left".to_string(), "+".to_string(), "right".to_string())) + ); + assert_eq!(infix_parts_text(&normalizer, binary, source), None); + + let source = "def calc\n return left + right\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let normalizer = 
super::TreeSitterNormalizer::new(source, Language::Ruby); + let args = first_raw_node(tree.root_node(), source, "argument_list", "left + right"); + assert_eq!( + infix_parts_text(&normalizer, args, source), + Some(("left".to_string(), "+".to_string(), "right".to_string())) + ); + + let source = "def calc\n left && right\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let boolean = first_raw_node(tree.root_node(), source, "body_statement", "left && right"); + assert_eq!(infix_parts_text(&normalizer, boolean, source), None); + } + + #[test] + fn argument_list_unary_not_matches_ruby_private_predicate() { + for (line, text) in [ + ("return !flag", "!flag"), + ("return !!flag", "!!flag"), + ("return flag", "flag"), + ("return !flag, other", "!flag, other"), + ("return (!flag)", "(!flag)"), + ("return not flag", "not flag"), + ] { + let source = format!("def check\n {line}\nend\n"); + let tree = raw_tree(&source, Language::Ruby); + let node = first_raw_node(tree.root_node(), &source, "argument_list", text); + let normalizer = super::TreeSitterNormalizer::new(&source, Language::Ruby); + + assert_eq!( + normalizer.argument_list_unary_not(node), + ruby_private_predicate( + &source, + Language::Ruby, + ".rb", + "argument_list_unary_not?", + "argument_list", + text + ), + "argument_list_unary_not? 
mismatch for {line:?}" + ); + } + } + + #[test] + fn unary_not_statement_matches_ruby_private_predicate() { + for (line, text) in [ + ("!flag", "!flag"), + ("!!flag", "!!flag"), + ("flag", "flag"), + ("!flag; other", "!flag; other"), + ("(!flag)", "(!flag)"), + ("not flag", "not flag"), + ] { + let source = format!("def check\n {line}\nend\n"); + let tree = raw_tree(&source, Language::Ruby); + let node = first_raw_node(tree.root_node(), &source, "body_statement", text); + let normalizer = super::TreeSitterNormalizer::new(&source, Language::Ruby); + + assert_eq!( + normalizer.unary_not_statement(node), + ruby_private_predicate( + &source, + Language::Ruby, + ".rb", + "unary_not_statement?", + "body_statement", + text + ), + "unary_not_statement? mismatch for {line:?}" + ); + } + } + + #[test] + fn unary_not_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "!flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "!!flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "-flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "not flag", + ), + ( + "function check(flag: boolean) { return !flag; }\n", + Language::TypeScript, + ".ts", + "unary_expression", + "!flag", + ), + ( + "if not flag:\n pass\n", + Language::Python, + ".py", + "not_operator", + "not flag", + ), + ( + "if not flag then end\n", + Language::Lua, + ".lua", + "unary_expression", + "not flag", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.unary_not_expression(node), + ruby_private_predicate( + source, + language, 
+ suffix, + "unary_not_expression?", + kind, + text + ), + "unary_not_expression? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn unary_minus_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n -flag\n !flag\n value\nend\n", + Language::Ruby, + ".rb", + "unary", + "-flag", + ), + ( + "def check\n -flag\n !flag\n value\nend\n", + Language::Ruby, + ".rb", + "unary", + "!flag", + ), + ( + "function check(value: number) { return -value; }\n", + Language::TypeScript, + ".ts", + "unary_expression", + "-value", + ), + ( + "x = -value\n", + Language::Python, + ".py", + "unary_operator", + "-value", + ), + ( + "local x = -value\n", + Language::Lua, + ".lua", + "expression_list", + "-value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.unary_minus_expression(node), + ruby_private_predicate( + source, + language, + suffix, + "unary_minus_expression?", + kind, + text + ), + "unary_minus_expression? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn binary_operator_matches_ruby_private_helper() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\n left && right\n value\nend\n", + Language::Ruby, + ".rb", + "binary", + "left + right", + ), + ( + "def calc\n left + right\n left && right\n value\nend\n", + Language::Ruby, + ".rb", + "binary", + "left && right", + ), + ( + "def calc\n left + right\n left && right\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right\n left && right\n value", + ), + ( + "const value = left + right && other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right && other", + ), + ( + "const value = left + right && other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left + right and other\n", + Language::Python, + ".py", + "boolean_operator", + "left + right and other", + ), + ( + "value = left + right and other\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left + right and other\n", + Language::Lua, + ".lua", + "expression_list", + "left + right and other", + ), + ( + "local value = left + right and other\n", + Language::Lua, + ".lua", + "binary_expression", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.binary_operator(node).unwrap_or_default(), + ruby_private_string(source, language, suffix, "binary_operator", kind, text), + "binary_operator mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn python_f_string_interpolation_next_to_equals_is_evstr_not_assignment() { + let root = parse_language_source( + r#" +class Tag: + @property + def markup(self): + return f"[{self.name}={self.parameters}]" +"#, + Language::Python, + 
".py", + ); + let dstr = first_node(&root, "DSTR", r#"f"[{self.name}={self.parameters}]""#); + + let types = child_types(dstr); + assert_eq!( + types, + vec![ + "STRING_START", + "STR", + "EVSTR", + "STR", + "EVSTR", + "STR", + "STRING_END" + ], + "expected Ruby-style f-string interpolation parts in {dstr:#?}" + ); + assert!( + !types.contains(&"LASGN"), + "interpolation next to '=' must not normalize as assignment: {dstr:#?}" + ); + } + + #[test] + fn python_relative_import_prefix_only_has_no_children() { + let root = parse_language_source( + r#" +if __name__ == "__main__": + from . import box as box +"#, + Language::Python, + ".py", + ); + let relative_import = first_node(&root, "RELATIVE_IMPORT", "."); + + assert!( + relative_import.children.is_empty(), + "Ruby exposes bare relative import prefix as an empty RELATIVE_IMPORT: {relative_import:#?}" + ); + } + + #[test] + fn python_annotation_type_wrappers_match_ruby_tree_shape() { + let root = parse_language_source( + r#" +from typing import Callable + +_is_single_cell_widths: Callable[[str], bool] = value +last_measured_character: str | None = None +fileno: Callable[[], int] | None = value +"#, + Language::Python, + ".py", + ); + + let str_list_type = first_node(&root, "TYPE", "[str]"); + assert_eq!(child_types(str_list_type), vec!["LVAR"]); + assert_eq!( + child_node(str_list_type, 0).children, + vec![Child::String("str".to_string())] + ); + + let empty_list_type = first_node(&root, "TYPE", "[]"); + assert!( + empty_list_type.children.is_empty(), + "Ruby keeps Callable[[]] list type empty: {empty_list_type:#?}" + ); + + let union_type = first_node(&root, "TYPE", "str | None"); + assert_eq!(child_types(union_type), vec!["LVAR", "NIL"]); + } + + #[test] + fn python_docstring_only_class_body_stays_block_wrapped() { + let root = parse_language_source( + r#" +class ColorParseError(Exception): + """The color could not be parsed.""" +"#, + Language::Python, + ".py", + ); + let class_node = first_node( + &root, + 
"CLASS", + "class ColorParseError(Exception):\n \"\"\"The color could not be parsed.\"\"\"", + ); + let scope = child_node(class_node, 2); + let body = child_node(scope, 2); + + assert_eq!(body.r#type, "BLOCK"); + assert_eq!( + child_types(body), + vec!["STRING_START", "STR", "STRING_END"], + "Ruby exposes docstring-only class body as BLOCK of string parts: {body:#?}" + ); + } + + #[test] + fn python_ellipsis_only_function_body_is_empty_scope_with_root_source() { + let root = parse_language_source( + r#"def __rich__(): + ... +"#, + Language::Python, + ".py", + ); + let defn = first_node(&root, "DEFN", "def __rich__():\n ..."); + let scope = child_node(defn, 1); + + assert_eq!(scope.r#type, "SCOPE"); + assert!(matches!(scope.children.get(2), Some(Child::Nil))); + assert_eq!( + scope.first_lineno, root.first_lineno, + "Ruby scope(body=nil,args=nil) falls back to document root source" + ); + assert_eq!(scope.text, root.text); + } + + #[test] + fn python_explicit_return_none_is_not_elided_from_function_body() { + let root = parse_language_source( + r#" +class Thing: + def _repr_latex_(self): + return None +"#, + Language::Python, + ".py", + ); + let iter = first_node( + &root, + "ITER", + "def _repr_latex_(self):\n return None", + ); + let scope = child_node(iter, 1); + + assert_eq!( + child_node(scope, 2).r#type, + "NIL", + "Ruby only elides implicit nil bodies for Ruby, not explicit Python return None: {scope:#?}" + ); + } + + #[test] + fn python_with_attribute_item_uses_ruby_clause_children() { + let root = parse_language_source( + r#" +def page(self): + with self._console._lock: + buffer = self._console._buffer[:] +"#, + Language::Python, + ".py", + ); + let clause = first_node(&root, "WITH_CLAUSE", "self._console._lock"); + + assert_eq!( + child_types(clause), + vec!["CALL", "LVAR"], + "Ruby with_clause exposes attribute receiver and field separately: {clause:#?}" + ); + assert_eq!(child_node(clause, 0).text, "self._console"); + assert_eq!(child_node(clause, 
1).text, "_lock"); + } + + #[test] + fn python_bare_identifier_expression_statement_has_no_children() { + let root = parse_language_source( + r#" +def _is_jupyter(): + try: + get_ipython # type: ignore[name-defined] + except NameError: + return False +"#, + Language::Python, + ".py", + ); + let expression = first_node(&root, "EXPRESSION_STATEMENT", "get_ipython"); + + assert!( + expression.children.is_empty(), + "Ruby parser exposes bare identifier expression statements without named children: {expression:#?}" + ); + } + + #[test] + fn python_bare_identifier_only_block_has_no_children() { + let root = parse_language_source( + r#" +def get_exception(): + try: + pass + except: + foobarbaz +"#, + Language::Python, + ".py", + ); + let block = first_node(&root, "BLOCK", "foobarbaz"); + + assert!( + block.children.is_empty(), + "Ruby exposes a bare identifier-only block as an empty block: {block:#?}" + ); + } + + #[test] + fn python_bare_dotted_expression_statement_keeps_statement_wrapper() { + let root = parse_language_source("os.get_terminal_size\n", Language::Python, ".py"); + let expression = first_node(&root, "EXPRESSION_STATEMENT", "os.get_terminal_size"); + + assert_eq!( + child_types(expression), + vec!["LVAR", "LVAR"], + "Ruby exposes bare dotted expression statements as expression_statement identifier children: {expression:#?}" + ); + } + + #[test] + fn python_bare_comparison_expression_statement_keeps_statement_wrapper() { + let root = parse_language_source( + r#" +def test_get_style(): + console.get_style("repr.brace") == Style(bold=True) +"#, + Language::Python, + ".py", + ); + let expression = first_node( + &root, + "EXPRESSION_STATEMENT", + r#"console.get_style("repr.brace") == Style(bold=True)"#, + ); + + assert_eq!( + child_types(expression), + vec!["CALL", "FCALL"], + "Ruby exposes bare comparison statements as expression_statement operand children: {expression:#?}" + ); + } + + #[test] + fn python_delete_statement_matches_ruby_block_contexts() { + let 
root = parse_language_source( + r#" +def save(self, clear): + if clear: + del self._record_buffer[:] + with self._record_buffer_lock: + del self._record_buffer[:] + text = "" +"#, + Language::Python, + ".py", + ); + let if_node = first_node(&root, "IF", "if clear:\n del self._record_buffer[:]"); + assert_eq!( + child_node(if_node, 1).r#type, + "SUBSCRIPT", + "Ruby unwraps a single delete body to the deleted subscript: {if_node:#?}" + ); + + let delete = first_node(&root, "DELETE_STATEMENT", "del self._record_buffer[:]"); + assert_eq!( + child_types(delete), + vec!["SUBSCRIPT"], + "Ruby keeps delete_statement wrapper in multi-statement bodies: {delete:#?}" + ); + } + + #[test] + fn python_single_subscript_expression_block_exposes_subscript_children() { + let root = parse_language_source( + r#" +def test_render(): + with pytest.raises(KeyError): + top["asdasd"] +"#, + Language::Python, + ".py", + ); + let block = first_node(&root, "BLOCK", r#"top["asdasd"]"#); + + assert_eq!( + child_types(block), + vec!["LVAR", "STR"], + "Ruby exposes a single subscript expression block as subscript children: {block:#?}" + ); + } + + #[test] + fn python_single_if_block_under_try_exposes_ruby_if_children() { + let root = parse_language_source( + r#" +def load(args): + try: + if args.path == "-": + json_data = sys.stdin.read() + else: + json_data = Path(args.path).read_text() + except Exception as error: + sys.exit(-1) +"#, + Language::Python, + ".py", + ); + let block = first_node( + &root, + "BLOCK", + "if args.path == \"-\":\n json_data = sys.stdin.read()\n else:\n json_data = Path(args.path).read_text()", + ); + + assert_eq!( + child_types(block), + vec!["OPCALL", "BLOCK", "ELSE_CLAUSE"], + "Ruby block lacks an if_statement wrapper in this parser shape: {block:#?}" + ); + } + + #[test] + fn python_single_decorated_definition_block_exposes_decorator_and_function() { + let root = parse_language_source( + r#" +def test_inspect_swig_edge_case(): + class Thing: + @property + def 
__class__(self): + raise AttributeError +"#, + Language::Python, + ".py", + ); + let block = first_node( + &root, + "BLOCK", + "@property\n def __class__(self):\n raise AttributeError", + ); + + assert_eq!( + child_types(block), + vec!["IVAR", "DEFN"], + "Ruby exposes decorated definitions as direct block children: {block:#?}" + ); + } + + #[test] + fn python_nested_class_inside_class_body_matches_ruby_iter_shape() { + let root = parse_language_source( + r#" +def test_can_handle_special_characters_in_docstrings(): + class Something: + class Thing: + pass +"#, + Language::Python, + ".py", + ); + let iter = first_node(&root, "ITER", "class Thing:\n pass"); + + assert_eq!(child_node(iter, 0).r#type, "VCALL"); + assert_eq!( + child_node(iter, 0).children, + vec![Child::Symbol("Thing".to_string()), Child::Nil] + ); + assert_eq!(child_node(iter, 1).r#type, "SCOPE"); + } + + #[test] + fn lua_local_assignment_call_rhs_matches_ruby_expression_list_shape() { + let root = parse_language_source( + r#"local test_env = require("spec.util.test_env") +"#, + Language::Lua, + ".lua", + ); + let expression_list = + first_node(&root, "EXPRESSION_LIST", r#"require("spec.util.test_env")"#); + + assert_eq!( + child_types(expression_list), + vec!["LVAR", "ARGUMENTS"], + "Ruby exposes a Lua call RHS expression_list as the call function and arguments, without a FUNCTION_CALL wrapper: {expression_list:#?}" + ); + } + + #[test] + fn lua_local_assignment_member_rhs_matches_ruby_expression_list_shape() { + let root = parse_language_source("local run = test_env.run\n", Language::Lua, ".lua"); + let expression_list = first_node(&root, "EXPRESSION_LIST", "test_env.run"); + + assert_eq!( + child_types(expression_list), + vec!["LVAR", "LVAR"], + "Ruby exposes a Lua dotted RHS expression_list as receiver and field, without a DOT_INDEX_EXPRESSION wrapper: {expression_list:#?}" + ); + } + + #[test] + fn lua_table_string_entry_matches_ruby_field_shape() { + let root = parse_language_source( + "local 
extra_rocks = {\n \"/luasocket-${LUASOCKET}.src.rock\",\n}\n", + Language::Lua, + ".lua", + ); + let expression_list = first_node( + &root, + "EXPRESSION_LIST", + "{\n \"/luasocket-${LUASOCKET}.src.rock\",\n}", + ); + let field = child_node(expression_list, 0); + let string = child_node(field, 0); + + assert_eq!( + child_types(expression_list), + vec!["FIELD"], + "Ruby exposes a Lua table constructor assignment RHS as its field children: {expression_list:#?}" + ); + assert_eq!(string.r#type, "STR"); + assert_eq!( + string.children, + vec![Child::String("/luasocket-${LUASOCKET}.src.rock".to_string())], + "Ruby normalizes a Lua table string field from string_content, without quotes: {string:#?}" + ); + } + + #[test] + fn lua_table_dollar_string_entry_matches_ruby_str_not_gvar() { + let root = parse_language_source( + "local incdirs = { \"$(FOO1_INCDIR)\" }\n", + Language::Lua, + ".lua", + ); + let string = first_node(&root, "STR", "$(FOO1_INCDIR)"); + let mut gvars = Vec::new(); + nodes_of_type(&root, "GVAR", &mut gvars); + + assert_eq!( + string.children, + vec![Child::String("$(FOO1_INCDIR)".to_string())], + "Ruby normalizes Lua table strings starting with $ as STR, not GVAR: {string:#?}" + ); + assert!( + gvars.is_empty(), + "Lua string_content starting with $ must not normalize as GVAR: {gvars:#?}" + ); + } + + #[test] + fn lua_table_call_entry_matches_ruby_field_children_shape() { + let root = parse_language_source( + "assert.same(install, { bin = { P\"bin/binfile\" } })\n", + Language::Lua, + ".lua", + ); + let field = first_node(&root, "FIELD", "P\"bin/binfile\""); + + assert_eq!( + child_types(field), + vec!["LVAR", "ARGUMENTS"], + "Ruby exposes a Lua table field call as the call children, without FUNCTION_CALL wrapper: {field:#?}" + ); + } + + #[test] + fn lua_table_identifier_entry_matches_ruby_empty_field_shape() { + let root = parse_language_source( + "local rocks_path = table.concat({rocks_tree, \"a_rock\"})\n", + Language::Lua, + ".lua", + ); + let 
field = first_node(&root, "FIELD", "rocks_tree"); + + assert!( + field.children.is_empty(), + "Ruby exposes a bare identifier Lua table field with no normalized children: {field:#?}" + ); + } + + #[test] + fn lua_single_call_function_body_matches_ruby_block_shape() { + let root = parse_language_source( + "before_each(function()\n test_env.setup_specs(extra_rocks)\nend)\n", + Language::Lua, + ".lua", + ); + let defn = first_node( + &root, + "DEFN", + "function()\n test_env.setup_specs(extra_rocks)\nend", + ); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + + assert_eq!(body.r#type, "BLOCK"); + assert_eq!( + child_types(body), + vec!["DOT_INDEX_EXPRESSION", "ARGUMENTS"], + "Ruby exposes a single Lua function-call body as a BLOCK of the call target and arguments: {body:#?}" + ); + } + + #[test] + fn lua_single_assignment_function_body_matches_ruby_block_shape() { + let root = parse_language_source( + "lazy_setup(function()\n git = git_repo.start()\nend)\n", + Language::Lua, + ".lua", + ); + let defn = first_node(&root, "DEFN", "function()\n git = git_repo.start()\nend"); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + + assert_eq!(body.r#type, "BLOCK"); + assert_eq!( + child_types(body), + vec!["VARIABLE_LIST", "EXPRESSION_LIST"], + "Ruby exposes a single Lua assignment body as a BLOCK of assignment children, without LASGN: {body:#?}" + ); + } + + #[test] + fn lua_single_bare_assignment_function_body_matches_ruby_empty_lists() { + let root = parse_language_source("function()\n x = y\nend\n", Language::Lua, ".lua"); + let defn = first_node(&root, "DEFN", "function()\n x = y\nend"); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + let variable_list = child_node(body, 0); + let expression_list = child_node(body, 1); + + assert_eq!(body.r#type, "BLOCK"); + assert_eq!(variable_list.r#type, "VARIABLE_LIST"); + assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); + assert!( + 
variable_list.children.is_empty(), + "Ruby exposes a bare Lua single-assignment variable_list with no children: {variable_list:#?}" + ); + assert!( + expression_list.children.is_empty(), + "Ruby exposes a bare identifier Lua single-assignment RHS with no children: {expression_list:#?}" + ); + } + + #[test] + fn lua_single_dotted_assignment_function_body_keeps_ruby_variable_list_children() { + let root = parse_language_source( + "function()\n package.path = oldpath\nend\n", + Language::Lua, + ".lua", + ); + let defn = first_node(&root, "DEFN", "function()\n package.path = oldpath\nend"); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + let variable_list = child_node(body, 0); + let expression_list = child_node(body, 1); + + assert_eq!(body.r#type, "BLOCK"); + assert_eq!(variable_list.r#type, "VARIABLE_LIST"); + assert_eq!( + child_types(variable_list), + vec!["LVAR", "LVAR"], + "Ruby keeps Lua dotted assignment targets as variable_list children: {variable_list:#?}" + ); + assert!( + expression_list.children.is_empty(), + "Ruby exposes a bare identifier Lua dotted-assignment RHS with no children: {expression_list:#?}" + ); + } + + #[test] + fn lua_single_local_assignment_function_body_matches_ruby_lasgn_shape() { + let root = parse_language_source( + "it(function()\n local output = run.luarocks(\"show --rock-tree luacov\")\nend)\n", + Language::Lua, + ".lua", + ); + let defn = first_node( + &root, + "DEFN", + "function()\n local output = run.luarocks(\"show --rock-tree luacov\")\nend", + ); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + + assert_eq!(body.r#type, "LASGN"); + assert_eq!( + body.children.first(), + Some(&Child::String("output".to_string())), + "Ruby exposes a single Lua local assignment function body as the inner LASGN: {body:#?}" + ); + } + + #[test] + fn lua_assigned_function_expression_matches_ruby_expression_list_shape() { + let root = parse_language_source( + "local test_with_location = 
function(location)\n lfs.mkdir(location)\nend\n", + Language::Lua, + ".lua", + ); + let assignment = first_node( + &root, + "LASGN", + "test_with_location = function(location)\n lfs.mkdir(location)\nend", + ); + let expression_list = child_node(assignment, 1); + + assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); + assert_eq!( + child_types(expression_list), + vec!["PARAMETERS", "BLOCK"], + "Ruby exposes a Lua assigned function expression as PARAMETERS and BLOCK inside the RHS expression_list: {expression_list:#?}" + ); + } + + #[test] + fn lua_assigned_function_if_else_matches_fixed_ruby_if_shape() { + let root = parse_language_source( + "local make_unreadable = function(path)\n if is_win then\n fs.execute(\"x\")\n else\n fs.execute(\"y\")\n end\nend\n", + Language::Lua, + ".lua", + ); + let expression_list = first_node( + &root, + "EXPRESSION_LIST", + "function(path)\n if is_win then\n fs.execute(\"x\")\n else\n fs.execute(\"y\")\n end\nend", + ); + let if_node = child_node(expression_list, 1); + let mut iters = Vec::new(); + nodes_of_type(&root, "ITER", &mut iters); + + assert_eq!(if_node.r#type, "IF"); + assert_eq!(child_node(if_node, 2).r#type, "ELSE_STATEMENT"); + assert!( + iters.is_empty(), + "Ruby no longer misclassifies a Lua if/else in an assigned function expression as ITER: {iters:#?}" + ); + } + + #[test] + fn lua_single_return_function_body_matches_ruby_expression_list_shape() { + let root = parse_language_source( + "function sum.sum(a, b)\n return a + b\nend\n", + Language::Lua, + ".lua", + ); + let defn = first_node( + &root, + "DEFN", + "function sum.sum(a, b)\n return a + b\nend", + ); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + + assert_eq!(body.r#type, "EXPRESSION_LIST"); + assert_eq!( + child_types(body), + vec!["LVAR", "LVAR"], + "Ruby exposes a single Lua return body as the returned expression_list, without RETURN: {body:#?}" + ); + } + + #[test] + fn 
lua_top_level_return_identifier_matches_ruby_empty_expression_list() { + let root = parse_language_source("return sum\n", Language::Lua, ".lua"); + let return_node = first_node(&root, "RETURN", "return sum"); + let expression_list = child_node(return_node, 0); + + assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); + assert!( + expression_list.children.is_empty(), + "Ruby exposes a Lua return of a bare identifier as an empty expression_list: {expression_list:#?}" + ); + } + + #[test] + fn lua_top_level_return_scalar_literals_match_ruby_empty_expression_list() { + for literal in ["true", "false", "nil", "0"] { + let root = parse_language_source(&format!("return {literal}\n"), Language::Lua, ".lua"); + let return_node = first_node(&root, "RETURN", &format!("return {literal}")); + let expression_list = child_node(return_node, 0); + + assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); + assert!( + expression_list.children.is_empty(), + "Ruby exposes a Lua return of {literal} as an empty expression_list: {expression_list:#?}" + ); + } + } + + #[test] + fn lua_assignment_scalar_literals_match_ruby_empty_expression_list() { + for literal in ["true", "false", "nil", "0"] { + let root = + parse_language_source(&format!("tmpfile = {literal}\n"), Language::Lua, ".lua"); + let assignment = first_node(&root, "LASGN", &format!("tmpfile = {literal}")); + let expression_list = child_node(assignment, 1); + + assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); + assert!( + expression_list.children.is_empty(), + "Ruby exposes a Lua scalar literal assignment RHS as an empty expression_list: {expression_list:#?}" + ); + } + } + + #[test] + fn lua_no_paren_string_argument_matches_ruby_string_content_shape() { + let root = parse_language_source("V\"foo\"\n", Language::Lua, ".lua"); + let call = first_node(&root, "FUNCTION_CALL", "V\"foo\""); + let arguments = child_node(call, 1); + let string = child_node(arguments, 0); + + assert_eq!(arguments.r#type, "ARGUMENTS"); + 
assert_eq!(arguments.text, "\"foo\""); + assert_eq!(string.r#type, "STR"); + assert_eq!(string.text, "foo"); + assert_eq!(string.children, vec![Child::String("foo".to_string())]); + } + + #[test] + fn lua_long_string_assignment_matches_ruby_expression_list_content_shape() { + let root = parse_language_source( + "local c_module_source = [[\n #include \n]]\n", + Language::Lua, + ".lua", + ); + let expression_list = first_node(&root, "EXPRESSION_LIST", "[[\n #include \n]]"); + let string = child_node(expression_list, 0); + + assert_eq!(child_types(expression_list), vec!["STR"]); + assert_eq!( + string.children, + vec![Child::String("\n #include \n".to_string())], + "Ruby normalizes a Lua long string assignment from string_content, without bracket delimiters: {string:#?}" + ); + } + + #[test] + fn lua_elseif_branch_is_preserved_as_if_alternative() { + let root = parse_language_source( + r#"if test_env.LUA_V == "5.1" then + one() +elseif test_env.LUA_V == "5.2" then + two() +end +"#, + Language::Lua, + ".lua", + ); + let if_node = first_node( + &root, + "IF", + "if test_env.LUA_V == \"5.1\" then\n one()\nelseif test_env.LUA_V == \"5.2\" then\n two()\nend", + ); + let alternative = child_node(if_node, 2); + + assert_eq!(alternative.r#type, "ELSEIF_STATEMENT"); + } + + #[test] + fn lua_binary_assignment_rhs_matches_ruby_expression_list_shape() { + let root = parse_language_source( + "local rockspec = testing_paths.fixtures_dir .. \"/build_only_deps-0.1-1.rockspec\"\n", + Language::Lua, + ".lua", + ); + let expression_list = first_node( + &root, + "EXPRESSION_LIST", + "testing_paths.fixtures_dir .. 
\"/build_only_deps-0.1-1.rockspec\"", + ); + + assert_eq!( + child_types(expression_list), + vec!["DOT_INDEX_EXPRESSION", "STR"], + "Ruby exposes a Lua binary RHS expression_list as the binary operands, without a BINARY_EXPRESSION wrapper: {expression_list:#?}" + ); + } + + #[test] + fn lua_local_declaration_without_rhs_matches_ruby_empty_variable_list() { + let root = parse_language_source("local tmpdir\n", Language::Lua, ".lua"); + let variable_list = first_node(&root, "VARIABLE_LIST", "tmpdir"); + + assert!( + variable_list.children.is_empty(), + "Ruby exposes a Lua local declaration without RHS as an empty VARIABLE_LIST: {variable_list:#?}" + ); + } + + #[test] + fn lua_multi_local_declaration_without_rhs_keeps_ruby_variable_list_children() { + let root = parse_language_source("local cfg, fs\n", Language::Lua, ".lua"); + let variable_list = first_node(&root, "VARIABLE_LIST", "cfg, fs"); + + assert_eq!( + child_types(variable_list), + vec!["LVAR", "LVAR"], + "Ruby keeps children for a multi-name Lua local declaration without RHS: {variable_list:#?}" + ); + } + + #[test] + fn lua_single_generic_for_variable_matches_ruby_empty_variable_list() { + let root = parse_language_source( + "for f in lfs.dir(spec_quick) do end\n", + Language::Lua, + ".lua", + ); + let variable_list = first_node(&root, "VARIABLE_LIST", "f"); + + assert!( + variable_list.children.is_empty(), + "Ruby exposes a single Lua generic-for variable list as empty: {variable_list:#?}" + ); + } + + #[test] + fn lua_multi_generic_for_variable_list_keeps_ruby_children() { + let root = + parse_language_source("for _, t in ipairs(tests) do end\n", Language::Lua, ".lua"); + let variable_list = first_node(&root, "VARIABLE_LIST", "_, t"); + + assert_eq!( + child_types(variable_list), + vec!["LVAR", "LVAR"], + "Ruby keeps children for a multi-name Lua generic-for variable list: {variable_list:#?}" + ); + } + #[test] fn normalizes_safe_navigation_inside_multi_statement_else_body() { let root = parse_source( 
diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 05e1e4ea8..3bbb894d7 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -63,7 +63,7 @@ fn language_grammar(language: Language) -> TreeSitterLanguage { Language::Lua => tree_sitter_lua::LANGUAGE.into(), Language::C => tree_sitter_c::LANGUAGE.into(), Language::Cpp => tree_sitter_cpp::LANGUAGE.into(), - Language::CSharp => tree_sitter_c_sharp::language().into(), + Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(), } } diff --git a/gems/decomplex/test/ast_test.rb b/gems/decomplex/test/ast_test.rb new file mode 100644 index 000000000..8d90e35c6 --- /dev/null +++ b/gems/decomplex/test/ast_test.rb @@ -0,0 +1,494 @@ +# frozen_string_literal: true + +require "minitest/autorun" +require "tempfile" +require_relative "../lib/decomplex/ast" +require_relative "../lib/decomplex/syntax" + +class AstTest < Minitest::Test + def test_python_f_string_interpolation_after_literal_equals_is_not_dropped + with_python_file(<<~PY) do |file| + class Tag: + @property + def markup(self): + return f"[{self.name}={self.parameters}]" + PY + root, = parse_python(file) + dstr = nodes_of_type(root, "DSTR").find { |node| node.text == 'f"[{self.name}={self.parameters}]"' } + + refute_nil dstr + assert_equal %w[STRING_START STR EVSTR STR EVSTR STR STRING_END], dstr.children.map(&:type).map(&:to_s) + end + end + + def test_lua_elseif_branch_is_preserved_as_if_alternative + with_language_file(<<~LUA, ".lua", :lua) do |file| + if test_env.LUA_V == "5.1" then + one() + elseif test_env.LUA_V == "5.2" then + two() + end + LUA + root, = parse_language(file, :lua) + if_node = nodes_of_type(root, "IF").find { |node| node.text.include?("test_env.LUA_V") } + + refute_nil if_node + assert_equal "ELSEIF_STATEMENT", if_node.children[2].type.to_s + end + end + + def 
test_lua_assigned_function_if_else_normalizes_as_if_not_iter + with_language_file(<<~LUA, ".lua", :lua) do |file| + local make_unreadable = function(path) + if is_win then + fs.execute("x") + else + fs.execute("y") + end + end + LUA + root, = parse_language(file, :lua) + expression_list = nodes_of_type(root, "EXPRESSION_LIST").find { |node| node.text.start_with?("function(path)") } + + refute_nil expression_list + if_node = expression_list.children.find { |child| Decomplex::Ast.node?(child) && child.type.to_s == "IF" } + refute_nil if_node + assert_empty nodes_of_type(root, "ITER") + assert_equal "ELSE_STATEMENT", if_node.children[2].type.to_s + end + end + + def test_python_yield_statement_predicate_recognizes_expression_statement_wrapper + with_python_file(<<~PY) do |file| + def gen(): + yield item + other() + PY + document = parse_syntax(file, :python) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + yield_statement = ts_nodes(document.root).find do |node| + node.kind == "expression_statement" && node.text == "yield item" + end + block = ts_nodes(document.root).find do |node| + node.kind == "block" && node.text == "yield item\n other()" + end + + refute_nil yield_statement + refute_nil block + assert normalizer.send(:yield_statement?, yield_statement) + refute normalizer.send(:yield_statement?, block) + end + end + + def test_python_yield_in_multi_statement_body_stays_statement_not_whole_block + with_python_file(<<~PY) do |file| + def gen(): + yield item + other() + PY + root, = parse_python(file) + defn = nodes_of_type(root, "DEFN").find { |node| node.text == "def gen():\n yield item\n other()" } + scope = defn.children[1] + body = scope.children[2] + + refute_nil defn + assert_equal "BLOCK", body.type.to_s + assert_equal %w[YIELD EXPRESSION_STATEMENT], body.children.map(&:type).map(&:to_s) + end + end + + def test_ruby_super_statement_predicate_recognizes_bare_and_argument_forms + with_language_file(<<~RUBY, ".rb", :ruby) do |file| + class 
Child < Parent + def bare + super + end + + def with_arg + super :item + end + + def other + value + end + end + RUBY + document = parse_syntax(file, :ruby) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + bare = ts_nodes(document.root).find { |node| node.kind == "body_statement" && node.text == "super" } + with_arg = ts_nodes(document.root).find { |node| node.kind == "body_statement" && node.text == "super :item" } + other = ts_nodes(document.root).find { |node| node.kind == "body_statement" && node.text == "value" } + + refute_nil bare + refute_nil with_arg + refute_nil other + assert normalizer.send(:super_statement?, bare) + assert normalizer.send(:super_statement?, with_arg) + refute normalizer.send(:super_statement?, other) + end + end + + def test_ruby_super_statement_normalizes_bare_and_arguments + with_language_file(<<~RUBY, ".rb", :ruby) do |file| + class Child < Parent + def bare + super + end + + def with_arg + super :item + end + end + RUBY + root, = parse_language(file, :ruby) + bare = nodes_of_type(root, "SUPER").find { |node| node.text == "super" } + with_arg = nodes_of_type(root, "SUPER").find { |node| node.text == "super :item" } + + refute_nil bare + refute_nil with_arg + assert_nil bare.children.first + assert_equal "LIST", with_arg.children.first.type.to_s + assert_equal "LIT", with_arg.children.first.children.first.type.to_s + end + end + + def test_ruby_argument_list_element_reference_predicate + with_language_file(<<~RUBY, ".rb", :ruby) do |file| + def indexed + return items[0] + return obj.foo[0] + return [0] + return items[0], other + return items[] + return items[0] { nope } + end + RUBY + document = parse_syntax(file, :ruby) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + argument_lists = ts_nodes(document.root).select { |node| node.kind == "argument_list" } + + assert normalizer.send(:argument_list_element_reference?, argument_lists.find { |node| node.text == "items[0]" }) + assert 
normalizer.send(:argument_list_element_reference?, argument_lists.find { |node| node.text == "obj.foo[0]" }) + refute normalizer.send(:argument_list_element_reference?, argument_lists.find { |node| node.text == "[0]" }) + refute normalizer.send(:argument_list_element_reference?, argument_lists.find { |node| node.text == "items[0], other" }) + refute normalizer.send(:argument_list_element_reference?, argument_lists.find { |node| node.text == "items[]" }) + refute normalizer.send(:argument_list_element_reference?, argument_lists.find { |node| node.text == "items[0] { nope }" }) + end + end + + def test_dynamic_scope_rewrites_locals_without_crossing_scope_boundaries + inner_assignment = ast_node(:LASGN, children: [:inner]) + node = ast_node(:BLOCK, children: [ + ast_node(:LASGN, children: [:value]), + ast_node(:LVAR, children: [:value]), + ast_node(:DEFN, children: [:nested, ast_node(:SCOPE, children: [nil, nil, inner_assignment])]) + ]) + + result = Decomplex::Ast::TreeSitterNormalizer.allocate.send(:dynamic_scope, node) + + assert_equal :DASGN, result.children[0].type + assert_equal :DVAR, result.children[1].type + assert_equal :DEFN, result.children[2].type + assert_equal :LASGN, inner_assignment.type + end + + def test_link_when_chain_sets_next_arm_and_pads_short_when_nodes + fallback = ast_node(:ELSE) + first = ast_node(:WHEN, children: [:patterns, :body, nil]) + second = ast_node(:WHEN, children: [:patterns, :body, nil]) + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + + result = normalizer.send(:link_when_chain, [first, second], fallback) + + assert_same first, result + assert_same second, first.children[2] + assert_same fallback, second.children[2] + + short = ast_node(:WHEN, children: [:patterns]) + result = normalizer.send(:link_when_chain, [short], fallback) + + assert_same short, result + assert_nil short.children[1] + assert_same fallback, short.children[2] + end + + def test_link_rescue_chain_sets_next_rescue_and_pads_short_resbody_nodes + 
first = ast_node(:RESBODY, children: [:exceptions, :body, nil]) + second = ast_node(:RESBODY, children: [:exceptions, :body, nil]) + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + + result = normalizer.send(:link_rescue_chain, [first, second]) + + assert_same first, result + assert_same second, first.children[2] + assert_nil second.children[2] + + short = ast_node(:RESBODY, children: [:exceptions]) + result = normalizer.send(:link_rescue_chain, [short]) + + assert_same short, result + assert_nil short.children[1] + assert_nil short.children[2] + end + + def test_infix_statement_parts_extracts_allowed_wrapper_parts + body = ruby_syntax_node("def calc\n left + right\nend\n", "body_statement", "left + right") + return_args = ruby_syntax_node("def calc\n return left + right\nend\n", "argument_list", "left + right") + boolean = ruby_syntax_node("def calc\n left && right\nend\n", "body_statement", "left && right") + unsupported = ruby_syntax_node("def calc\n left + right\nend\n", "identifier", "left") + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + + assert_equal ["left", "+", "right"], infix_parts_text(normalizer, body) + assert_equal ["left", "+", "right"], infix_parts_text(normalizer, return_args) + assert_equal [nil, nil, nil], infix_parts_text(normalizer, boolean) + assert_equal [nil, nil, nil], infix_parts_text(normalizer, unsupported) + end + + def test_argument_list_unary_not_predicate + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + + assert normalizer.send(:argument_list_unary_not?, ruby_syntax_node("def check\n return !flag\nend\n", "argument_list", "!flag")) + assert normalizer.send(:argument_list_unary_not?, ruby_syntax_node("def check\n return !!flag\nend\n", "argument_list", "!!flag")) + refute normalizer.send(:argument_list_unary_not?, ruby_syntax_node("def check\n return flag\nend\n", "argument_list", "flag")) + refute normalizer.send(:argument_list_unary_not?, ruby_syntax_node("def check\n return !flag, 
other\nend\n", "argument_list", "!flag, other")) + refute normalizer.send(:argument_list_unary_not?, ruby_syntax_node("def check\n return (!flag)\nend\n", "argument_list", "(!flag)")) + refute normalizer.send(:argument_list_unary_not?, ruby_syntax_node("def check\n return not flag\nend\n", "argument_list", "not flag")) + end + + def test_unary_not_statement_predicate + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + + assert normalizer.send(:unary_not_statement?, ruby_syntax_node("def check\n !flag\nend\n", "body_statement", "!flag")) + assert normalizer.send(:unary_not_statement?, ruby_syntax_node("def check\n !!flag\nend\n", "body_statement", "!!flag")) + refute normalizer.send(:unary_not_statement?, ruby_syntax_node("def check\n flag\nend\n", "body_statement", "flag")) + refute normalizer.send(:unary_not_statement?, ruby_syntax_node("def check\n !flag; other\nend\n", "body_statement", "!flag; other")) + refute normalizer.send(:unary_not_statement?, ruby_syntax_node("def check\n (!flag)\nend\n", "body_statement", "(!flag)")) + refute normalizer.send(:unary_not_statement?, ruby_syntax_node("def check\n not flag\nend\n", "body_statement", "not flag")) + end + + def test_unary_not_expression_predicate + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + ruby_source = "def check\n !flag\n !!flag\n -flag\n not flag\nend\n" + + assert normalizer.send(:unary_not_expression?, ruby_syntax_node(ruby_source, "unary", "!flag")) + assert normalizer.send(:unary_not_expression?, ruby_syntax_node(ruby_source, "unary", "!!flag")) + refute normalizer.send(:unary_not_expression?, ruby_syntax_node(ruby_source, "unary", "-flag")) + refute normalizer.send(:unary_not_expression?, ruby_syntax_node(ruby_source, "unary", "not flag")) + + with_language_file("function check(flag: boolean) { return !flag; }\n", ".ts", :typescript) do |file| + document = parse_syntax(file, :typescript) + node = ts_nodes(document.root).find { |candidate| candidate.kind == 
"unary_expression" && candidate.text == "!flag" } + refute_nil node + assert normalizer.send(:unary_not_expression?, node) + end + + with_language_file("if not flag:\n pass\n", ".py", :python) do |file| + document = parse_syntax(file, :python) + node = ts_nodes(document.root).find { |candidate| candidate.kind == "not_operator" && candidate.text == "not flag" } + refute_nil node + refute normalizer.send(:unary_not_expression?, node) + end + + with_language_file("if not flag then end\n", ".lua", :lua) do |file| + document = parse_syntax(file, :lua) + node = ts_nodes(document.root).find { |candidate| candidate.kind == "unary_expression" && candidate.text == "not flag" } + refute_nil node + refute normalizer.send(:unary_not_expression?, node) + end + end + + def test_unary_minus_expression_predicate + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + ruby_source = "def check\n -flag\n !flag\n value\nend\n" + + assert normalizer.send(:unary_minus_expression?, ruby_syntax_node(ruby_source, "unary", "-flag")) + refute normalizer.send(:unary_minus_expression?, ruby_syntax_node(ruby_source, "unary", "!flag")) + + with_language_file("function check(value: number) { return -value; }\n", ".ts", :typescript) do |file| + document = parse_syntax(file, :typescript) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == "unary_expression" && candidate.text == "-value" } + refute_nil node + assert normalizer.send(:unary_minus_expression?, node) + end + + with_language_file("x = -value\n", ".py", :python) do |file| + document = parse_syntax(file, :python) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == "unary_operator" && candidate.text == "-value" } + refute_nil node + assert normalizer.send(:unary_minus_expression?, node) + end + + with_language_file("local x = -value\n", ".lua", :lua) do |file| + 
document = parse_syntax(file, :lua) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == "expression_list" && candidate.text == "-value" } + refute_nil node + assert normalizer.send(:unary_minus_expression?, node) + end + end + + def test_tree_sitter_normalizer_selects_language_specific_normalization_adapters + { + ruby: Decomplex::Ast::RubyTreeSitterNormalizationAdapter, + python: Decomplex::Ast::PythonTreeSitterNormalizationAdapter, + lua: Decomplex::Ast::LuaTreeSitterNormalizationAdapter, + typescript: Decomplex::Ast::TypeScriptTreeSitterNormalizationAdapter, + javascript: Decomplex::Ast::TypeScriptTreeSitterNormalizationAdapter + }.each do |language, adapter_class| + assert_instance_of adapter_class, Decomplex::Ast::TreeSitterNormalizationAdapter.for(fake_document(language)) + end + end + + def test_binary_operator + ruby_source = "def calc\n left + right\n left && right\n value\nend\n" + + with_language_file(ruby_source, ".rb", :ruby) do |file| + document = parse_syntax(file, :ruby) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + + assert_equal "+", normalizer.send(:binary_operator, ts_nodes(document.root).find { |node| node.kind == "binary" && node.text == "left + right" }) + assert_equal "&&", normalizer.send(:binary_operator, ts_nodes(document.root).find { |node| node.kind == "binary" && node.text == "left && right" }) + assert_equal "", normalizer.send(:binary_operator, ts_nodes(document.root).find { |node| node.kind == "body_statement" && node.text == "left + right\n left && right\n value" }) + end + + with_language_file("const value = left + right && other;\n", ".ts", :typescript) do |file| + document = parse_syntax(file, :typescript) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + outer = ts_nodes(document.root).find { |candidate| candidate.kind == "binary_expression" && candidate.text == "left + right && other" } + inner = 
ts_nodes(document.root).find { |candidate| candidate.kind == "binary_expression" && candidate.text == "left + right" } + + refute_nil outer + refute_nil inner + assert_equal "&&", normalizer.send(:binary_operator, outer) + assert_equal "+", normalizer.send(:binary_operator, inner) + end + + with_language_file("value = left + right and other\n", ".py", :python) do |file| + document = parse_syntax(file, :python) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + outer = ts_nodes(document.root).find { |candidate| candidate.kind == "boolean_operator" && candidate.text == "left + right and other" } + inner = ts_nodes(document.root).find { |candidate| candidate.kind == "binary_operator" && candidate.text == "left + right" } + + refute_nil outer + refute_nil inner + assert_equal "and", normalizer.send(:binary_operator, outer) + assert_equal "+", normalizer.send(:binary_operator, inner) + end + + with_language_file("local value = left + right and other\n", ".lua", :lua) do |file| + document = parse_syntax(file, :lua) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + outer = ts_nodes(document.root).find { |candidate| candidate.kind == "expression_list" && candidate.text == "left + right and other" } + inner = ts_nodes(document.root).find { |candidate| candidate.kind == "binary_expression" && candidate.text == "left + right" } + + refute_nil outer + refute_nil inner + assert_equal "and", normalizer.send(:binary_operator, outer) + assert_equal "+", normalizer.send(:binary_operator, inner) + end + end + + private + + def ast_node(type, children: []) + Decomplex::Ast::Node.new( + type: type, + children: children, + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: type.to_s + ) + end + + def fake_document(language) + Object.new.tap { |document| document.define_singleton_method(:language) { language } } + end + + def ruby_syntax_node(source, kind, text) + found = nil + with_language_file(source, ".rb", :ruby) do 
|file| + document = parse_syntax(file, :ruby) + found = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + end + refute_nil found + found + end + + def infix_parts_text(normalizer, node) + normalizer.send(:infix_statement_parts, node).map do |part| + part.respond_to?(:text) ? part.text : part + end + end + + def parse_python(file) + parse_language(file, :python) + end + + def parse_language(file, language) + with_env("DECOMPLEX_FORCE_LANGUAGE", language.to_s) do + Decomplex::Ast.normalized_cache.clear + Decomplex::Ast.parse(file) + end + rescue LoadError => e + skip e.message + end + + def parse_syntax(file, language) + with_env("DECOMPLEX_FORCE_LANGUAGE", language.to_s) do + Decomplex::Syntax.parse(file, parser: "tree_sitter") + end + rescue LoadError => e + skip e.message + end + + def nodes_of_type(node, type) + out = [] + walk_nodes(node) { |child| out << child if child.type.to_s == type } + out + end + + def walk_nodes(node, &block) + return unless Decomplex::Ast.node?(node) + + yield node + node.children.each { |child| walk_nodes(child, &block) } + end + + def ts_nodes(node) + out = [] + walk_ts_nodes(node) { |child| out << child } + out + end + + def walk_ts_nodes(node, &block) + return unless node.respond_to?(:kind) + + yield node + node.named_children.each { |child| walk_ts_nodes(child, &block) } + end + + def with_python_file(source) + with_language_file(source, ".py", :python) { |file| yield file } + end + + def with_language_file(source, suffix, _language) + file = Tempfile.new(["decomplex_ast", suffix]) + file.write(source) + file.close + yield file.path + ensure + file&.unlink + end + + def with_env(key, value) + old = ENV[key] + value.nil? ? ENV.delete(key) : ENV[key] = value + yield + ensure + old.nil? ? 
ENV.delete(key) : ENV[key] = old + end +end diff --git a/gems/espalier/exe/espalier b/gems/espalier/exe/espalier index 2531b0b29..739d0ebda 100755 --- a/gems/espalier/exe/espalier +++ b/gems/espalier/exe/espalier @@ -27,7 +27,7 @@ end OptionParser.new do |opts| opts.banner = "Usage: espalier [options] " - opts.on("-f", "--format FORMAT", [:markdown, :yaml, :report, :sarif, :json], "Output format (markdown, yaml, report, sarif, json). Default: markdown") do |f| + opts.on("-f", "--format FORMAT", [:markdown, :yaml, :report, :dot, :sarif, :json], "Output format (markdown, yaml, report, dot, sarif, json). Default: markdown") do |f| options[:format] = f end @@ -70,6 +70,8 @@ if options[:manifest] ).to_markdown elsif options[:format] == :yaml Espalier::Formatter.to_yaml(manifest) + elsif options[:format] == :dot + Espalier::Formatter.to_dot(manifest) elsif %i[sarif json].include?(options[:format]) Espalier::Formatter.to_sarif(manifest) else @@ -147,6 +149,8 @@ manifest = aggregator.aggregate(modules) # Format & Outflow output_contents = if options[:format] == :yaml Espalier::Formatter.to_yaml(manifest) + elsif options[:format] == :dot + Espalier::Formatter.to_dot(manifest) elsif %i[sarif json].include?(options[:format]) Espalier::Formatter.to_sarif(manifest) elsif options[:format] == :report diff --git a/gems/espalier/lib/espalier.rb b/gems/espalier/lib/espalier.rb index 54fcdc5ce..709f5d7c1 100644 --- a/gems/espalier/lib/espalier.rb +++ b/gems/espalier/lib/espalier.rb @@ -5,6 +5,8 @@ require_relative "espalier/privacy_analyzer" require_relative "espalier/architecture_analyzer" require_relative "espalier/aggregator" +require_relative "espalier/dependency_graph" +require_relative "espalier/graphviz_formatter" require_relative "espalier/formatter" require_relative "espalier/reporter" diff --git a/gems/espalier/lib/espalier/dependency_graph.rb b/gems/espalier/lib/espalier/dependency_graph.rb new file mode 100644 index 000000000..bc4a29d8d --- /dev/null +++ 
b/gems/espalier/lib/espalier/dependency_graph.rb @@ -0,0 +1,486 @@ +# frozen_string_literal: true + +require "set" + +module Espalier + # Builds a manifest-derived dependency graph without owning rendering. + class DependencyGraph + Node = Struct.new(:id, :kind, :label, :owner, :file, :line, :metadata, keyword_init: true) + Edge = Struct.new(:source, :target, :kind, :label, :conditional, :weight, :metadata, keyword_init: true) + + CORE_TYPES = %w[ + Array BasicObject Boolean Class FalseClass Float Hash Integer NilClass + Object Proc Set String Symbol T TrueClass + ].freeze + + attr_reader :nodes_by_id, :edges_by_key + + def self.from_manifest(manifest, include_external: false) + Builder.new(manifest, include_external: include_external).build + end + + def self.owner_node_id(owner) + "owner:#{owner}" + end + + def self.function_node_id(owner, function_name) + "fn:#{owner}##{function_name}" + end + + def self.external_node_id(name) + "external:#{name}" + end + + def initialize + @nodes_by_id = {} + @edges_by_key = {} + end + + def add_node(node) + @nodes_by_id[node.id] ||= node + end + + def add_edge(edge) + edge.weight ||= 1 + edge.conditional = !!edge.conditional + key = [edge.source, edge.target, edge.kind, edge.label, edge.conditional] + existing = @edges_by_key[key] + if existing + existing.weight += edge.weight + else + @edges_by_key[key] = edge + end + end + + def nodes + @nodes_by_id.values.sort_by { |node| [node.kind.to_s, node.owner.to_s, node.id] } + end + + def edges + @edges_by_key.values.sort_by do |edge| + [edge.source, edge.target, edge.kind.to_s, edge.label.to_s, edge.conditional ? 
1 : 0] + end + end + + def owner_nodes + nodes.select { |node| node.kind == :owner } + end + + def function_nodes + nodes.select { |node| node.kind == :function } + end + + def nodes_for_owner(owner) + nodes.select { |node| node.owner == owner && node.kind != :external } + end + + def cyclic_node_ids + @cyclic_node_ids ||= begin + cyclic = Set.new + strongly_connected_components.each do |component| + next if component.size <= 1 + + component.each { |node_id| cyclic << node_id } + end + edges.each { |edge| cyclic << edge.source if edge.source == edge.target } + cyclic + end + end + + def cycle_component_by_node + @cycle_component_by_node ||= begin + out = {} + strongly_connected_components.each_with_index do |component, index| + next if component.size <= 1 + + component.each { |node_id| out[node_id] = index } + end + out + end + end + + private + + def strongly_connected_components + @strongly_connected_components ||= begin + index = 0 + stack = [] + indices = {} + lowlinks = {} + on_stack = Set.new + components = [] + adjacency = edges.each_with_object(Hash.new { |h, k| h[k] = [] }) do |edge, out| + out[edge.source] << edge.target + end + + visit = lambda do |node_id| + indices[node_id] = index + lowlinks[node_id] = index + index += 1 + stack << node_id + on_stack << node_id + + adjacency[node_id].each do |target| + if !indices.key?(target) + visit.call(target) + lowlinks[node_id] = [lowlinks[node_id], lowlinks[target]].min + elsif on_stack.include?(target) + lowlinks[node_id] = [lowlinks[node_id], indices[target]].min + end + end + + return unless lowlinks[node_id] == indices[node_id] + + component = [] + loop do + member = stack.pop + on_stack.delete(member) + component << member + break if member == node_id + end + components << component.sort + end + + @nodes_by_id.each_key { |node_id| visit.call(node_id) unless indices.key?(node_id) } + components + end + end + + class Builder + def initialize(manifest, include_external:) + @manifest = Array(manifest) + 
@include_external = include_external + @graph = DependencyGraph.new + @owners = Set.new + @owner_by_simple = {} + @functions_by_owner = Hash.new { |h, k| h[k] = Set.new } + @state_types_by_owner = Hash.new { |h, k| h[k] = {} } + end + + def build + index_manifest + add_nodes + add_state_type_edges + add_internal_call_edges + add_delegation_edges + @graph + end + + private + + def index_manifest + @manifest.each do |mod| + owner = value(mod, :module).to_s + next if owner.empty? + + @owners << owner + functions(mod).each { |fn| @functions_by_owner[owner] << value(fn, :name).to_s } + @state_types_by_owner[owner] = state_type_index(mod) + end + + grouped = @owners.group_by { |owner| owner.split("::").last } + @owner_by_simple = grouped.each_with_object({}) do |(simple, owners), out| + out[simple] = owners.first if owners.size == 1 + end + end + + def add_nodes + @manifest.each do |mod| + owner = value(mod, :module).to_s + next if owner.empty? + + @graph.add_node(owner_node(mod, owner)) + functions(mod).each do |fn| + name = value(fn, :name).to_s + next if name.empty? 
+ + @graph.add_node(function_node(mod, fn, owner, name)) + end + end + end + + def owner_node(mod, owner) + Node.new( + id: DependencyGraph.owner_node_id(owner), + kind: :owner, + label: owner, + owner: owner, + file: value(mod, :file), + line: value(mod, :line), + metadata: { + type: value(mod, :type), + language: value(mod, :language), + function_count: functions(mod).size, + state_count: states(mod).size + } + ) + end + + def function_node(mod, fn, owner, name) + effects = value(fn, :EFFECTS) || {} + Node.new( + id: DependencyGraph.function_node_id(owner, name), + kind: :function, + label: name, + owner: owner, + file: value(mod, :file), + line: value(fn, :line), + metadata: { + visibility: value(fn, :visibility) || :public, + signature: value(fn, :signature), + reads: Array(value(effects, :reads)), + writes: Array(value(effects, :writes)) + } + ) + end + + def add_state_type_edges + @manifest.each do |mod| + source_owner = value(mod, :module).to_s + states(mod).each do |state| + target_owner = owner_for_type(value(state, :type)) + next unless target_owner + next if target_owner == source_owner + + @graph.add_edge( + Edge.new( + source: DependencyGraph.owner_node_id(source_owner), + target: DependencyGraph.owner_node_id(target_owner), + kind: :state_type, + label: "state #{value(state, :name)}", + conditional: false, + weight: 1, + metadata: { state: value(state, :name) } + ) + ) + end + end + end + + def add_internal_call_edges + @manifest.each do |mod| + owner = value(mod, :module).to_s + graph = value(mod, :call_graph) || {} + Array(value(graph, :internal_edges)).each do |edge| + caller = value(edge, :caller).to_s + callee = value(edge, :callee).to_s + next unless function?(owner, caller) && function?(owner, callee) + + conditional = value(edge, :type).to_s == "conditional" + add_call_edge( + source_owner: owner, + source_function: caller, + target_id: DependencyGraph.function_node_id(owner, callee), + kind: :internal_call, + label: conditional ? 
"conditional internal" : "internal", + conditional: conditional + ) + end + end + end + + def add_delegation_edges + @manifest.each do |mod| + owner = value(mod, :module).to_s + functions(mod).each do |fn| + source_function = value(fn, :name).to_s + delegation_calls(fn).each do |call| + target = target_for_call(owner, call[:name]) + next unless target + + add_call_edge( + source_owner: owner, + source_function: source_function, + target_id: target[:id], + kind: target[:kind], + label: target[:kind] == :internal_call ? internal_label(call[:conditional]) : call_label(call[:name], call[:conditional], target[:method]), + conditional: call[:conditional], + metadata: { call: call[:name] } + ) + end + end + end + end + + def add_call_edge(source_owner:, source_function:, target_id:, kind:, label:, conditional:, metadata: {}) + source_id = DependencyGraph.function_node_id(source_owner, source_function) + return unless @graph.nodes_by_id.key?(source_id) + return unless @graph.nodes_by_id.key?(target_id) + + @graph.add_edge( + Edge.new( + source: source_id, + target: target_id, + kind: kind, + label: label, + conditional: conditional, + weight: 1, + metadata: metadata + ) + ) + end + + def delegation_calls(fn) + delegations = value(fn, :DELEGATIONS) || {} + always = Array(value(delegations, :always_calls)).map do |name| + { name: name.to_s, conditional: false } + end + conditional = Array(value(delegations, :conditionally_calls)).map do |name| + { name: name.to_s, conditional: true } + end + always + conditional + end + + def target_for_call(source_owner, call_name) + if function?(source_owner, call_name) + return { + id: DependencyGraph.function_node_id(source_owner, call_name), + kind: :internal_call, + method: call_name + } + end + + receiver = receiver_for(call_name) + return nil unless receiver + + method = method_for(call_name) + target_owner = owner_for_receiver(source_owner, receiver) + if target_owner + if method && function?(target_owner, method) + return { + id: 
DependencyGraph.function_node_id(target_owner, method), + kind: :delegation, + method: method + } + end + + return { + id: DependencyGraph.owner_node_id(target_owner), + kind: :owner_call, + method: method + } + end + + external_target(receiver, method) + end + + def external_target(receiver, method) + return nil unless @include_external + return nil unless receiver.match?(/\A[A-Z]/) + + id = DependencyGraph.external_node_id(receiver) + @graph.add_node( + Node.new( + id: id, + kind: :external, + label: receiver, + owner: nil, + file: nil, + line: nil, + metadata: { method: method } + ) + ) + { id: id, kind: :external_call, method: method } + end + + def owner_for_receiver(source_owner, receiver) + return nil if receiver == "self" || receiver == "this" + return source_owner if receiver == source_owner + + state_type = state_type_for(source_owner, receiver) + return owner_for_type(state_type) if state_type + + return nil unless receiver.match?(/\A[A-Z]/) + + owner_for_type(receiver) + end + + def state_type_for(owner, receiver) + state_types = @state_types_by_owner[owner] + return state_types[receiver] if state_types.key?(receiver) + + if receiver.start_with?("@") + state_name = receiver.split(".").first + return state_types[state_name] + end + + if receiver.start_with?("self.", "this.") + field = receiver.split(".")[1] + return state_types[field] || state_types["@#{field}"] + end + + nil + end + + def call_label(call_name, conditional, method) + label = method ? "calls #{method}" : "calls" + conditional ? "conditional #{label}" : label + end + + def internal_label(conditional) + conditional ? 
"conditional internal" : "internal" + end + + def receiver_for(call_name) + return nil unless call_name.include?(".") + + parts = call_name.split(".") + parts[0...-1].join(".") + end + + def method_for(call_name) + return nil unless call_name.include?(".") + + call_name.split(".").last + end + + def function?(owner, function_name) + @functions_by_owner[owner].include?(function_name.to_s) + end + + def owner_for_type(type_text) + return nil if type_text.nil? + + text = type_text.to_s + return text if @owners.include?(text) + return @owner_by_simple[text] if @owner_by_simple.key?(text) + + owner_type_tokens(text).each do |token| + next if CORE_TYPES.include?(token) + return token if @owners.include?(token) + return @owner_by_simple[token] if @owner_by_simple.key?(token) + + simple = token.split("::").last + return @owner_by_simple[simple] if @owner_by_simple.key?(simple) + end + nil + end + + def owner_type_tokens(text) + text.scan(/[A-Z][A-Za-z0-9]*(?:::[A-Z][A-Za-z0-9]*)*/) + end + + def state_type_index(mod) + states(mod).each_with_object({}) do |state, out| + state_name = value(state, :name).to_s + type = value(state, :type) + out[state_name] = type.to_s if type && !type.to_s.empty? 
+ end + end + + def functions(mod) + Array(value(mod, :functions)) + end + + def states(mod) + Array(value(mod, :state)) + end + + def value(hash, key) + return nil unless hash.respond_to?(:[]) + + hash[key] || hash[key.to_s] + end + end + end +end diff --git a/gems/espalier/lib/espalier/formatter.rb b/gems/espalier/lib/espalier/formatter.rb index 730db63e4..c05e21f6c 100644 --- a/gems/espalier/lib/espalier/formatter.rb +++ b/gems/espalier/lib/espalier/formatter.rb @@ -2,6 +2,8 @@ require "yaml" require "json" +require_relative "dependency_graph" +require_relative "graphviz_formatter" sibling_sarif = File.expand_path("../../../decomplex/lib/decomplex/sarif", __dir__) if File.file?("#{sibling_sarif}.rb") require sibling_sarif @@ -83,6 +85,10 @@ def to_yaml(manifest) YAML.dump(manifest) end + def to_dot(manifest) + GraphvizFormatter.new(DependencyGraph.from_manifest(manifest)).to_dot + end + def to_sarif(manifest) JSON.pretty_generate(to_sarif_hash(manifest)) end diff --git a/gems/espalier/lib/espalier/graphviz_formatter.rb b/gems/espalier/lib/espalier/graphviz_formatter.rb new file mode 100644 index 000000000..17e98b8de --- /dev/null +++ b/gems/espalier/lib/espalier/graphviz_formatter.rb @@ -0,0 +1,223 @@ +# frozen_string_literal: true + +module Espalier + # Renders an Espalier::DependencyGraph as Graphviz DOT. 
+ class GraphvizFormatter + GRAPH_ATTRIBUTES = { + rankdir: "LR", + compound: true, + concentrate: true, + fontsize: 12, + fontname: "Arial", + label: "Espalier Dependency Graph", + labelloc: "t", + nodesep: 0.35, + ranksep: 0.75 + }.freeze + + NODE_ATTRIBUTES = { + shape: "box", + style: "rounded,filled", + fillcolor: "#ffffff", + color: "#6b7280", + fontname: "Arial", + fontsize: 10 + }.freeze + + EDGE_ATTRIBUTES = { + color: "#4b5563", + fontname: "Arial", + fontsize: 9, + arrowsize: 0.7 + }.freeze + + def initialize(graph) + @graph = graph + end + + def to_dot + lines = [] + lines << "digraph espalier_dependencies {" + lines << " graph#{attributes(GRAPH_ATTRIBUTES)};" + lines << " node#{attributes(NODE_ATTRIBUTES)};" + lines << " edge#{attributes(EDGE_ATTRIBUTES)};" + lines << "" + owner_clusters.each do |owner, nodes| + lines.concat(cluster_lines(owner, nodes)) + end + external_nodes.each do |node| + lines << " #{quote(node.id)}#{attributes(node_attributes(node))};" + end + lines << "" + @graph.edges.each do |edge| + lines << " #{quote(edge.source)} -> #{quote(edge.target)}#{attributes(edge_attributes(edge))};" + end + lines << "}" + lines.join("\n") + end + + private + + def owner_clusters + @graph.owner_nodes.map(&:owner).sort.to_h do |owner| + [owner, @graph.nodes_for_owner(owner)] + end + end + + def external_nodes + @graph.nodes.select { |node| node.kind == :external } + end + + def cluster_lines(owner, nodes) + lines = [] + lines << " subgraph #{quote(cluster_id(owner))} {" + lines << " label=#{quote(owner)};" + lines << " color=#{quote("#d1d5db")};" + lines << " style=#{quote("rounded")};" + nodes.sort_by { |node| [node.kind == :owner ? 
0 : 1, node.label.to_s] }.each do |node| + lines << " #{quote(node.id)}#{attributes(node_attributes(node))};" + end + lines << " }" + lines << "" + lines + end + + def cluster_id(owner) + "cluster_#{owner.to_s.gsub(/[^A-Za-z0-9_]/, "_")}" + end + + def node_attributes(node) + attrs = case node.kind + when :owner + owner_node_attributes(node) + when :function + function_node_attributes(node) + else + external_node_attributes(node) + end + if @graph.cyclic_node_ids.include?(node.id) + attrs = attrs.merge(color: "#b91c1c", penwidth: 2.0, fillcolor: cycle_fill(node)) + end + attrs + end + + def owner_node_attributes(node) + metadata = node.metadata || {} + details = [] + details << metadata[:type].to_s if metadata[:type] + details << "#{metadata[:function_count]} fn" + details << "#{metadata[:state_count]} state" + { + shape: "component", + fillcolor: "#e0f2fe", + color: "#0369a1", + label: ([node.label] + details).join("\n"), + tooltip: tooltip_for(node) + }.merge(url_attribute(node)) + end + + def function_node_attributes(node) + metadata = node.metadata || {} + reads = Array(metadata[:reads]).size + writes = Array(metadata[:writes]).size + details = ["#{metadata[:visibility] || :public} R#{reads} W#{writes}"] + details << "L#{node.line}" if node.line + { + shape: writes.positive? ? "box3d" : "box", + fillcolor: writes.positive? ? "#fff7ed" : "#ffffff", + color: writes.positive? ? "#c2410c" : "#6b7280", + label: ([node.label] + details).join("\n"), + tooltip: tooltip_for(node) + }.merge(url_attribute(node)) + end + + def external_node_attributes(node) + { + shape: "box", + style: "rounded,dashed,filled", + fillcolor: "#f3f4f6", + color: "#9ca3af", + label: node.label, + tooltip: tooltip_for(node) + } + end + + def edge_attributes(edge) + attrs = { + label: edge.weight && edge.weight > 1 ? 
"#{edge.label} x#{edge.weight}" : edge.label + }.merge(edge_style(edge)) + + source_component = @graph.cycle_component_by_node[edge.source] + if source_component && source_component == @graph.cycle_component_by_node[edge.target] + attrs = attrs.merge(color: "#b91c1c", penwidth: 2.0) + end + attrs + end + + def edge_style(edge) + case edge.kind + when :state_type + { color: "#7c3aed", style: "dotted", arrowhead: "vee" } + when :internal_call + { color: "#374151", style: edge.conditional ? "dashed" : "solid" } + when :delegation + { color: "#2563eb", style: edge.conditional ? "dashed" : "solid" } + when :owner_call + { color: "#0891b2", style: edge.conditional ? "dashed" : "solid" } + when :external_call + { color: "#9ca3af", style: edge.conditional ? "dashed" : "dotted" } + else + { color: "#4b5563", style: edge.conditional ? "dashed" : "solid" } + end + end + + def cycle_fill(node) + node.kind == :owner ? "#fee2e2" : "#fff1f2" + end + + def tooltip_for(node) + parts = [node.label] + parts << node.file if node.file + parts << "line #{node.line}" if node.line + if (signature = node.metadata && node.metadata[:signature]) + parts << signature + end + parts.join(" | ") + end + + def url_attribute(node) + return {} unless node.file + + url = node.file.to_s + url += "#L#{node.line}" if node.line + { URL: url } + end + + def attributes(hash) + return "" if hash.empty? 
+ + " [" + hash.sort_by { |key, _value| key.to_s }.map { |key, value| "#{key}=#{dot_value(value)}" }.join(", ") + "]" + end + + def dot_value(value) + case value + when true + "true" + when false + "false" + when Numeric + value.to_s + else + quote(value) + end + end + + def quote(value) + text = value.to_s + text = text.gsub("\\", "\\\\\\\\") + .gsub("\"", "\\\"") + .gsub("\n", "\\n") + "\"#{text}\"" + end + end +end diff --git a/gems/espalier/test/dependency_graph_test.rb b/gems/espalier/test/dependency_graph_test.rb new file mode 100644 index 000000000..79f288f86 --- /dev/null +++ b/gems/espalier/test/dependency_graph_test.rb @@ -0,0 +1,193 @@ +# frozen_string_literal: true + +require "minitest/autorun" +require_relative "../lib/espalier" + +class DependencyGraphTest < Minitest::Test + def test_dot_output_renders_owner_function_and_dependency_edges + dot = Espalier::Formatter.to_dot(service_manifest) + + assert_includes dot, "digraph espalier_dependencies" + assert_includes dot, "\"cluster_Service\"" + assert_includes dot, "\"owner:Service\"" + assert_includes dot, "\"fn:Service#run\"" + assert_includes dot, "\"fn:Repository#fetch\"" + assert_includes dot, "\"fn:Service#run\" -> \"fn:Service#prepare\"" + assert_includes dot, "label=\"internal x2\"" + assert_includes dot, "\"fn:Service#run\" -> \"fn:Repository#fetch\"" + assert_includes dot, "label=\"calls fetch\"" + assert_includes dot, "\"fn:Service#run\" -> \"fn:Repository#retry\"" + assert_includes dot, "label=\"conditional calls retry\"" + assert_includes dot, "style=\"dashed\"" + assert_includes dot, "\"owner:Service\" -> \"owner:Repository\"" + assert_includes dot, "label=\"state @repo\"" + assert_includes dot, "style=\"dotted\"" + refute_includes dot, "external:String" + end + + def test_graph_aggregates_duplicate_internal_edges + graph = Espalier::DependencyGraph.from_manifest(service_manifest) + edges = graph.edges.select do |edge| + edge.source == "fn:Service#run" && + edge.target == 
"fn:Service#prepare" && + edge.kind == :internal_call + end + + assert_equal 1, edges.size + assert_equal 2, edges.first.weight + end + + def test_dot_output_escapes_labels_and_tooltips + manifest = [ + { + module: "Quoted\"Owner", + file: "src/quoted.rb", + type: :class, + functions: [ + { + name: "say_\"hello\"", + signature: "def say_\"hello\"", + line: 3, + EFFECTS: { reads: [], writes: [] }, + DELEGATIONS: {} + } + ] + } + ] + + dot = Espalier::Formatter.to_dot(manifest) + + assert_includes dot, "\"owner:Quoted\\\"Owner\"" + assert_includes dot, "label=\"Quoted\\\"Owner" + assert_includes dot, "say_\\\"hello\\\"" + end + + def test_string_key_manifest_from_yaml_is_supported + manifest = [ + { + "module" => "Client", + "file" => "src/client.rb", + "type" => "class", + "state" => [{ "name" => "@server", "type" => "Server" }], + "functions" => [ + { + "name" => "call", + "line" => 5, + "EFFECTS" => { "reads" => ["@server"], "writes" => [] }, + "DELEGATIONS" => { "always_calls" => ["@server.handle"] } + } + ] + }, + { + "module" => "Server", + "file" => "src/server.rb", + "type" => "class", + "functions" => [ + { + "name" => "handle", + "EFFECTS" => { "reads" => [], "writes" => [] }, + "DELEGATIONS" => {} + } + ] + } + ] + + dot = Espalier::Formatter.to_dot(manifest) + + assert_includes dot, "\"fn:Client#call\" -> \"fn:Server#handle\"" + assert_includes dot, "URL=\"src/client.rb#L5\"" + end + + def test_cycles_are_highlighted + dot = Espalier::Formatter.to_dot( + [ + owner("A", calls: ["B.call"]), + owner("B", calls: ["A.call"]) + ] + ) + + assert_includes dot, "\"fn:A#call\" -> \"fn:B#call\"" + assert_includes dot, "\"fn:B#call\" -> \"fn:A#call\"" + assert_includes dot, "penwidth=2.0" + assert_includes dot, "color=\"#b91c1c\"" + end + + private + + def service_manifest + [ + { + module: "Service", + file: "src/service.rb", + type: :class, + line: 1, + state: [{ name: "@repo", type: "Repository", properties: [] }], + functions: [ + { + name: "run", + signature: 
"def run", + visibility: :public, + line: 4, + EFFECTS: { reads: ["@repo"], writes: [] }, + DELEGATIONS: { + always_calls: ["prepare", "@repo.fetch", "String.upcase"], + conditionally_calls: ["Repository.retry"] + }, + CALL_GRAPH: { internal_calls: ["prepare"] } + }, + { + name: "prepare", + signature: "def prepare", + visibility: :private, + line: 10, + EFFECTS: { reads: [], writes: ["@repo"] }, + DELEGATIONS: {}, + CALL_GRAPH: { internal_callers: ["run"] } + } + ], + call_graph: { + internal_edges: [{ caller: "run", callee: "prepare", type: :always }] + } + }, + { + module: "Repository", + file: "src/repository.rb", + type: :class, + state: [], + functions: [ + { + name: "fetch", + signature: "def fetch", + visibility: :public, + line: 3, + EFFECTS: { reads: [], writes: [] }, + DELEGATIONS: {} + }, + { + name: "retry", + signature: "def retry", + visibility: :public, + line: 8, + EFFECTS: { reads: [], writes: [] }, + DELEGATIONS: {} + } + ] + } + ] + end + + def owner(name, calls:) + { + module: name, + file: "src/#{name.downcase}.rb", + type: :class, + functions: [ + { + name: "call", + EFFECTS: { reads: [], writes: [] }, + DELEGATIONS: { always_calls: calls } + } + ] + } + end +end From 19687ed9da583b9b1a7b75638655418eb4d400cf Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Thu, 18 Jun 2026 22:18:10 +0000 Subject: [PATCH 19/52] Improve tree-sitter facade lookup performance --- gems/decomplex/lib/decomplex/syntax.rb | 203 ++++++++++++++++++++++++- 1 file changed, 202 insertions(+), 1 deletion(-) diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index aa4f579cc..60960b709 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -377,7 +377,8 @@ def initialize(file:, language:, source:, lines:, root:, adapter:) @language = language @source = source @lines = lines - @root = root + @tree_sitter_facade = TreeSitterFacadeContext.new(root) + @root = @tree_sitter_facade.root @adapter = 
adapter end @@ -489,6 +490,206 @@ def type_aliases(lines) end end + class TreeSitterFacadeContext + attr_reader :root + + def initialize(raw_root) + @wrappers = {} + @children_cache = {} + @named_children_cache = {} + @named_field_cache = {} + @parent_cache = {} + @prev_sibling_cache = {} + @next_sibling_cache = {} + @prev_named_sibling_cache = {} + @next_named_sibling_cache = {} + @root = wrap(raw_root) + index_tree(raw_root) + end + + def wrap(raw) + return nil unless raw + return raw if raw.is_a?(TreeSitterNodeFacade) + + key = node_key(raw) + @wrappers[key] ||= TreeSitterNodeFacade.new(self, raw, key) + end + + def children(raw) + node = unwrap(raw) + @children_cache.fetch(node_key(node)) { [] } + end + + def named_children(raw) + node = unwrap(raw) + @named_children_cache.fetch(node_key(node)) { [] } + end + + def child_by_field_name(raw, name) + node = unwrap(raw) + key = [node_key(node), name.to_s] + return @named_field_cache[key] if @named_field_cache.key?(key) + + @named_field_cache[key] = wrap(node.child_by_field_name(name)) + rescue StandardError + nil + end + + def parent(raw) + @parent_cache[node_key(unwrap(raw))] + end + + def prev_sibling(raw) + @prev_sibling_cache[node_key(unwrap(raw))] + end + + def next_sibling(raw) + @next_sibling_cache[node_key(unwrap(raw))] + end + + def prev_named_sibling(raw) + @prev_named_sibling_cache[node_key(unwrap(raw))] + end + + def next_named_sibling(raw) + @next_named_sibling_cache[node_key(unwrap(raw))] + end + + def node_key(raw) + node = unwrap(raw) + [node.kind, node.start_byte, node.end_byte, node.named?] + end + + private + + def unwrap(raw) + raw.is_a?(TreeSitterNodeFacade) ? raw.raw : raw + end + + def index_tree(raw_root) + pending = [raw_root] + until pending.empty? 
+ raw = pending.pop + key = node_key(raw) + raw_children = Array(raw.children) + wrapped_children = raw_children.map { |child| wrap(child) } + @children_cache[key] = wrapped_children + @named_children_cache[key] = wrapped_children.select(&:named?) + + raw_children.each do |child| + child_key = node_key(child) + @parent_cache[child_key] = wrap(raw) + end + + index_siblings(raw_children, @prev_sibling_cache, @next_sibling_cache) + index_siblings(raw_children.select(&:named?), @prev_named_sibling_cache, @next_named_sibling_cache) + + pending.concat(raw_children.reverse) + end + end + + def index_siblings(raw_children, prev_cache, next_cache) + raw_children.each_with_index do |child, index| + key = node_key(child) + prev_cache[key] = wrap(raw_children[index - 1]) if index.positive? + next_cache[key] = wrap(raw_children[index + 1]) if index + 1 < raw_children.length + end + end + end + + class TreeSitterNodeFacade + attr_reader :context, :raw + + def initialize(context, raw, key) + @context = context + @raw = raw + @key = key + end + + def kind + @kind ||= raw.kind + end + + def text + @text ||= raw.text.to_s + end + + def start_byte + raw.start_byte + end + + def end_byte + raw.end_byte + end + + def start_point + raw.start_point + end + + def end_point + raw.end_point + end + + def named? + raw.named? + end + + def has_error? + raw.respond_to?(:has_error?) && raw.has_error? 
+ end + + def children + context.children(self) + end + + def named_children + context.named_children(self) + end + + def child_by_field_name(name) + context.child_by_field_name(self, name) + end + + def parent + context.parent(self) + end + + def prev_sibling + context.prev_sibling(self) + end + + def next_sibling + context.next_sibling(self) + end + + def prev_named_sibling + context.prev_named_sibling(self) + end + + def next_named_sibling + context.next_named_sibling(self) + end + + def ==(other) + other = other.raw if other.is_a?(TreeSitterNodeFacade) + other.respond_to?(:kind) && + kind == other.kind && + start_byte == other.start_byte && + end_byte == other.end_byte && + named? == other.named? + end + + alias eql? == + + def hash + @key.hash + end + + def inspect + "#<#{self.class} kind=#{kind.inspect} start_byte=#{start_byte} end_byte=#{end_byte}>" + end + end + class TreeSitterAdapter BRANCH_KINDS = %w[if unless if_statement if_modifier unless_modifier if_expression while until while_statement for for_statement From 0603acde121c454cd1cab7ba3a1b08bef34e7a36 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Fri, 19 Jun 2026 11:09:27 +0000 Subject: [PATCH 20/52] Fix cross-language tree normalizer parity --- gems/decomplex/lib/decomplex/ast.rb | 1869 +- gems/decomplex/rust/src/decomplex/ast.rs | 32460 ++++++++++++++++++--- gems/decomplex/test/ast_test.rb | 1846 +- 3 files changed, 31104 insertions(+), 5071 deletions(-) diff --git a/gems/decomplex/lib/decomplex/ast.rb b/gems/decomplex/lib/decomplex/ast.rb index e2b9974ec..44f72eee1 100644 --- a/gems/decomplex/lib/decomplex/ast.rb +++ b/gems/decomplex/lib/decomplex/ast.rb @@ -46,6 +46,83 @@ class TreeSitterNormalizationAdapter BINARY_WRAPPER_KINDS = %w[ binary binary_expression binary_operator boolean_operator comparison_operator ].freeze + CLASS_KINDS = %w[class class_definition class_declaration class_specifier].freeze + COMMON_ASSIGNMENT_OPERATORS = %w[= += -= *= /= %=].freeze + RUBY_ASSIGNMENT_OPERATORS = 
(COMMON_ASSIGNMENT_OPERATORS + %w[**= &&= ||= &= |= ^= <<= >>=]).freeze + PYTHON_ASSIGNMENT_OPERATORS = (COMMON_ASSIGNMENT_OPERATORS + %w[//= **= @= &= |= ^= <<= >>= :=]).freeze + LUA_ASSIGNMENT_OPERATORS = %w[=].freeze + TYPESCRIPT_ASSIGNMENT_OPERATORS = ( + COMMON_ASSIGNMENT_OPERATORS + %w[**= <<= >>= >>>= &= |= ^= &&= ||= ??=] + ).freeze + OPERATOR_CALL_OPERATORS = %w[+ - * / % ** | & ^ << >> =~ !~].freeze + BOOLEAN_EXPRESSION_KINDS = %w[binary binary_expression boolean_operator].freeze + COMPARISON_EXPRESSION_KINDS = %w[binary binary_expression comparison_operator].freeze + DOTTED_EXPRESSION_WRAPPER_KINDS = %w[body_statement block_body statement argument_list].freeze + PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS = (DOTTED_EXPRESSION_WRAPPER_KINDS + %w[expression_statement]).freeze + LITERAL_CONTAINER_KINDS = %w[string delimited_symbol regex regex_literal].freeze + LITERAL_FRAGMENT_KINDS = %w[string_content escape_sequence interpolation string_fragment].freeze + CASE_ARGUMENT_WHEN_KINDS = %w[ + when switch_case case_clause expression_case case_statement switch_section + switch_block_statement_group switch_entry when_entry match_arm + ].freeze + CASE_ELSE_KINDS = %w[else switch_default].freeze + CASE_DEFAULT_PATTERN_KINDS = %w[case_pattern match_pattern pattern].freeze + ADAPTER_FUNCTION_KINDS = %w[ + method function_definition function_declaration method_definition + method_declaration function_item singleton_method + ].freeze + STATEMENT_BLOCK_PARENT_KINDS = %w[ + method_declaration constructor_declaration function_declaration function_body + if_statement while_statement for_statement enhanced_for_statement try_statement + catch_clause finally_clause do_statement lambda_expression + ].freeze + IDENTIFIER_KINDS = %w[ + identifier simple_identifier property_identifier field_identifier shorthand_property_identifier + ].freeze + LEADING_FUNCTION_WRAPPER_KINDS = %w[body_statement statement].freeze + PYTHON_LEADING_FUNCTION_WRAPPER_KINDS = %w[block].freeze + 
LUA_LEADING_FUNCTION_WRAPPER_KINDS = %w[block].freeze + OWNER_STATEMENT_NESTED_KIND = %w[class class_definition class_declaration module].freeze + LEADING_OWNER_WRAPPER_KINDS = %w[body_statement statement].freeze + PYTHON_LEADING_OWNER_WRAPPER_KINDS = %w[block].freeze + IF_NODE_KINDS = %w[if if_statement if_modifier unless unless_modifier if_expression conditional].freeze + LEADING_IF_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze + PYTHON_LEADING_IF_WRAPPER_KINDS = %w[block].freeze + LUA_LEADING_IF_WRAPPER_KINDS = %w[block].freeze + LEADING_CASE_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze + LEADING_LOOP_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze + RESCUE_BODY_WRAPPER_KINDS = %w[body_statement block_body statement].freeze + ENSURE_BODY_WRAPPER_KINDS = %w[body_statement block_body statement].freeze + ARRAY_LITERAL_WRAPPER_KINDS = %w[ + body_statement block block_body statement argument_list expression_statement + ].freeze + ARRAY_LITERAL_NODE_KINDS = %w[array list].freeze + ELEMENT_REFERENCE_WRAPPER_KINDS = %w[ + body_statement block block_body statement expression_statement expression_list + ].freeze + ELEMENT_REFERENCE_NODE_KINDS = %w[ + element_reference subscript subscript_expression bracket_index_expression + ].freeze + HASH_LITERAL_WRAPPER_KINDS = %w[ + body_statement block block_body statement argument_list expression_statement parenthesized_expression + ].freeze + HASH_LITERAL_NODE_KINDS = %w[hash dictionary object table_constructor].freeze + EMPTY_BODY_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze + HEREDOC_BODY_WRAPPER_KINDS = %w[body_statement block_body statement then].freeze + INTERPOLATED_STATEMENT_WRAPPER_KINDS = %w[body_statement block_body statement argument_list].freeze + CONCATENATED_STRING_WRAPPER_KINDS = %w[body_statement block_body statement argument_list].freeze + PYTHON_CONCATENATED_STRING_WRAPPER_KINDS = (CONCATENATED_STRING_WRAPPER_KINDS + 
%w[block expression_statement]).freeze + CONCATENATED_STRING_NODE_KINDS = %w[chained_string concatenated_string].freeze + UNWRAP_KINDS = %w[ + parenthesized_expression parenthesized_statements expression_statement statement + case_pattern match_pattern pattern + ].freeze + PYTHON_BODY_FIELD_KINDS = %w[ + elif_clause else_clause for_statement function_definition if_statement + try_statement while_statement with_statement + ].freeze + QUESTION_COLON_TERNARY_KINDS = %w[body_statement block_body statement argument_list conditional].freeze + TYPESCRIPT_TERNARY_KINDS = (QUESTION_COLON_TERNARY_KINDS + %w[ternary_expression]).freeze class << self def for(document) @@ -86,109 +163,1274 @@ def explicit_alternative(node) nil end - def unary_not_expression?(node) - %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("!") + def unary_not_expression?(node) + %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("!") + end + + def unary_minus_expression?(node) + %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("-") + end + + def binary_operator(node) + direct_binary_operator(node).to_s + end + + def class_node?(node) + CLASS_KINDS.include?(node.kind) + end + + def unwrap_node?(node) + UNWRAP_KINDS.include?(node.kind) && node.named_children.size == 1 + end + + def interpolated_string?(node) + node.kind == "string" && node.named_children.any? { |child| child.kind == "interpolation" } + end + + def lambda_expression?(node) + !lambda_target(node).nil? 
+ rescue StandardError + false + end + + def lambda_target(node) + return node if node.kind == "lambda" + + nil + rescue StandardError + nil + end + + def interpolation_node?(node) + node.kind == "interpolation" + rescue StandardError + false + end + + def instance_variable?(node) + node.kind == "instance_variable" + rescue StandardError + false + end + + def global_variable?(node) + node.kind == "global_variable" + rescue StandardError + false + end + + def member_assignment_target?(_node) + false + end + + def identifier_text_node?(_node) + false + end + + def literal_fragment_assignment_context?(node) + parent = node.parent + return false unless parent.respond_to?(:kind) + return true if literal_container_kind?(parent) + + literal_fragment_kind?(node) && + parent.parent.respond_to?(:kind) && + literal_container_kind?(parent.parent) + rescue StandardError + false + end + + def assignment_operator?(text) + assignment_operators.include?(text.to_s) + end + + def named_field(node, name) + node.child_by_field_name(name) + rescue StandardError + nil + end + + def safe_navigation_call?(node) + node.children.any? { |child| !child.named? && child.text == "&." } + rescue StandardError + false + end + + def ternary_statement?(node) + !ternary_parts(node).nil? + end + + def ternary_parts(node) + question_colon_ternary_parts(node, QUESTION_COLON_TERNARY_KINDS) + end + + def case_argument_list?(_node) + false + end + + def case_arm?(node) + case_arm_kind?(node) && !case_else_arm?(node) + rescue StandardError + false + end + + def case_else_node(node) + stack = node.named_children.dup + until stack.empty? 
+ child = stack.shift + next unless child.respond_to?(:kind) + + return child if case_else_node?(child) + next if case_arm_kind?(child) + + stack.concat(child.named_children) unless adapter_function_kind?(child) + end + + nil + rescue StandardError + nil + end + + def case_else_arm?(_node) + false + end + + def case_else_node?(node) + CASE_ELSE_KINDS.include?(node&.kind) || case_else_arm?(node) + rescue StandardError + false + end + + def leading_function_statement?(node) + leading_function_statement_with_keyword?(node, "def", LEADING_FUNCTION_WRAPPER_KINDS) + end + + def leading_function_name(node) + node.named_children.find { |child| identifier_kind?(child) }&.text + rescue StandardError + nil + end + + def leading_function_body(node) + node.named_children.reverse.find { |child| child.kind == "body_statement" } + rescue StandardError + nil + end + + def leading_owner_statement?(node) + target = leading_owner_target(node) + return false unless target + + %w[class module].include?(target.children.first&.kind.to_s) && + target.named_children.size >= 2 && + !OWNER_STATEMENT_NESTED_KIND.include?(target.named_children.first.kind) + rescue StandardError + false + end + + def leading_owner_target(node) + node if LEADING_OWNER_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def leading_if_statement?(node) + target = leading_if_target(node) + return false unless target + + !!( + %w[if unless].include?(target.children.first&.kind.to_s) && + target.named_children.size >= 2 && + !IF_NODE_KINDS.include?(target.named_children.first.kind) + ) + rescue StandardError + false + end + + def leading_if_target(node) + node if LEADING_IF_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def leading_case_statement?(node) + target = leading_case_target(node) + return false unless target + + %w[case match switch].include?(target.children.first&.kind.to_s) && case_arm_descendant?(target) + rescue StandardError + false + end + + def 
leading_case_target(node) + node if LEADING_CASE_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def leading_loop_statement?(node) + target = leading_loop_target(node) + return false unless target + + !target.children.first&.named? && + %w[while until].include?(target.children.first&.kind.to_s) && + target.named_children.size >= 2 + rescue StandardError + false + end + + def leading_loop_target(node) + node if LEADING_LOOP_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def rescue_body_statement?(node) + rescue_clauses(node).any? + rescue StandardError + false + end + + def rescue_body_target(node) + node if RESCUE_BODY_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def rescue_body_nodes(node) + target = rescue_body_target(node) || node + named = target.named_children + rescue_index = named.index { |child| rescue_clause?(child) } + return [] unless rescue_index + + named[0...rescue_index] + rescue StandardError + [] + end + + def rescue_clauses(node) + target = rescue_body_target(node) + return [] unless target + + target.named_children.select { |child| rescue_clause?(child) } + rescue StandardError + [] + end + + def rescue_clause_exceptions(node) + exceptions = node.named_children.find { |child| child.kind == "exceptions" } + return [] unless exceptions + return [exceptions] if exceptions.text.to_s.match?(/\A[A-Z]\w*(?:::\w+)*\z/) + return [exceptions] if exceptions.named_children.empty? && !exceptions.text.to_s.strip.empty? 
+ + exceptions.named_children + rescue StandardError + [] + end + + def rescue_clause_exceptions_source(node) + node.named_children.find { |child| child.kind == "exceptions" } + rescue StandardError + nil + end + + def rescue_clause_exception_variable_name(node) + var = node.named_children.find { |child| child.kind == "exception_variable" } + var&.named_children&.find { |child| identifier_kind?(child) } + rescue StandardError + nil + end + + def rescue_clause_exception_variable_source(node) + node.named_children.find { |child| child.kind == "exception_variable" } + rescue StandardError + nil + end + + def rescue_clause_handler(node) + node.named_children.reverse.find do |child| + !%w[exceptions exception_variable comment].include?(child.kind) + end + rescue StandardError + nil + end + + def ensure_body_statement?(node) + !ensure_clause(node).nil? + rescue StandardError + false + end + + def ensure_body_target(node) + node if ENSURE_BODY_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def ensure_body_nodes(node) + target = ensure_body_target(node) || node + named = target.named_children + ensure_index = named.index { |child| ensure_clause?(child) } + return [] unless ensure_index + + named[0...ensure_index] + rescue StandardError + [] + end + + def ensure_clause(node) + target = ensure_body_target(node) + return nil unless target + + target.named_children.find { |child| ensure_clause?(child) } + rescue StandardError + nil + end + + def ensure_clause_body(_node) + nil + end + + def array_literal_statement?(node) + !array_literal_target(node).nil? 
+ rescue StandardError + false + end + + def array_literal_target(node) + return node if ARRAY_LITERAL_NODE_KINDS.include?(node.kind) + return nil unless ARRAY_LITERAL_WRAPPER_KINDS.include?(node.kind) + return node if bracketed?(node, "[", "]") + + child = exact_single_named_child(node, kinds: ARRAY_LITERAL_NODE_KINDS) + return child if child + + named = node.named_children + return nil unless named.size == 1 && ARRAY_LITERAL_NODE_KINDS.include?(named.first.kind) + + child = named.first + stripped = node.text.to_s.strip + child if stripped == child.text.to_s || stripped == "#{child.text};" + rescue StandardError + nil + end + + def array_literal_values(node) + target = array_literal_target(node) || node + target.named_children + rescue StandardError + [] + end + + def element_reference_statement?(node) + !element_reference_target(node).nil? + rescue StandardError + false + end + + def element_reference_target(node) + return node if ELEMENT_REFERENCE_NODE_KINDS.include?(node.kind) + return nil unless ELEMENT_REFERENCE_WRAPPER_KINDS.include?(node.kind) + + named = node.named_children + if named.size == 1 && ELEMENT_REFERENCE_NODE_KINDS.include?(named.first.kind) + stripped = node.text.to_s.strip + child = named.first + return child if stripped == child.text.to_s || stripped == "#{child.text};" + end + + node if element_reference_shape?(node) + rescue StandardError + nil + end + + def element_reference_receiver(node) + target = element_reference_target(node) || node + target.named_children.first + rescue StandardError + nil + end + + def element_reference_arguments(node) + target = element_reference_target(node) || node + target.named_children.drop(1) + rescue StandardError + [] + end + + def hash_literal_statement?(node) + !hash_literal_target(node).nil? 
+ rescue StandardError + false + end + + def hash_literal_target(node) + return node if HASH_LITERAL_NODE_KINDS.include?(node.kind) + return nil unless HASH_LITERAL_WRAPPER_KINDS.include?(node.kind) + return nil if statement_block_wrapper?(node) + return node if bracketed?(node, "{", "}") + + named = node.named_children + return nil unless named.size == 1 + + child = named.first + return hash_literal_target(child) if node.kind == "parenthesized_expression" + + stripped = node.text.to_s.strip + if stripped == child.text.to_s || stripped == "#{child.text};" + return child if HASH_LITERAL_NODE_KINDS.include?(child.kind) + return hash_literal_target(child) if HASH_LITERAL_WRAPPER_KINDS.include?(child.kind) + end + + nil + rescue StandardError + nil + end + + def hash_literal_values(node) + target = hash_literal_target(node) || node + target.named_children + rescue StandardError + [] + end + + def empty_body_statement?(node) + EMPTY_BODY_WRAPPER_KINDS.include?(node.kind) && + node.named_children.empty? && + node.text.to_s.strip.empty? + rescue StandardError + false + end + + def heredoc_body_statement?(node) + ruby? && + HEREDOC_BODY_WRAPPER_KINDS.include?(node.kind) && + node.named_children.any? { |child| child.kind == "heredoc_body" } + rescue StandardError + false + end + + def heredoc_call_for_body?(_node) + false + end + + def interpolated_statement?(node) + INTERPOLATED_STATEMENT_WRAPPER_KINDS.include?(node.kind) && + node.named_children.any? { |child| child.kind == "interpolation" } + rescue StandardError + false + end + + def concatenated_string_statement?(node) + !concatenated_string_target(node).nil? + rescue StandardError + false + end + + def concatenated_string_target(node) + return node if concatenated_string_node?(node) + return nil unless concatenated_string_wrapper_kinds.include?(node.kind) + + named = node.named_children + return node if named.size > 1 && named.all? 
{ |child| child.kind == "string" } + return named.first if named.size == 1 && concatenated_string_node?(named.first) + + nil + rescue StandardError + nil + end + + def zero_child_identifier_call?(_node) + false + end + + def operator_call_expression?(node) + operator_call_expression_kinds.include?(node.kind) && + OPERATOR_CALL_OPERATORS.include?(binary_operator(node)) + rescue StandardError + false + end + + def boolean_expression_kind?(node) + boolean_expression_kinds.include?(node.kind) + rescue StandardError + false + end + + def comparison_expression_kind?(node) + comparison_expression_kinds.include?(node.kind) + rescue StandardError + false + end + + def dotted_expression_wrapper?(node) + dotted_expression_wrapper_kinds.include?(node.kind) + rescue StandardError + false + end + + private + + def assignment_operators + COMMON_ASSIGNMENT_OPERATORS + end + + def operator_call_expression_kinds + %w[binary binary_expression] + end + + def boolean_expression_kinds + BOOLEAN_EXPRESSION_KINDS + end + + def comparison_expression_kinds + COMPARISON_EXPRESSION_KINDS + end + + def dotted_expression_wrapper_kinds + DOTTED_EXPRESSION_WRAPPER_KINDS + end + + def concatenated_string_wrapper_kinds + CONCATENATED_STRING_WRAPPER_KINDS + end + + def concatenated_string_node?(node) + CONCATENATED_STRING_NODE_KINDS.include?(node&.kind) && + node.named_children.size > 1 && + node.named_children.all? { |child| child.kind == "string" } + end + + def direct_binary_operator(node) + node.children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) }&.text + rescue StandardError + nil + end + + def question_colon_ternary_parts(node, kinds) + return nil unless kinds.include?(node.kind) + return nil unless node.children.any? { |child| !child.named? && child.text == "?" } + return nil unless node.children.any? { |child| !child.named? 
&& child.text == ":" } + + children = node.named_children + return nil unless children.size >= 3 + + children.first(3) + rescue StandardError + nil + end + + def leading_function_statement_with_keyword?(node, keyword, wrapper_kinds) + wrapper_kinds.include?(node.kind) && + node.children.first&.kind.to_s == keyword && + node.named_children.any? { |child| identifier_kind?(child) } + rescue StandardError + false + end + + def identifier_kind?(node) + IDENTIFIER_KINDS.include?(node&.kind) + end + + def exact_single_named_child(node, kinds:) + children = node.named_children + return nil unless children.size == 1 + + child = children.first + return nil unless kinds.include?(child.kind) + return nil unless node.text.to_s == child.text.to_s + + child + rescue StandardError + nil + end + + def case_arm_kind?(node) + CASE_ARGUMENT_WHEN_KINDS.include?(node&.kind) + end + + def default_case_pattern?(node) + pattern = node.named_children.find { |child| CASE_DEFAULT_PATTERN_KINDS.include?(child.kind) } + pattern&.text.to_s.strip == "_" + rescue StandardError + false + end + + def adapter_function_kind?(node) + ADAPTER_FUNCTION_KINDS.include?(node&.kind) + end + + def statement_block_wrapper?(node) + node.kind == "block" && STATEMENT_BLOCK_PARENT_KINDS.include?(node.parent&.kind) + rescue StandardError + false + end + + def case_arm_descendant?(node) + stack = node.named_children.dup + until stack.empty? 
+ child = stack.shift + next unless child.respond_to?(:kind) + return true if CASE_ARGUMENT_WHEN_KINDS.include?(child.kind) + + stack.concat(child.named_children) + end + + false + rescue StandardError + false + end + + def ruby_instance_variable_text?(text) + text.to_s.match?(/\A@[A-Za-z_]\w*[!?=]?\z/) + end + + def ruby_global_variable_text?(text) + text.to_s.match?(/\A\$[A-Za-z_]\w*[!?=]?\z/) + end + + def literal_container_kind?(node) + LITERAL_CONTAINER_KINDS.include?(node&.kind) + end + + def literal_fragment_kind?(node) + LITERAL_FRAGMENT_KINDS.include?(node&.kind) + end + + def rescue_clause?(node) + node&.kind == "rescue" + end + + def ensure_clause?(node) + node&.kind == "ensure" + end + + def bracketed?(node, opening, closing) + node.children.first&.text == opening && node.children.last&.text == closing + rescue StandardError + false + end + + def element_reference_shape?(node) + node.children.first&.text != "[" && + node.children.any? { |child| !child.named? && child.text == "[" } && + node.children.any? { |child| !child.named? && child.text == "]" } && + node.named_children.size >= 2 && + node.named_children.none? { |child| %w[block do_block].include?(child.kind) } + rescue StandardError + false + end + + def descendant(node, kinds:) + stack = node&.named_children.to_a + until stack.empty? + child = stack.shift + next unless child.respond_to?(:kind) + return child if kinds.include?(child.kind) + + stack.concat(child.named_children) + end + + nil + end + end + + class RubyTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def ruby? + true + end + + def super_statement?(node) + %w[body_statement block block_body statement].include?(node.kind) && + (node.text.to_s.strip == "super" || + (node.named_children.first&.kind == "super" && + node.named_children.drop(1).all? 
{ |child| child.kind == "argument_list" })) + rescue StandardError + false + end + + def explicit_alternative(node) + node.named_children.find { |child| %w[elsif else].include?(child.kind) } + rescue StandardError + nil + end + + def instance_variable?(node) + node.kind == "instance_variable" || ruby_instance_variable_text?(node.text) + rescue StandardError + false + end + + def global_variable?(node) + node.kind == "global_variable" || ruby_global_variable_text?(node.text) + rescue StandardError + false + end + + def case_argument_list?(node) + node.kind == "argument_list" && + node.children.any? { |child| !child.named? && child.kind == "case" } && + node.named_children.any? { |child| CASE_ARGUMENT_WHEN_KINDS.include?(child.kind) } + rescue StandardError + false + end + + def zero_child_identifier_call?(node) + node.kind == "call" && node.named_children.empty? && + node.text.to_s.match?(/\A[A-Za-z_]\w*[!?=]?\z/) + rescue StandardError + false + end + + def heredoc_call_for_body?(node) + return true if node.kind == "heredoc_beginning" + return true if %w[call argument_list].include?(node.kind) && + node.text.to_s.match?(/(?:\A|[\s(,])<<[-~]?[A-Za-z_]\w*/) + + node.named_children.any? do |child| + next false if child.named_children.any? 
{ |grandchild| grandchild.kind == "heredoc_body" } + + heredoc_call_for_body?(child) + end + rescue StandardError + false + end + + private + + def assignment_operators + RUBY_ASSIGNMENT_OPERATORS + end + end + + class PythonTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def yield_statement?(node) + (%w[body_statement block block_body expression_statement statement].include?(node.kind) && + node.children.first&.text == "yield") + rescue StandardError + false + end + + def explicit_alternative(node) + node.named_children.find { |child| %w[elif_clause else else_clause].include?(child.kind) } + rescue StandardError + nil + end + + def case_else_arm?(node) + node.kind == "case_clause" && default_case_pattern?(node) + rescue StandardError + false + end + + def named_field(node, name) + super || python_body_field(node, name) + end + + def leading_function_statement?(node) + leading_function_statement_with_keyword?(node, "def", PYTHON_LEADING_FUNCTION_WRAPPER_KINDS) + end + + def leading_function_body(node) + node.named_children.reverse.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def leading_owner_target(node) + return node if PYTHON_LEADING_OWNER_WRAPPER_KINDS.include?(node.kind) + + super + rescue StandardError + nil + end + + def leading_if_target(node) + if PYTHON_LEADING_IF_WRAPPER_KINDS.include?(node.kind) + child = exact_single_named_child(node, kinds: %w[if_statement]) + return child if child + end + + super + end + + def rescue_body_target(node) + return node if node.kind == "try_statement" + return node if flattened_try_block?(node, clauses: %w[except_clause]) + + if node.kind == "block" + child = exact_single_named_child(node, kinds: %w[try_statement]) + return child if child + end + + super + rescue StandardError + nil + end + + def rescue_body_nodes(node) + target = rescue_body_target(node) || node + return super unless target.kind == "try_statement" || flattened_try_block?(target, clauses: 
%w[except_clause]) + + target.named_children.take_while { |child| !%w[except_clause finally_clause].include?(child.kind) } + rescue StandardError + [] + end + + def rescue_clauses(node) + target = rescue_body_target(node) + return [] unless target + + target.named_children.select { |child| child.kind == "except_clause" } + rescue StandardError + [] + end + + def rescue_clause_exceptions(node) + pattern = node.named_children.find { |child| !%w[block comment].include?(child.kind) } + return [] unless pattern + return [pattern] unless pattern.kind == "as_pattern" + + exception = pattern.named_children.find { |child| child.kind != "as_pattern_target" } + exception ? [exception] : [] + rescue StandardError + [] + end + + def rescue_clause_exceptions_source(node) + rescue_clause_exceptions(node).first + rescue StandardError + nil + end + + def rescue_clause_exception_variable_name(node) + pattern = node.named_children.find { |child| child.kind == "as_pattern" } + descendant(pattern, kinds: %w[as_pattern_target]) + rescue StandardError + nil + end + + def rescue_clause_exception_variable_source(node) + rescue_clause_exception_variable_name(node) + rescue StandardError + nil + end + + def rescue_clause_handler(node) + node.named_children.reverse.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def ensure_body_target(node) + return node if node.kind == "try_statement" + return node if flattened_try_block?(node, clauses: %w[finally_clause]) + + if node.kind == "block" + child = exact_single_named_child(node, kinds: %w[try_statement]) + return child if child + end + + super + rescue StandardError + nil + end + + def ensure_body_nodes(node) + target = ensure_body_target(node) || node + return super unless target.kind == "try_statement" || flattened_try_block?(target, clauses: %w[finally_clause]) + + target.named_children.take_while { |child| child.kind != "finally_clause" } + rescue StandardError + [] + end + + def ensure_clause(node) + target = 
ensure_body_target(node) + return nil unless target + + target.named_children.find { |child| child.kind == "finally_clause" } + rescue StandardError + nil + end + + def ensure_clause_body(node) + node.named_children.reverse.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def ternary_parts(node) + return nil unless node.kind == "conditional_expression" + + children = node.named_children + return nil unless children.size >= 3 + + [children[1], children[0], children[2]] + rescue StandardError + nil + end + + def unary_minus_expression?(node) + (%w[unary unary_expression unary_operator].include?(node.kind) && node.text.to_s.lstrip.start_with?("-")) + end + + def empty_body_statement?(node) + super || + (node.kind == "block" && node.named_children.empty? && node.text.to_s.strip == "pass") || + node.kind == "pass_statement" + rescue StandardError + false + end + + private + + def flattened_try_block?(node, clauses:) + node.kind == "block" && + node.children.first&.text == "try" && + node.named_children.any? 
{ |child| clauses.include?(child.kind) } + rescue StandardError + false + end + + def python_body_field(node, name) + return nil unless %w[body consequence].include?(name.to_s) + return nil unless PYTHON_BODY_FIELD_KINDS.include?(node.kind) + + node.named_children.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def assignment_operators + PYTHON_ASSIGNMENT_OPERATORS + end + + def operator_call_expression_kinds + super + %w[binary_operator] + end + + def concatenated_string_wrapper_kinds + PYTHON_CONCATENATED_STRING_WRAPPER_KINDS + end + + def dotted_expression_wrapper_kinds + PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS + end + end + + class LuaTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def explicit_alternative(node) + node.named_children.find { |child| %w[elseif_statement else else_statement].include?(child.kind) } + rescue StandardError + nil + end + + def unary_minus_expression?(node) + super || + (node.kind == "expression_list" && node.children.first&.text == "-" && node.named_children.size == 1) + rescue StandardError + false + end + + def binary_operator(node) + direct = direct_binary_operator(node) + return direct.to_s if direct + + child = exact_single_named_child(node, kinds: BINARY_WRAPPER_KINDS) + child ? 
binary_operator(child) : "" + end + + def unwrap_node?(node) + super || + (node.kind == "expression_list" && + node.named_children.size == 1 && + node.children.first&.text == "(" && + node.children.last&.text == ")") + rescue StandardError + false + end + + def leading_function_statement?(node) + leading_function_statement_with_keyword?(node, "function", LUA_LEADING_FUNCTION_WRAPPER_KINDS) + end + + def leading_function_body(node) + node.named_children.reverse.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def leading_if_target(node) + if LUA_LEADING_IF_WRAPPER_KINDS.include?(node.kind) + child = exact_single_named_child(node, kinds: %w[if_statement]) + return child if child + end + + super + end + + def array_literal_target(node) + if node.kind == "block" + named = node.named_children + if named.size == 2 && named.first.kind == "identifier" && named.first.text.to_s.empty? + target = lua_positional_table_arguments(named[1]) + return target if target + end + end + + target = lua_positional_table_arguments(node) + return target if target + + super + rescue StandardError + nil + end + + def hash_literal_target(node) + target = lua_keyed_table_arguments(node) + return target if target + + super + rescue StandardError + nil + end + + def hash_literal_values(node) + target = hash_literal_target(node) || node + return target.named_children if target.kind == "arguments" + + super + rescue StandardError + [] + end + + def identifier_text_node?(node) + %w[variable_list expression_list].include?(node.kind) && + node.text.to_s.match?(/\A[A-Za-z_]\w*\z/) + rescue StandardError + false + end + + def member_assignment_target?(node) + return false unless node.kind == "variable_list" + + node.named_children.size == 2 && + node.children.any? { |child| !child.named? && child.text == "." 
} + rescue StandardError + false + end + + def literal_fragment_assignment_context?(node) + return true if super + + literal_fragment_kind?(node) && node.parent&.kind == "expression_list" + rescue StandardError + false + end + + def lambda_target(node) + return node if node.kind == "function_definition" + + if node.kind == "expression_list" + return node if node.children.first&.kind == "function" && + node.named_children.any? { |child| child.kind == "block" } + + named = node.named_children + return named.first if named.size == 1 && named.first.kind == "function_definition" + end + + super + rescue StandardError + nil + end + + private + + def lua_positional_table_arguments(node) + return nil unless node&.kind == "arguments" + return nil unless bracketed?(node, "{", "}") + + fields = node.named_children + return nil if fields.empty? + return nil unless fields.all? { |field| field.kind == "field" && field.named_children.size <= 1 } + + node + end + + def lua_keyed_table_arguments(node) + if node&.kind == "block" + named = node.named_children + if named.size == 2 && named.first.kind == "identifier" && named.first.text.to_s.empty? + return lua_keyed_table_arguments(named[1]) + end + end + + return nil unless node&.kind == "arguments" + return nil unless bracketed?(node, "{", "}") + + fields = node.named_children + return node if fields.empty? + return nil if fields.all? 
{ |field| field.kind == "field" && field.named_children.size <= 1 } + + node + end + + private + + def assignment_operators + LUA_ASSIGNMENT_OPERATORS + end + + def operator_call_expression_kinds + super + %w[expression_list] + end + + def boolean_expression_kinds + super + %w[expression_list] + end + + def comparison_expression_kinds + super + %w[expression_list] + end + end + + class TypeScriptTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def explicit_alternative(node) + node.named_children.find { |child| %w[else else_clause].include?(child.kind) } + rescue StandardError + nil + end + + def safe_navigation_call?(node) + super || + node.children.any? { |child| child.kind == "optional_chain" && child.text.to_s == "?." } || + (node.kind == "call_expression" && node.named_children.any? { |child| safe_navigation_call?(child) }) + rescue StandardError + false end - def unary_minus_expression?(node) - %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("-") + def ternary_parts(node) + question_colon_ternary_parts(node, TYPESCRIPT_TERNARY_KINDS) end - def binary_operator(node) - direct_binary_operator(node).to_s + def interpolated_string?(node) + super || + (node.kind == "template_string" && + node.named_children.any? { |child| child.kind == "template_substitution" }) end - private + def lambda_target(node) + return node if %w[arrow_function function_expression].include?(node.kind) - def direct_binary_operator(node) - node.children.find { |child| !child.named? 
&& !%w[( )].include?(child.text.to_s) }&.text + super rescue StandardError nil end - def exact_single_named_child(node, kinds:) - children = node.named_children - return nil unless children.size == 1 + def interpolation_node?(node) + super || node.kind == "template_substitution" + rescue StandardError + false + end - child = children.first - return nil unless kinds.include?(child.kind) - return nil unless node.text.to_s == child.text.to_s + def rescue_body_target(node) + return node if node.kind == "try_statement" - child + if node.kind == "statement_block" + child = exact_single_named_child(node, kinds: %w[try_statement]) + return child if child + end + + super rescue StandardError nil end - end - class RubyTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter - def ruby? - true + def rescue_body_nodes(node) + target = rescue_body_target(node) || node + return super unless target.kind == "try_statement" + + target.named_children.take_while { |child| !%w[catch_clause finally_clause].include?(child.kind) } + rescue StandardError + [] end - def super_statement?(node) - %w[body_statement block block_body statement].include?(node.kind) && - (node.text.to_s.strip == "super" || - (node.named_children.first&.kind == "super" && - node.named_children.drop(1).all? 
{ |child| child.kind == "argument_list" })) + def rescue_clauses(node) + target = rescue_body_target(node) + return [] unless target + + target.named_children.select { |child| child.kind == "catch_clause" } rescue StandardError - false + [] end - def explicit_alternative(node) - node.named_children.find { |child| %w[elsif else].include?(child.kind) } + def rescue_clause_exception_variable_name(node) + node.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) } rescue StandardError nil end - end - class PythonTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter - def yield_statement?(node) - (%w[body_statement block block_body expression_statement statement].include?(node.kind) && - node.children.first&.text == "yield") + def rescue_clause_exception_variable_source(node) + rescue_clause_exception_variable_name(node) rescue StandardError - false + nil end - def explicit_alternative(node) - node.named_children.find { |child| %w[elif_clause else else_clause].include?(child.kind) } + def rescue_clause_handler(node) + node.named_children.reverse.find { |child| child.kind == "statement_block" } rescue StandardError nil end - def unary_minus_expression?(node) - (%w[unary unary_expression unary_operator].include?(node.kind) && node.text.to_s.lstrip.start_with?("-")) - end - end + def ensure_body_target(node) + return node if node.kind == "try_statement" - class LuaTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter - def explicit_alternative(node) - node.named_children.find { |child| %w[elseif_statement else else_statement].include?(child.kind) } + if node.kind == "statement_block" + child = exact_single_named_child(node, kinds: %w[try_statement]) + return child if child + end + + super rescue StandardError nil end - def unary_minus_expression?(node) - super || - (node.kind == "expression_list" && node.children.first&.text == "-" && node.named_children.size == 1) + def ensure_body_nodes(node) + target = ensure_body_target(node) || node 
+ return super unless target.kind == "try_statement" + + target.named_children.take_while { |child| child.kind != "finally_clause" } rescue StandardError - false + [] end - def binary_operator(node) - direct = direct_binary_operator(node) - return direct.to_s if direct + def ensure_clause(node) + target = ensure_body_target(node) + return nil unless target - child = exact_single_named_child(node, kinds: BINARY_WRAPPER_KINDS) - child ? binary_operator(child) : "" + target.named_children.find { |child| child.kind == "finally_clause" } + rescue StandardError + nil end - end - class TypeScriptTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter - def explicit_alternative(node) - node.named_children.find { |child| %w[else else_clause].include?(child.kind) } + def ensure_clause_body(node) + node.named_children.reverse.find { |child| child.kind == "statement_block" } rescue StandardError nil end + + def empty_body_statement?(node) + super || + (node.kind == "statement_block" && node.named_children.empty? && node.text.to_s.strip == "{}") + rescue StandardError + false + end + + private + + def assignment_operators + TYPESCRIPT_ASSIGNMENT_OPERATORS + end end # Tree-sitter exposes each grammar's native node names. 
Decomplex's @@ -254,7 +1496,7 @@ class TreeSitterNormalizer "continue_statement" => :NEXT }.freeze COMPARISON_OPERATORS = %w[== != === !== < <= > >=].freeze - OPERATOR_CALL_OPERATORS = %w[+ - * / % ** | & ^ << >> =~ !~].freeze + OPERATOR_CALL_OPERATORS = TreeSitterNormalizationAdapter::OPERATOR_CALL_OPERATORS INFIX_STATEMENT_OPERATORS = (OPERATOR_CALL_OPERATORS + COMPARISON_OPERATORS).freeze INLINE_DEF_WRAPPER_MIDS = %w[ public protected private private_class_method module_function @@ -291,11 +1533,16 @@ def normalize_node(node) return normalize_infix_statement(node) if infix_statement?(node) return normalize_dotted_expression(node) if dotted_expression?(node) return normalize_unary_not_statement(node) if unary_not_statement?(node) + return normalize_wrapped_return_statement(node) if wrapped_return_statement?(node) if leading_function_statement?(node) normalize_leading_function_statement(node) elsif leading_if_statement?(node) normalize_leading_if_statement(node) + elsif ensure_body_statement?(node) + normalize_ensure_body_statement(node) + elsif rescue_body_statement?(node) + normalize_rescue_body_statement(node) elsif modifier_statement?(node) normalize_modifier_statement(node) elsif ternary_statement?(node) @@ -304,6 +1551,8 @@ def normalize_node(node) normalize_statement_call_with_block(node) elsif command_call_statement?(node) normalize_command_call_statement(node) + elsif lambda_expression?(node) + normalize_lambda(node) elsif FUNCTION_KINDS.include?(node.kind) normalize_function(node) elsif class_node?(node) @@ -320,6 +1569,12 @@ def normalize_node(node) normalize_loop(node) elsif CASE_KINDS.include?(node.kind) || hidden_match?(node) normalize_case(node) + elsif hash_literal_statement?(node) + normalize_hash_literal_statement(node) + elsif array_literal_statement?(node) + normalize_array_literal_statement(node) + elsif element_reference_statement?(node) + normalize_element_reference_statement(node) elsif node.kind == "element_reference" 
normalize_element_reference(node) elsif node.kind == "rescue_modifier" @@ -340,8 +1595,6 @@ def normalize_node(node) normalize_pair(node) elsif node.kind == "singleton_class" normalize_singleton_class(node) - elsif node.kind == "lambda" - normalize_lambda(node) elsif node.kind == "yield" normalize_yield(node) elsif yield_statement?(node) @@ -352,7 +1605,7 @@ def normalize_node(node) normalize_heredoc_beginning(node) elsif node.kind == "chained_string" normalize_chained_string(node) - elsif node.kind == "interpolation" + elsif interpolation_node?(node) normalize_interpolation(node) elsif unary_minus_expression?(node) normalize_unary_minus(node) @@ -421,7 +1674,7 @@ def normalize_function(node) ) ) end - wrap(:DEFN, children: [name, scope(body, args: args)], source: node) + wrap(:DEFN, children: [name, scope(body, args: args, source: node)], source: node) end def normalize_singleton_function(node) @@ -436,19 +1689,19 @@ def normalize_singleton_function(node) ) ) end - wrap(:DEFS, children: [normalize_node(receiver), name, scope(body, args: args)], source: node) + wrap(:DEFS, children: [normalize_node(receiver), name, scope(body, args: args, source: node)], source: node) end def normalize_class(node) name = const_for(named_field(node, "name") || first_named(node)) body = normalize_body(named_field(node, "body") || block_child(node)) - wrap(:CLASS, children: [name, nil, scope(body)], source: node) + wrap(:CLASS, children: [name, nil, scope(body, source: node)], source: node) end def normalize_module(node) name = const_for(named_field(node, "name") || first_named(node)) body = normalize_body(named_field(node, "body") || block_child(node)) - wrap(:MODULE, children: [name, scope(body)], source: node) + wrap(:MODULE, children: [name, scope(body, source: node)], source: node) end def normalize_impl(node) @@ -458,7 +1711,7 @@ def normalize_impl(node) end name = const_for(type_node || node) body = normalize_body(named_field(node, "body") || block_child(node) || node) - 
wrap(:CLASS, children: [name, nil, scope(body)], source: node) + wrap(:CLASS, children: [name, nil, scope(body, source: node)], source: node) end def normalize_if(node) @@ -675,15 +1928,16 @@ def normalize_block_argument(node) def normalize_singleton_class(node) recv = normalize_node(node.named_children.first) body = normalize_body(node.named_children[1]) - wrap(:SCLASS, children: [recv, scope(body)], source: node) + wrap(:SCLASS, children: [recv, scope(body, source: node)], source: node) end def normalize_lambda(node) - body_node = named_field(node, "body") || block_child(node) || node.named_children.last - body = with_ruby_scope(node) do + target = lambda_target(node) || node + body_node = named_field(target, "body") || block_child(target) || target.named_children.last + body = with_ruby_scope(target) do dynamic_scope(normalize_body(body_node)) end - wrap(:LAMBDA, children: [scope(body)], source: node) + wrap(:LAMBDA, children: [scope(body, source: target)], source: target) end def normalize_yield(node) @@ -782,27 +2036,36 @@ def normalize_dotted_expression(node) body = with_ruby_scope(block) do dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) end - wrap(:ITER, children: [call, scope(body, args: args)], source: node) + wrap(:ITER, children: [call, scope(body, args: args, source: node)], source: node) end def normalize_dotted_call_expression(node, source: node) - recv, mid = dotted_call_parts(node) - args = call_arguments(node, nil) - type = safe_navigation_call?(node) ? :QCALL : :CALL + target = dotted_call_target(node) || node + recv, mid = dotted_call_parts(target) + args = call_arguments(target, nil) + type = safe_navigation_call?(target) ? :QCALL : :CALL wrap(type, children: [normalize_node(recv), mid.to_sym, list(args, source: source)], source: source) end def normalize_argument_list_call_with_block(node) + return nil unless ruby? 
&& ts_node?(node) && node.kind == "argument_list" + block = call_block(node) + return nil unless block + call = normalize_argument_list_call(node) + return nil unless call + args = normalize_block_parameters(block) body = with_ruby_scope(block) do dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) end - wrap(:ITER, children: [call, scope(body, args: args)], source: node) + wrap(:ITER, children: [call, scope(body, args: args, source: node)], source: node) end def normalize_argument_list_call(node) + return nil unless ruby? && ts_node?(node) && node.kind == "argument_list" + function = node.named_children.first args_node = node.named_children.find { |child| child.kind == "argument_list" } args = args_node ? args_node.named_children.map { |child| normalize_node(child) }.compact : [] @@ -846,6 +2109,21 @@ def normalize_return(node) normalize_return_node(node, elide_symbol: false) end + def wrapped_return_statement?(node) + return false unless ts_node?(node) + return false unless %w[body_statement block_body statement block].include?(node.kind) + return false if node.text.to_s.include?("\n") + + keyword = node.children.first + keyword && !keyword.named? && RETURN_KINDS.key?(keyword.kind) + end + + def normalize_wrapped_return_statement(node) + keyword = node.children.first + children = node.named_children.map { |child| normalize_return_value(child) }.compact + wrap(RETURN_KINDS.fetch(keyword.kind), children: children, source: node) + end + def normalize_return_node(node, elide_symbol:) children = node.named_children.map { |child| normalize_return_value(child) }.compact return children.first if elide_symbol && ruby? && children.size == 1 && symbol_literal_node?(children.first) @@ -888,6 +2166,8 @@ def argument_list_element_reference?(node) end def normalize_argument_list_element_reference(node) + return nil unless ruby? 
&& ts_node?(node) && argument_list_element_reference?(node) + recv = node.named_children.first args = node.named_children.drop(1).map { |child| normalize_node(child) }.compact wrap(:CALL, children: [normalize_node(recv), :[], list(args, source: node)], source: node) @@ -900,7 +2180,7 @@ def normalize_call_with_block(node) body = with_ruby_scope(block) do dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) end - wrap(:ITER, children: [call, scope(body, args: args)], source: node) + wrap(:ITER, children: [call, scope(body, args: args, source: node)], source: node) end def normalize_call_without_block(node, block) @@ -989,7 +2269,7 @@ def normalize_command_call_statement(node) body = with_ruby_scope(block) do dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) end - wrap(:ITER, children: [call, scope(body, args: block_args)], source: node) + wrap(:ITER, children: [call, scope(body, args: block_args, source: node)], source: node) end def dynamic_scope(node) @@ -1026,6 +2306,7 @@ def normalize_const(node) def normalize_children(node) node.named_children.filter_map do |child| + next if child.kind == "heredoc_body" next if assignment_rhs?(child) normalize_node(child) @@ -1043,6 +2324,7 @@ def normalize_body(node) return normalize_leading_loop_statement(node) if leading_loop_statement?(node) return normalize_leading_if_statement(node) if leading_if_statement?(node) return normalize_elsif(node) if node.kind == "elsif" + return normalize_wrapped_return_statement(node) if wrapped_return_statement?(node) return normalize_yield_statement(node) if yield_statement?(node) return normalize_super_statement(node) if super_statement?(node) return normalize_unary_not_statement(node) if unary_not_statement?(node) @@ -1092,10 +2374,21 @@ def normalize_patterns(node) patterns = [node.named_children.find { |child| !BLOCK_KINDS.include?(child.kind) && !statement_node?(child) }].compact if patterns.empty? 
patterns.flat_map do |pattern| - if pattern.text.to_s.include?("::") - [wrap(:CONST, children: [pattern.text.to_s.to_sym], source: pattern)] + pattern_text = pattern.text.to_s + pattern_children = pattern.named_children + if pattern_text.include?("::") + [wrap(:CONST, children: [pattern_text.to_sym], source: pattern)] + elsif %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) && + pattern_children.empty? && pattern_text.match?(/\A-?\d+\z/) + [wrap(:INTEGER, children: [], source: pattern)] + elsif ruby? && %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) && + pattern_children.empty? && pattern_text.match?(/\A[A-Z]\w*\z/) + [wrap(:CONST, children: [pattern_text.to_sym], source: pattern)] + elsif ruby? && %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) && + pattern_children.empty? && pattern_text.match?(/\A[A-Za-z_]\w*[!?=]?\z/) + [local_or_call_for_name(pattern_text, pattern)] elsif %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) - pattern.named_children.map { |child| normalize_node(child) }.compact + pattern_children.map { |child| normalize_node(child) }.compact else [normalize_node(pattern)].compact end @@ -1121,7 +2414,7 @@ def assignment_target(left, right, source: nil) source: source) end - if member_read_node?(left) + if member_read_node?(left) || normalization_adapter.member_assignment_target?(left) recv, mid = member_parts(left) writer = left.text.to_s.include?("&.") ? 
mid.to_sym : "#{mid}=".to_sym return wrap(:ATTRASGN, children: [normalize_node(recv), writer, list([right], source: left)], @@ -1137,7 +2430,7 @@ def normalize_assignment_lhs(node) right = normalize_node(next_named_sibling(node)) source = parent_node(node) || node assignment_target(node, right, source: source) || - wrap(:LASGN, children: [target_name(node), right], source: node) + wrap(:LASGN, children: [target_name(node), right], source: source) end def target_name(left) @@ -1163,8 +2456,10 @@ def case_arms(node) child = stack.shift next unless ts_node?(child) - if WHEN_KINDS.include?(child.kind) + if normalization_adapter.case_arm?(child) arms << child + elsif normalization_adapter.case_else_node?(child) + next else stack.concat(child.named_children) unless FUNCTION_KINDS.include?(child.kind) end @@ -1186,7 +2481,14 @@ def link_when_chain(whens, fallback = nil) end def case_else_body(node) - else_node = node.named_children.find { |child| child.kind == "else" } + else_node = normalization_adapter.case_else_node(node) + return nil unless else_node + + if normalization_adapter.case_else_arm?(else_node) || else_node.kind == "switch_default" + body = when_body(else_node) + return normalize_body(body) if body + end + normalize_else_or_branch(else_node) end @@ -1205,7 +2507,7 @@ def link_rescue_chain(resbodies) end def boolean_expression?(node) - (%w[binary binary_expression boolean_operator].include?(node.kind) || boolean_statement?(node)) && + (normalization_adapter.boolean_expression_kind?(node) || boolean_statement?(node)) && %w[and or].include?(boolean_operator(node)) end @@ -1220,8 +2522,7 @@ def boolean_statement?(node) end def operator_call_expression?(node) - %w[binary binary_expression].include?(node.kind) && - OPERATOR_CALL_OPERATORS.include?(binary_operator(node)) + normalization_adapter.operator_call_expression?(node) end def infix_statement?(node) @@ -1230,7 +2531,7 @@ def infix_statement?(node) end def dotted_expression?(node) - %w[body_statement 
block_body statement argument_list].include?(node.kind) && dotted_call?(node) + normalization_adapter.dotted_expression_wrapper?(node) && dotted_call?(node) end def argument_list_call_with_block?(node) @@ -1281,12 +2582,16 @@ def unary_not_statement?(node) end def normalize_argument_list_unary_not(node) + return nil unless ruby? && ts_node?(node) && argument_list_unary_not?(node) + operand = node.named_children.first wrap(:OPCALL, children: [normalize_node(operand), :!, nil], source: node) end def comparison_expression?(node) - %w[binary binary_expression comparison_operator].include?(node.kind) && + return false if literal_fragment_expression_list?(node) + + normalization_adapter.comparison_expression_kind?(node) && COMPARISON_OPERATORS.include?(comparison_operator(node)) end @@ -1316,7 +2621,10 @@ def boolean_operator(node) end def comparison_operator(node) - binary_operator(node) || spaced_text(node)[/(===|!==|==|!=|<=|>=|<|>)/, 1] + direct = binary_operator(node) + return direct if COMPARISON_OPERATORS.include?(direct) + + spaced_text(node)[/(===|!==|==|!=|<=|>=|<|>)/, 1] end def binary_operator(node) @@ -1328,7 +2636,7 @@ def spaced_text(node) end def class_node?(node) - CLASS_KINDS.include?(node.kind) + normalization_adapter.class_node?(node) end def module_node?(node) @@ -1336,10 +2644,7 @@ def module_node?(node) end def unwrap_node?(node) - %w[ - parenthesized_expression parenthesized_statements expression_statement statement - case_pattern match_pattern pattern - ].include?(node.kind) && node.named_children.size == 1 + normalization_adapter.unwrap_node?(node) end def statement_node?(node) @@ -1389,7 +2694,19 @@ def normalization_adapter end def interpolated_string?(node) - node.kind == "string" && node.named_children.any? 
{ |child| child.kind == "interpolation" } + normalization_adapter.interpolated_string?(node) + end + + def lambda_expression?(node) + normalization_adapter.lambda_expression?(node) + end + + def lambda_target(node) + normalization_adapter.lambda_target(node) + end + + def interpolation_node?(node) + normalization_adapter.interpolation_node?(node) end def normalize_interpolated_string(node) @@ -1404,6 +2721,7 @@ def vcall_identifier?(node) return false unless ts_node?(parent) return false if %w[method method_parameters parameter_list argument_list arguments].include?(parent.kind) return false if member_read_node?(parent) + return false if dotted_expression?(parent) return false if assignment_lhs?(node) || assignment_rhs?(node) return true if %w[body_statement block_body then].include?(parent.kind) && parent_named_child?(parent, node) @@ -1421,11 +2739,11 @@ def self_node?(node) end def instance_variable?(node) - node.kind == "instance_variable" || node.text.to_s.match?(/\A@[A-Za-z_]\w*[!?=]?\z/) + normalization_adapter.instance_variable?(node) end def global_variable?(node) - node.kind == "global_variable" || node.text.to_s.match?(/\A\$[A-Za-z_]\w*[!?=]?\z/) + normalization_adapter.global_variable?(node) end def member_read_node?(node) @@ -1448,17 +2766,20 @@ def assignment_rhs?(node) end def literal_fragment_assignment_context?(node) - parent = parent_node(node) - return false unless ts_node?(parent) - return true if %w[string delimited_symbol regex regex_literal].include?(parent.kind) + normalization_adapter.literal_fragment_assignment_context?(node) + end - %w[string_content escape_sequence interpolation].include?(node.kind) && - ts_node?(parent_node(parent)) && - %w[string delimited_symbol regex regex_literal].include?(parent_node(parent).kind) + def literal_fragment_expression_list?(node) + return false unless ts_node?(node) && node.kind == "expression_list" + + named = node.named_children + named.size == 1 && literal_fragment_assignment_context?(named.first) + 
rescue StandardError + false end def assignment_operator?(text) - %w[= += -= *= /= %= &&= ||=].include?(text.to_s) + normalization_adapter.assignment_operator?(text) end def operator_assignment_operator(node) @@ -1490,6 +2811,7 @@ def assignment_receiver(left) return nil unless ts_node?(left) return wrap(:LVAR, children: [left.text.to_s], source: left) if IDENTIFIER_KINDS.include?(left.kind) return wrap(:IVAR, children: [left.text.to_s], source: left) if instance_variable?(left) + return normalize_global_variable(left) if global_variable?(left) return normalize_const(left) if const_node?(left) normalize_node(left) @@ -1551,7 +2873,7 @@ def ruby_assignment_node?(node) return true if %w[assignment operator_assignment].include?(node.kind) return true if node.kind == "pattern" && node.children.any? { |child| !child.named? && child.text == "=" } - %w[body_statement statement].include?(node.kind) && + %w[body_statement block_body statement].include?(node.kind) && node.children.any? { |child| !child.named? 
&& assignment_operator?(child.text) } end @@ -1572,7 +2894,8 @@ def collect_identifier_names(node, locals) return unless ts_node?(node) locals.add(node.text.to_s.sub(/\A\*/, "")) if IDENTIFIER_KINDS.include?(node.kind) - node.named_children.each { |child| collect_identifier_names(child, locals) } + locals.add(node.text.to_s) if normalization_adapter.identifier_text_node?(node) + node.children.select(&:named?).each { |child| collect_identifier_names(child, locals) } end def ruby_scope_boundary?(node) @@ -1655,9 +2978,18 @@ def function_name(node) end def singleton_receiver(node) - named_field(node, "receiver") || - node.named_children.find { |child| child.kind != "identifier" } || - node.named_children.first + receiver = named_field(node, "receiver") + return receiver if receiver + + name = named_field(node, "name") || + node.named_children.reverse.find { |child| IDENTIFIER_KINDS.include?(child.kind) } + parameters = named_field(node, "parameters") + body = named_field(node, "body") || block_child(node) + node.named_children.find do |child| + !same_ts_node?(child, name) && + !same_ts_node?(child, parameters) && + !same_ts_node?(child, body) + end end def singleton_name(node) @@ -1742,8 +3074,8 @@ def collect_destructured_parameter_targets(node, targets) node.named_children.each { |child| collect_destructured_parameter_targets(child, targets) } end - def scope(body, args: nil) - wrap(:SCOPE, children: [nil, args, body], source: body || args || @document.root) + def scope(body, args: nil, source: nil) + wrap(:SCOPE, children: [nil, args, body], source: body || args || source || @document.root) end def list(children, source:) @@ -1839,9 +3171,7 @@ def source_from_normalized_nodes(first_node, last_node) end def named_field(node, name) - node.child_by_field_name(name) - rescue StandardError - nil + normalization_adapter.named_field(node, name) end def parent_node(node) @@ -1875,42 +3205,29 @@ def modifier_statement?(node) end def ternary_statement?(node) - 
%w[body_statement block_body statement argument_list].include?(node.kind) && - node.named_children.size >= 3 && - node.children.any? { |child| !child.named? && child.text == "?" } && - node.children.any? { |child| !child.named? && child.text == ":" } - rescue StandardError - false + normalization_adapter.ternary_statement?(node) end def normalize_ternary_statement(node) - cond, positive, negative = node.named_children + cond, positive, negative = normalization_adapter.ternary_parts(node) wrap(:IF, children: [normalize_node(cond), normalize_node(positive), normalize_node(negative)], source: node) end def case_argument_list?(node) - node.kind == "argument_list" && - node.children.any? { |child| !child.named? && child.kind == "case" } && - node.named_children.any? { |child| WHEN_KINDS.include?(child.kind) } - rescue StandardError - false + normalization_adapter.case_argument_list?(node) end def leading_function_statement?(node) - %w[body_statement statement].include?(node.kind) && - node.children.first&.kind.to_s == "def" && - node.named_children.any? 
{ |child| IDENTIFIER_KINDS.include?(child.kind) } - rescue StandardError - false + normalization_adapter.leading_function_statement?(node) end def normalize_leading_function_statement(node) - name = node.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) }&.text.to_s.to_sym - body = node.named_children.reverse.find { |child| child.kind == "body_statement" } + name = normalization_adapter.leading_function_name(node).to_s.to_sym + body = normalization_adapter.leading_function_body(node) normalized_body = with_ruby_scope(node, reset: true) do elide_tail_returns(normalize_body(body)) end - wrap(:DEFN, children: [name, scope(normalized_body)], source: node) + wrap(:DEFN, children: [name, scope(normalized_body, source: node)], source: node) end def command_call_statement?(node) @@ -1923,107 +3240,118 @@ def command_call_statement?(node) end def zero_child_identifier_call?(node) - node.kind == "call" && node.named_children.empty? && - node.text.to_s.match?(/\A[A-Za-z_]\w*[!?=]?\z/) + normalization_adapter.zero_child_identifier_call?(node) end def dotted_call?(node) + return false unless ts_node?(node) + target = dotted_call_target(node) + return true if target && dotted_call_node?(target) + + dotted_call_node?(node) + end + + def dotted_call_node?(node) return false unless ts_node?(node) return false unless node.children.any? { |child| child.text == "." || child.text == "&." } - callable = node.named_children.reject { |child| %w[block do_block argument_list arguments].include?(child.kind) } + callable = dotted_callable_children(node) return false if callable.any? { |child| %w[string_content interpolation].include?(child.kind) } callable.size >= 2 end + def dotted_call_target(node) + return nil unless ts_node?(node) + + named = node.named_children + return nil unless named.size == 1 + + child = named.first + dotted_call_node?(child) ? 
child : nil + rescue StandardError + nil + end + + def dotted_callable_children(node) + node.named_children.reject { |child| %w[block do_block argument_list arguments].include?(child.kind) } + end + def safe_navigation_call?(node) - ts_node?(node) && node.children.any? { |child| !child.named? && child.text == "&." } + ts_node?(node) && normalization_adapter.safe_navigation_call?(node) end def dotted_call_parts(node) - callable = node.named_children.reject { |child| %w[block do_block argument_list arguments].include?(child.kind) } + target = dotted_call_target(node) || node + callable = dotted_callable_children(target) [callable.first, callable[1].text.to_s.sub(/=\z/, "")] end def leading_if_statement?(node) - %w[body_statement block block_body statement].include?(node.kind) && - %w[if unless].include?(node.children.first&.kind.to_s) && - node.named_children.size >= 2 && - !IF_KINDS.include?(node.named_children.first.kind) - rescue StandardError - false + normalization_adapter.leading_if_statement?(node) end def leading_case_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - node.children.first&.kind.to_s == "case" && - node.named_children.any? { |child| WHEN_KINDS.include?(child.kind) } - rescue StandardError - false + normalization_adapter.leading_case_statement?(node) end def normalize_leading_case_statement(node) - value = normalize_node(case_value(node)) - whens = case_arms(node).map { |arm| normalize_when(arm) }.compact - wrap(:CASE, children: [value, link_when_chain(whens, case_else_body(node))], source: node) + target = normalization_adapter.leading_case_target(node) || node + value = normalize_node(case_value(target)) + whens = case_arms(target).map { |arm| normalize_when(arm) }.compact + wrap(:CASE, children: [value, link_when_chain(whens, case_else_body(target))], source: target) end def leading_loop_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - !node.children.first&.named? 
&& - %w[while until].include?(node.children.first&.kind.to_s) && - node.named_children.size >= 2 - rescue StandardError - false + normalization_adapter.leading_loop_statement?(node) end def rescue_body_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - node.named_children.any? { |child| child.kind == "rescue" } - rescue StandardError - false + normalization_adapter.rescue_body_statement?(node) end def normalize_rescue_body_statement(node) - named = node.named_children - rescue_index = named.index { |child| child.kind == "rescue" } - body = normalize_body_nodes(named[0...rescue_index], source: node) - rescue_nodes = named[rescue_index..].select { |child| child.kind == "rescue" } + target = normalization_adapter.rescue_body_target(node) || node + body_nodes = normalization_adapter.rescue_body_nodes(target) + body = normalize_body_nodes(body_nodes, source: target) + rescue_nodes = normalization_adapter.rescue_clauses(target) resbodies = rescue_nodes.map { |child| normalize_rescue_clause(child) } - source = source_from_nodes(named.first || node, rescue_source_end(rescue_nodes.last) || rescue_nodes.last || node) + source = source_from_nodes(body_nodes.first || target, rescue_source_end(rescue_nodes.last) || rescue_nodes.last || target) wrap(:RESCUE, children: [body, link_rescue_chain(resbodies), nil], source: source) end def normalize_rescue_clause(node) - exceptions = node.named_children.find { |child| child.kind == "exceptions" } - exception_nodes = exceptions ? 
exceptions.named_children.map { |child| normalize_node(child) }.compact : [] + exceptions = normalization_adapter.rescue_clause_exceptions(node) + exception_nodes = exceptions.map do |child| + if child.kind == "exceptions" && child.text.to_s.match?(/\A[A-Z]\w*(?:::\w+)*\z/) + normalize_const(child) + else + normalize_node(child) + end + end.compact + exception_source = normalization_adapter.rescue_clause_exceptions_source(node) exception_variable = rescue_exception_variable(node) - handler = node.named_children.reverse.find do |child| - !%w[exceptions exception_variable comment].include?(child.kind) - end + handler = normalization_adapter.rescue_clause_handler(node) body = prepend_rescue_exception_assignment(normalize_body(handler), exception_variable) - wrap(:RESBODY, children: [list(exception_nodes, source: exceptions || node), body, nil], + wrap(:RESBODY, children: [list(exception_nodes, source: exception_source || node), body, nil], source: node) end def rescue_source_end(node) return nil unless ts_node?(node) - handler = node.named_children.reverse.find do |child| - !%w[exceptions exception_variable comment].include?(child.kind) - end + handler = normalization_adapter.rescue_clause_handler(node) return handler.named_children.last || handler if ts_node?(handler) node.named_children.reverse.find { |child| !%w[comment].include?(child.kind) } || node end def rescue_exception_variable(node) - var = node.named_children.find { |child| child.kind == "exception_variable" } - name = var&.named_children&.find { |child| IDENTIFIER_KINDS.include?(child.kind) } + name = normalization_adapter.rescue_clause_exception_variable_name(node) return nil unless name - wrap(:LASGN, children: [name.text.to_s, wrap(:ERRINFO, children: [], source: var)], source: var) + source = normalization_adapter.rescue_clause_exception_variable_source(node) || name + wrap(:LASGN, children: [name.text.to_s, wrap(:ERRINFO, children: [], source: source)], source: source) end def 
prepend_rescue_exception_assignment(body, assignment) @@ -2039,74 +3367,92 @@ def prepend_rescue_exception_assignment(body, assignment) end def ensure_body_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - node.named_children.any? { |child| child.kind == "ensure" } - rescue StandardError - false + normalization_adapter.ensure_body_statement?(node) end def normalize_ensure_body_statement(node) - named = node.named_children - ensure_index = named.index { |child| child.kind == "ensure" } - body = normalize_body_nodes(named[0...ensure_index], source: node) - ensure_body = normalize_body(named[ensure_index]) + target = normalization_adapter.ensure_body_target(node) || node + body = if rescue_body_statement?(target) + normalize_rescue_body_statement(target) + else + normalize_body_nodes(normalization_adapter.ensure_body_nodes(target), source: target) + end + ensure_node = normalization_adapter.ensure_clause(target) + ensure_body = normalize_body(normalization_adapter.ensure_clause_body(ensure_node) || ensure_node) wrap(:ENSURE, children: [body, ensure_body], source: body || node) end def array_literal_statement?(node) - %w[body_statement block_body statement argument_list].include?(node.kind) && - node.children.first&.text == "[" && - node.children.last&.text == "]" - rescue StandardError - false + normalization_adapter.array_literal_statement?(node) end def element_reference_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - node.children.first&.text != "[" && - node.children.any? { |child| !child.named? && child.text == "[" } && - node.children.any? { |child| !child.named? 
&& child.text == "]" } && - node.named_children.size >= 2 - rescue StandardError - false + normalization_adapter.element_reference_statement?(node) end def normalize_element_reference_statement(node) - recv = node.named_children.first - args = node.named_children.drop(1).map { |child| normalize_node(child) }.compact - wrap(:CALL, children: [normalize_node(recv), :[], list(args, source: node)], source: node) + target = normalization_adapter.element_reference_target(node) || node + recv = normalization_adapter.element_reference_receiver(target) + args = normalization_adapter.element_reference_arguments(target).map { |child| normalize_node(child) }.compact + if ruby? && self_node?(recv) + return wrap(:FCALL, children: [:[], list(args, source: target)], source: target) + end + + wrap(:CALL, children: [normalize_node(recv), :[], list(args, source: target)], source: target) end def hash_literal_statement?(node) - %w[body_statement block_body statement argument_list].include?(node.kind) && - node.children.first&.text == "{" && - node.children.last&.text == "}" - rescue StandardError - false + normalization_adapter.hash_literal_statement?(node) end def normalize_hash_literal_statement(node) - wrap(:HASH, children: normalize_children(node), source: node) + target = normalization_adapter.hash_literal_target(node) || node + children = normalization_adapter.hash_literal_values(target).map do |child| + normalize_hash_literal_value(child) + end.compact + wrap(:HASH, children: children, source: target) + end + + def normalize_hash_literal_value(node) + if node.kind == "field" + named = node.named_children + if named.size >= 2 + key = named.first + value = named[1] + key_lit = wrap(:LIT, children: [key.text.to_s.to_sym], source: key || node) + return wrap(:HASH, children: [key_lit, normalize_node(value)].compact, source: node) + end + end + + normalize_node(node) end def normalize_array_literal_statement(node) - values = node.named_children.map { |child| normalize_node(child) 
}.compact - return wrap(:ZLIST, children: [], source: node) if values.empty? + target = normalization_adapter.array_literal_target(node) || node + values = normalization_adapter.array_literal_values(target).map do |child| + normalize_array_literal_value(child) + end.compact + return wrap(:ZLIST, children: [], source: target) if values.empty? - list(values, source: node) + list(values, source: target) + end + + def normalize_array_literal_value(node) + if node.kind == "field" + named = node.named_children + return normalize_node(named.first) if named.size == 1 + return normalize_terminal_statement(node) if named.empty? + end + + normalize_node(node) end def empty_body_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - node.named_children.empty? && - node.text.to_s.strip.empty? + normalization_adapter.empty_body_statement?(node) end def heredoc_body_statement?(node) - %w[body_statement block_body statement then].include?(node.kind) && - node.named_children.any? { |child| child.kind == "heredoc_body" } - rescue StandardError - false + normalization_adapter.heredoc_body_statement?(node) end def normalize_heredoc_body_statement(node) @@ -2128,13 +3474,8 @@ def normalize_heredoc_body_statement(node) def heredoc_call_for_body?(node) return false unless ts_node?(node) - return true if node.kind == "heredoc_beginning" - node.named_children.any? do |child| - next false if child.named_children.any? { |grandchild| grandchild.kind == "heredoc_body" } - - heredoc_call_for_body?(child) - end + normalization_adapter.heredoc_call_for_body?(node) end def with_current_heredoc_body(body) @@ -2146,7 +3487,8 @@ def with_current_heredoc_body(body) end def normalize_heredoc_beginning(node) - body = @current_heredoc_body + body = @current_heredoc_body || + parent_node(parent_node(node))&.named_children&.find { |child| child.kind == "heredoc_body" } children = body ? 
normalize_heredoc_children(body) : [] wrap(:DSTR, children: children, source: node) end @@ -2172,10 +3514,7 @@ def normalize_interpolation(node) end def interpolated_statement?(node) - %w[body_statement block_body statement argument_list].include?(node.kind) && - node.named_children.any? { |child| child.kind == "interpolation" } - rescue StandardError - false + normalization_adapter.interpolated_statement?(node) end def normalize_interpolated_statement(node) @@ -2183,11 +3522,7 @@ def normalize_interpolated_statement(node) end def concatenated_string_statement?(node) - %w[body_statement block_body statement argument_list].include?(node.kind) && - node.named_children.size > 1 && - node.named_children.all? { |child| child.kind == "string" } - rescue StandardError - false + normalization_adapter.concatenated_string_statement?(node) end def normalize_concatenated_string_statement(node) @@ -2248,10 +3583,13 @@ def normalize_global_variable(node) end def normalize_leading_loop_statement(node) - keyword = node.children.first.kind - cond = normalize_node(node.named_children.first) - body = normalize_body(node.named_children[1]) - wrap(keyword == "until" ? :UNTIL : :WHILE, children: [cond, body], source: node) + target = normalization_adapter.leading_loop_target(node) || node + return normalize_loop(target) unless same_ts_node?(target, node) + + keyword = target.children.first.kind + cond = normalize_node(target.named_children.first) + body = normalize_body(target.named_children[1]) + wrap(keyword == "until" ? 
:UNTIL : :WHILE, children: [cond, body], source: target) end def operator_assignment_statement?(node) @@ -2309,38 +3647,35 @@ def operator_assignment_statement_parts(node) end def leading_owner_statement?(node) - %w[body_statement statement].include?(node.kind) && - %w[class module].include?(node.children.first&.kind.to_s) && - node.named_children.size >= 2 && - !OWNER_STATEMENT_NESTED_KIND.include?(node.named_children.first.kind) - rescue StandardError - false + normalization_adapter.leading_owner_statement?(node) end - OWNER_STATEMENT_NESTED_KIND = %w[class class_definition class_declaration module].freeze - def normalize_leading_owner_statement(node) - keyword = node.children.first.kind - name = const_for(node.named_children.first) - body_node = named_field(node, "body") || - node.named_children.reverse.find { |child| BLOCK_KINDS.include?(child.kind) } + target = normalization_adapter.leading_owner_target(node) || node + keyword = target.children.first.kind + name = const_for(target.named_children.first) + body_node = named_field(target, "body") || + target.named_children.reverse.find { |child| BLOCK_KINDS.include?(child.kind) } body = normalize_body(body_node) if keyword == "module" - wrap(:MODULE, children: [name, scope(body)], source: node) + wrap(:MODULE, children: [name, scope(body, source: target)], source: target) else - wrap(:CLASS, children: [name, nil, scope(body)], source: node) + wrap(:CLASS, children: [name, nil, scope(body, source: target)], source: target) end end def normalize_leading_if_statement(node) - keyword = node.children.first.kind - cond = node.named_children.find { |child| !%w[comment then elsif else].include?(child.kind) } - consequence = node.named_children.find { |child| child.kind == "then" } || - branch_child(node, cond, 0) - alternative = explicit_alternative(node) + target = normalization_adapter.leading_if_target(node) || node + return normalize_if(target) unless same_ts_node?(target, node) + + keyword = 
target.children.first.kind + cond = target.named_children.find { |child| !%w[comment then elsif else].include?(child.kind) } + consequence = target.named_children.find { |child| child.kind == "then" } || + branch_child(target, cond, 0) + alternative = explicit_alternative(target) type = keyword == "unless" ? :UNLESS : :IF wrap(type, children: [normalize_node(cond), normalize_body(consequence), normalize_else_or_branch(alternative)], - source: node) + source: target) end def modifier_keyword(node) @@ -2370,6 +3705,7 @@ def statement_call_with_block?(node) def statement_block_call(node) return node if dotted_call?(node) + return node if member_read_node?(node) block = call_block(node) node.named_children.find do |child| @@ -2384,7 +3720,7 @@ def normalize_statement_call_with_block(node) body = with_ruby_scope(block) do dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) end - wrap(:ITER, children: [call, scope(body, args: args)], source: node) + wrap(:ITER, children: [call, scope(body, args: args, source: node)], source: node) end def visibility_inline_def_call?(node) @@ -2402,7 +3738,7 @@ def visibility_inline_def_statement?(node, function) end def inline_def_from_argument_list(args) - return nil unless ts_node?(args) + return nil unless ruby? && ts_node?(args) inline_def_from_source(args) end @@ -2413,6 +3749,8 @@ def inline_def_from_statement(node) end def inline_def_from_source(source) + return nil unless ruby? && ts_node?(source) + body = inline_def_body(source) receiver = inline_def_receiver(source) normalized_body = with_ruby_scope(source, reset: true) do @@ -2422,14 +3760,14 @@ def inline_def_from_source(source) name = inline_def_name_after_receiver(source, receiver) return nil if name.to_s.empty? 
- return wrap(:DEFS, children: [normalize_node(receiver), name.to_sym, scope(normalized_body)], + return wrap(:DEFS, children: [normalize_node(receiver), name.to_sym, scope(normalized_body, source: source)], source: source) end name = source.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) }&.text.to_s return nil if name.to_s.empty? - wrap(:DEFN, children: [name.to_sym, scope(normalized_body)], source: source) + wrap(:DEFN, children: [name.to_sym, scope(normalized_body, source: source)], source: source) end def inline_def_receiver(source) @@ -2455,7 +3793,10 @@ def inline_def_body(node) end def literal_arguments_from_text(args) - args.text.to_s.scan(/:([A-Za-z_]\w*[!?=]?)/).map do |name| + text = args.text.to_s + return [normalize_heredoc_beginning(args)] if text.match?(/\A\s*<<[-~]?[A-Za-z_]\w*/) + + text.scan(/:([A-Za-z_]\w*[!?=]?)/).map do |name| wrap(:LIT, children: [name.first.to_sym], source: args) end end diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs index 7ba90ca27..7914d3e19 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -11,6 +11,25 @@ const COMPARISON_OPERATORS: &[&str] = &["==", "!=", "===", "!==", "<", "<=", ">" const OPERATOR_CALL_OPERATORS: &[&str] = &[ "+", "-", "*", "/", "%", "**", "|", "&", "^", "<<", ">>", "=~", "!~", ]; +const BINARY_WRAPPER_KINDS: &[&str] = &[ + "binary", + "binary_expression", + "binary_operator", + "boolean_operator", + "comparison_operator", +]; +const BOOLEAN_EXPRESSION_KINDS: &[&str] = &["binary", "binary_expression", "boolean_operator"]; +const COMPARISON_EXPRESSION_KINDS: &[&str] = + &["binary", "binary_expression", "comparison_operator"]; +const DOTTED_EXPRESSION_WRAPPER_KINDS: &[&str] = + &["body_statement", "block_body", "statement", "argument_list"]; +const PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS: &[&str] = &[ + "body_statement", + "block_body", + "statement", + "argument_list", + 
"expression_statement", +]; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct RawNode { @@ -163,6 +182,26 @@ pub fn normalize_text(text: &str) -> String { text.split_whitespace().collect::>().join(" ") } +fn ruby_exception_constant_text(text: &str) -> bool { + let mut parts = text.split("::"); + let Some(first) = parts.next() else { + return false; + }; + let mut first_chars = first.chars(); + if !matches!(first_chars.next(), Some(ch) if ch.is_ascii_uppercase()) { + return false; + } + if !first_chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) { + return false; + } + parts.all(|part| { + !part.is_empty() + && part + .chars() + .all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) + }) +} + pub fn span(node: TreeSitterNode<'_>) -> Span { let start = node.start_position(); let end = node.end_position(); @@ -182,6 +221,8 @@ pub enum Child { Node(Box), Symbol(String), String(String), + Integer(i64), + Bool(bool), Nil, } @@ -289,4926 +330,27589 @@ pub fn flatten_and(node: &Node) -> Vec<&Node> { .collect() } -struct TreeSitterNormalizer<'source> { - source: &'source str, - language: Language, - local_stack: Vec>, - root_span: Option, +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum TreeSitterNormalizationAdapter { + Default, + Ruby, + Python, + Lua, + TypeScript, } -impl<'source> TreeSitterNormalizer<'source> { - fn new(source: &'source str, language: Language) -> Self { - Self { - source, - language, - local_stack: Vec::new(), - root_span: None, +const QUESTION_COLON_TERNARY_KINDS: &[&str] = &[ + "body_statement", + "block_body", + "statement", + "argument_list", + "conditional", +]; +const TYPESCRIPT_TERNARY_KINDS: &[&str] = &[ + "body_statement", + "block_body", + "statement", + "argument_list", + "conditional", + "ternary_expression", +]; +const CASE_ARGUMENT_WHEN_KINDS: &[&str] = &[ + "when", + "switch_case", + "case_clause", + "expression_case", + "case_statement", + "switch_section", + "switch_block_statement_group", + "switch_entry", + 
"when_entry", + "match_arm", +]; +const CASE_ELSE_KINDS: &[&str] = &["else", "switch_default"]; +const CASE_DEFAULT_PATTERN_KINDS: &[&str] = &["case_pattern", "match_pattern", "pattern"]; +const LEADING_FUNCTION_WRAPPER_KINDS: &[&str] = &["body_statement", "statement"]; +const PYTHON_LEADING_FUNCTION_WRAPPER_KINDS: &[&str] = &["block"]; +const LUA_LEADING_FUNCTION_WRAPPER_KINDS: &[&str] = &["block"]; +const RUBY_LEADING_FUNCTION_TARGET_KINDS: &[&str] = &["method", "singleton_method"]; +const PYTHON_LEADING_FUNCTION_TARGET_KINDS: &[&str] = &["function_definition"]; +const LUA_LEADING_FUNCTION_TARGET_KINDS: &[&str] = &["function_declaration"]; +const OWNER_STATEMENT_NESTED_KINDS: &[&str] = + &["class", "class_definition", "class_declaration", "module"]; +const LEADING_OWNER_WRAPPER_KINDS: &[&str] = &["body_statement", "statement"]; +const PYTHON_LEADING_OWNER_WRAPPER_KINDS: &[&str] = &["block"]; +const OWNER_NODE_KINDS: &[&str] = &["class", "class_definition", "class_declaration", "module"]; +const IF_NODE_KINDS: &[&str] = &[ + "if", + "if_statement", + "if_modifier", + "unless", + "unless_modifier", + "if_expression", + "conditional", +]; +const LEADING_IF_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; +const PYTHON_LEADING_IF_WRAPPER_KINDS: &[&str] = &["block"]; +const LUA_LEADING_IF_WRAPPER_KINDS: &[&str] = &["block"]; +const LEADING_CASE_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; +const CASE_NODE_KINDS: &[&str] = &[ + "case", + "switch_statement", + "expression_switch_statement", + "switch_expression", + "match_statement", + "match_expression", + "when_expression", +]; +const LEADING_LOOP_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; +const LOOP_NODE_KINDS: &[&str] = &[ + "while", + "while_statement", + "while_modifier", + "until", + "until_modifier", +]; +const RESCUE_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block_body", "statement"]; +const 
ENSURE_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block_body", "statement"]; +const ARRAY_LITERAL_WRAPPER_KINDS: &[&str] = &[ + "body_statement", + "block", + "block_body", + "statement", + "argument_list", + "expression_statement", +]; +const ARRAY_LITERAL_NODE_KINDS: &[&str] = &["array", "list"]; +const ELEMENT_REFERENCE_WRAPPER_KINDS: &[&str] = &[ + "body_statement", + "block", + "block_body", + "statement", + "expression_statement", + "expression_list", +]; +const ELEMENT_REFERENCE_NODE_KINDS: &[&str] = &[ + "element_reference", + "subscript", + "subscript_expression", + "bracket_index_expression", +]; +const HASH_LITERAL_WRAPPER_KINDS: &[&str] = &[ + "body_statement", + "block", + "block_body", + "statement", + "argument_list", + "expression_statement", + "parenthesized_expression", +]; +const HASH_LITERAL_NODE_KINDS: &[&str] = &["hash", "dictionary", "object", "table_constructor"]; +const STATEMENT_BLOCK_PARENT_KINDS: &[&str] = &[ + "method_declaration", + "constructor_declaration", + "function_declaration", + "function_body", + "if_statement", + "while_statement", + "for_statement", + "enhanced_for_statement", + "try_statement", + "catch_clause", + "finally_clause", + "do_statement", + "lambda_expression", +]; +const EMPTY_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; +const HEREDOC_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block_body", "statement", "then"]; +const INTERPOLATED_STATEMENT_WRAPPER_KINDS: &[&str] = + &["body_statement", "block_body", "statement", "argument_list"]; +const CONCATENATED_STRING_WRAPPER_KINDS: &[&str] = + &["body_statement", "block_body", "statement", "argument_list"]; +const PYTHON_CONCATENATED_STRING_WRAPPER_KINDS: &[&str] = &[ + "body_statement", + "block_body", + "statement", + "argument_list", + "block", + "expression_statement", +]; +const CONCATENATED_STRING_NODE_KINDS: &[&str] = &["chained_string", "concatenated_string"]; + +struct TernaryParts<'tree> { + condition: 
TreeSitterNode<'tree>, + positive: Vec>, + negative: Vec>, +} + +impl TreeSitterNormalizationAdapter { + fn for_language(language: Language) -> Self { + match language { + Language::Ruby => Self::Ruby, + Language::Python => Self::Python, + Language::Lua => Self::Lua, + Language::TypeScript | Language::JavaScript => Self::TypeScript, + _ => Self::Default, } } - fn normalize(mut self, root: TreeSitterNode<'_>) -> Node { - self.root_span = Some(span(root)); - let children = if self.language == Language::Ruby { - self.with_ruby_scope(root, true, |normalizer| normalizer.normalize_children(root)) - } else { - self.normalize_children(root) + fn ruby(self) -> bool { + self == Self::Ruby + } + + fn yield_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { + let allowed = match self { + Self::Python => matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "expression_statement" | "statement" + ), + _ => matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "statement" + ), }; - self.wrap("ROOT", children, root) + if !allowed { + return false; + } + let named_children = node + .children(&mut node.walk()) + .filter(|child| child.is_named()) + .collect::>(); + named_children.len() == 1 + && named_children[0].kind() == "yield" + && node_text(named_children[0], source) == node_text(node, source) } - fn normalize_node(&mut self, node: TreeSitterNode<'_>) -> Option { - if node.kind() == "comment" { - return None; + fn super_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { + if self != Self::Ruby { + return false; } - if self.assignment_lhs(node) { - return self.normalize_assignment_lhs(node); + if !matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "call" | "statement" + ) { + return false; } - if self.infix_statement(node) { - return self.normalize_infix_statement(node); + if node_text(node, source).trim() == "super" { + return true; } - if self.ternary_statement(node) { - return 
self.normalize_ternary_statement(node); + let raw = raw_named_children(node); + let named = if raw.len() == 1 && raw[0].kind() == "call" { + raw_named_children(raw[0]) + } else { + raw + }; + named + .first() + .map(|child| child.kind() == "super") + .unwrap_or(false) + && named + .iter() + .skip(1) + .all(|child| child.kind() == "argument_list") + } + + fn safe_navigation_call(self, node: TreeSitterNode<'_>, source: &str) -> bool { + let ruby_safe_navigation = node + .children(&mut node.walk()) + .any(|child| !child.is_named() && node_text(child, source) == "&."); + if self != Self::TypeScript { + return ruby_safe_navigation; } - if self.leading_if_statement(node) { - return self.normalize_leading_if_statement(node); + + ruby_safe_navigation + || node + .children(&mut node.walk()) + .any(|child| child.kind() == "optional_chain" && node_text(child, source) == "?.") + || (node.kind() == "call_expression" + && named_children(node) + .into_iter() + .any(|child| self.safe_navigation_call(child, source))) + } + + fn ternary_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.ternary_parts(node, source).is_some() + } + + fn ternary_parts<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + match self { + Self::Python => { + if node.kind() != "conditional_expression" { + return None; + } + let named = named_children(node); + Some(TernaryParts { + condition: *named.get(1)?, + positive: vec![*named.first()?], + negative: vec![*named.get(2)?], + }) + } + Self::Lua => None, + Self::TypeScript => { + question_colon_ternary_parts(node, source, TYPESCRIPT_TERNARY_KINDS) + } + Self::Default | Self::Ruby => { + question_colon_ternary_parts(node, source, QUESTION_COLON_TERNARY_KINDS) + } } - if if_kind(node.kind()) { - return self.normalize_if(node); + } + + fn case_argument_list(self, node: TreeSitterNode<'_>, source: &str) -> bool { + if self != Self::Ruby || node.kind() != "argument_list" { + return false; } - if let 
Some(loop_type) = loop_kind(node.kind()) { - return self.normalize_loop(node, loop_type); + let raw_named = named_children(node); + let target = if raw_named.len() == 1 + && raw_named[0].kind() == "case" + && node_text(raw_named[0], source) == node_text(node, source) + { + raw_named[0] + } else { + node + }; + let has_case_keyword = target + .children(&mut target.walk()) + .any(|child| !child.is_named() && child.kind() == "case"); + has_case_keyword + && named_children(target) + .iter() + .any(|child| CASE_ARGUMENT_WHEN_KINDS.contains(&child.kind())) + } + + fn case_arm(self, node: TreeSitterNode<'_>, source: &str) -> bool { + CASE_ARGUMENT_WHEN_KINDS.contains(&node.kind()) && !self.case_else_arm(node, source) + } + + fn case_else_node<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let mut stack = named_children(node); + while !stack.is_empty() { + let child = stack.remove(0); + if self.case_else_node_kind(child, source) { + return Some(child); + } + if CASE_ARGUMENT_WHEN_KINDS.contains(&child.kind()) { + continue; + } + if !function_kind(child.kind()) { + stack.extend(named_children(child)); + } } - if self.case_kind(node.kind()) { - return self.normalize_case(node); + None + } + + fn case_else_node_kind(self, node: TreeSitterNode<'_>, source: &str) -> bool { + CASE_ELSE_KINDS.contains(&node.kind()) || self.case_else_arm(node, source) + } + + fn case_else_arm(self, node: TreeSitterNode<'_>, source: &str) -> bool { + if self != Self::Python || node.kind() != "case_clause" { + return false; } - if self.modifier_statement(node) { - return self.normalize_modifier_statement(node); + + named_children(node) + .into_iter() + .find(|child| CASE_DEFAULT_PATTERN_KINDS.contains(&child.kind())) + .map(|pattern| node_text(pattern, source).trim() == "_") + .unwrap_or(false) + } + + fn leading_function_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(target) = self.leading_function_target(node, source) else { + return 
false; + }; + let expected_keyword = match self { + Self::Lua => "function", + _ => "def", + }; + target + .children(&mut target.walk()) + .next() + .map(|child| child.kind() == expected_keyword) + .unwrap_or(false) + && named_children(target) + .iter() + .any(|child| identifier_kind_name(child.kind())) + } + + fn leading_function_target<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let (wrapper_kinds, target_kinds) = match self { + Self::Ruby | Self::Default => ( + LEADING_FUNCTION_WRAPPER_KINDS, + RUBY_LEADING_FUNCTION_TARGET_KINDS, + ), + Self::Python => ( + PYTHON_LEADING_FUNCTION_WRAPPER_KINDS, + PYTHON_LEADING_FUNCTION_TARGET_KINDS, + ), + Self::Lua => ( + LUA_LEADING_FUNCTION_WRAPPER_KINDS, + LUA_LEADING_FUNCTION_TARGET_KINDS, + ), + Self::TypeScript => return None, + }; + if !wrapper_kinds.contains(&node.kind()) { + return None; } - if self.statement_call_with_block(node) { - return self.normalize_statement_call_with_block(node); + if node + .children(&mut node.walk()) + .next() + .map(|child| matches!(child.kind(), "def" | "function")) + .unwrap_or(false) + { + return Some(node); } - if self.super_statement(node) { - return Some(self.normalize_super_statement(node)); + let raw_named = named_children(node); + if raw_named.len() == 1 + && target_kinds.contains(&raw_named[0].kind()) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); } - if self.command_call_statement(node) { - return self.normalize_command_call_statement(node); + None + } + + fn leading_owner_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(target) = self.leading_owner_target(node, source) else { + return false; + }; + target + .children(&mut target.walk()) + .next() + .map(|child| matches!(child.kind(), "class" | "module")) + .unwrap_or(false) + && named_children(target).len() >= 2 + && named_children(target) + .first() + .map(|child| 
!OWNER_STATEMENT_NESTED_KINDS.contains(&child.kind())) + .unwrap_or(false) + } + + fn leading_owner_target<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let wrapper_kinds = match self { + Self::Python => PYTHON_LEADING_OWNER_WRAPPER_KINDS, + Self::Ruby | Self::Default => LEADING_OWNER_WRAPPER_KINDS, + Self::Lua | Self::TypeScript => LEADING_OWNER_WRAPPER_KINDS, + }; + if !wrapper_kinds.contains(&node.kind()) { + return None; } - if self.yield_statement(node) { - return Some(self.normalize_yield_statement(node)); + let raw_named = named_children(node); + if raw_named.len() == 1 + && OWNER_NODE_KINDS.contains(&raw_named[0].kind()) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); } - if self.super_statement(node) { - return Some(self.normalize_super_statement(node)); + Some(node) + } + + fn leading_if_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(target) = self.leading_if_target(node, source) else { + return false; + }; + target + .children(&mut target.walk()) + .next() + .map(|child| matches!(child.kind(), "if" | "unless")) + .unwrap_or(false) + && named_children(target).len() >= 2 + && named_children(target) + .first() + .map(|child| !IF_NODE_KINDS.contains(&child.kind())) + .unwrap_or(false) + } + + fn leading_if_target<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let wrapper_kinds = match self { + Self::Python => PYTHON_LEADING_IF_WRAPPER_KINDS, + Self::Lua => LUA_LEADING_IF_WRAPPER_KINDS, + Self::Ruby | Self::TypeScript | Self::Default => LEADING_IF_WRAPPER_KINDS, + }; + if !wrapper_kinds.contains(&node.kind()) { + return None; } - if self.unary_not_statement(node) { - return self.normalize_unary_not(node); + if matches!(self, Self::Python | Self::Lua) { + let raw_named = named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "if_statement" + && node_text(raw_named[0], source) == 
node_text(node, source) + { + return Some(raw_named[0]); + } } - if self.interpolated_statement(node) { - return Some(self.normalize_interpolated_statement(node)); + let raw_named = named_children(node); + if raw_named.len() == 1 + && IF_NODE_KINDS.contains(&raw_named[0].kind()) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); } - if self.dotted_expression(node) { - return self.normalize_dotted_expression(node); + Some(node) + } + + fn leading_case_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(target) = self.leading_case_target(node, source) else { + return false; + }; + target + .children(&mut target.walk()) + .next() + .map(|child| matches!(child.kind(), "case" | "match" | "switch")) + .unwrap_or(false) + && case_arm_descendant(target) + } + + fn leading_case_target<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !LEADING_CASE_WRAPPER_KINDS.contains(&node.kind()) { + return None; } - if self.unary_not_expression(node) { - return self.normalize_unary_not(node); + let raw_named = named_children(node); + if raw_named.len() == 1 + && CASE_NODE_KINDS.contains(&raw_named[0].kind()) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); } - if self.boolean_expression(node) { - return self.normalize_boolean(node); + Some(node) + } + + fn leading_loop_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(target) = self.leading_loop_target(node, source) else { + return false; + }; + target + .children(&mut target.walk()) + .next() + .map(|child| !child.is_named() && matches!(child.kind(), "while" | "until")) + .unwrap_or(false) + && named_children(target).len() >= 2 + } + + fn leading_loop_target<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !LEADING_LOOP_WRAPPER_KINDS.contains(&node.kind()) { + return None; } - if self.operator_call_expression(node) { - 
return self.normalize_operator_call(node); - } - if self.comparison_expression(node) { - return self.normalize_comparison(node); - } - if self.self_node(node) { - return Some(self.wrap("SELF", Vec::new(), node)); - } - if instance_variable_node(node, self.source) { - return Some(self.wrap( - "IVAR", - vec![Child::String(node_text(node, self.source).to_string())], - node, - )); - } - if global_variable_node(node, self.source) { - return Some(self.normalize_global_variable(node)); + let raw_named = named_children(node); + if raw_named.len() == 1 + && LOOP_NODE_KINDS.contains(&raw_named[0].kind()) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); } + Some(node) + } - match node.kind() { - "program" => { - let children = self.normalize_children(node); - Some(self.wrap("ROOT", children, node)) - } - "method" - | "function_definition" - | "function_declaration" - | "method_definition" - | "method_declaration" - | "function_item" => self.normalize_function(node), - "singleton_method" => self.normalize_singleton_function(node), - "class" | "class_definition" | "class_declaration" | "class_specifier" => { - self.normalize_class(node) - } - "module" => self.normalize_module(node), - "lambda" => self.normalize_lambda(node), - _ if self.block_kind(node.kind()) => { - let children = self.normalize_children(node); - Some(self.wrap("BLOCK", children, node)) - } - "ensure" => self.normalize_ensure_clause(node), - "begin" => self.normalize_begin(node), - "subshell" => Some(self.normalize_subshell(node)), - "block_argument" => self.normalize_block_argument(node), - "singleton_class" => self.normalize_singleton_class(node), - "yield" => Some(self.normalize_yield(node)), - "operator_assignment" => self.normalize_operator_assignment(node), - "assignment" | "assignment_expression" | "assignment_statement" => { - self.normalize_assignment(node) - } - "variable_declarator" if !self.has_assignment_operator_child(node) => { - 
Some(self.wrap(&kind_type(node.kind()), Vec::new(), node)) + fn rescue_body_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { + !self.rescue_clauses(node, source).is_empty() + } + + fn rescue_body_target<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + match self { + Self::Python => { + if node.kind() == "try_statement" { + return Some(node); + } + if node.kind() == "block" { + let raw_named = named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "try_statement" + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + } } - "expression_list" if self.single_short_var_lhs(node) => { - Some(self.wrap(&kind_type(node.kind()), Vec::new(), node)) + Self::TypeScript => { + if node.kind() == "try_statement" { + return Some(node); + } + if node.kind() == "statement_block" { + let raw_named = named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "try_statement" + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + } } - "call" | "call_expression" | "method_call" | "method_call_expression" => { - self.normalize_call(node) + _ => {} + } + + if RESCUE_BODY_WRAPPER_KINDS.contains(&node.kind()) { + Some(node) + } else { + None + } + } + + fn rescue_body_nodes<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let Some(target) = self.rescue_body_target(node, source) else { + return Vec::new(); + }; + let named = named_children(target); + match self { + Self::Python => { + if target.kind() == "try_statement" { + return named + .into_iter() + .take_while(|child| { + !matches!(child.kind(), "except_clause" | "finally_clause") + }) + .collect(); + } } - _ if self.member_read_node(node) => self.normalize_member_read(node), - _ if self.unwrap_node(node) => self - .named_children(node) - .into_iter() - .next() - .and_then(|child| self.normalize_node(child)), - 
"element_reference" => self.normalize_element_reference(node), - "rescue_modifier" => self.normalize_rescue_modifier(node), - "super" => Some(self.normalize_super(node)), - "return" | "return_statement" | "return_expression" | "break" | "break_statement" - | "break_expression" | "next" | "continue_statement" => self.normalize_return(node), - "nil" | "none" | "null" => Some(self.wrap("NIL", Vec::new(), node)), - "true" => Some(self.wrap("TRUE", Vec::new(), node)), - "false" => Some(self.wrap("FALSE", Vec::new(), node)), - "instance_variable" => Some(self.wrap( - "IVAR", - vec![Child::String(node_text(node, self.source).to_string())], - node, - )), - "identifier" - | "simple_identifier" - | "property_identifier" - | "field_identifier" - | "shorthand_property_identifier" => Some(self.normalize_identifier(node)), - "constant" | "scope_resolution" | "type_identifier" | "scoped_type_identifier" => { - Some(self.normalize_const(node)) + Self::TypeScript => { + if target.kind() == "try_statement" { + return named + .into_iter() + .take_while(|child| { + !matches!(child.kind(), "catch_clause" | "finally_clause") + }) + .collect(); + } } - "self" | "this" => Some(self.wrap("SELF", Vec::new(), node)), - "global_variable" => Some(self.normalize_global_variable(node)), - "array" => Some(self.normalize_array_literal(node)), - "interpolation" => self.normalize_interpolation(node), - "heredoc_beginning" => Some(self.normalize_heredoc_beginning(node)), - "chained_string" => Some(self.normalize_chained_string(node)), - "string" - | "string_content" - | "string_literal" - | "interpreted_string_literal" - | "raw_string_literal" => { - if self.interpolated_string(node) { - Some(self.normalize_interpolated_string(node)) - } else if let Some(content) = self.lua_no_paren_string_argument_content(node) { - Some(self.wrap( - "STR", - vec![Child::String(node_text(content, self.source).to_string())], - content, - )) - } else { - Some(self.wrap( - "STR", - vec![Child::String(node_text(node, 
self.source).to_string())], - node, - )) + _ => {} + } + + let Some(index) = named.iter().position(|child| self.rescue_clause(*child)) else { + return Vec::new(); + }; + named[..index].to_vec() + } + + fn rescue_clauses<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let Some(target) = self.rescue_body_target(node, source) else { + return Vec::new(); + }; + let clause_kind = match self { + Self::Python => "except_clause", + Self::TypeScript => "catch_clause", + _ => "rescue", + }; + named_children(target) + .into_iter() + .filter(|child| child.kind() == clause_kind) + .collect() + } + + fn rescue_clause_exceptions<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + match self { + Self::Python => { + let Some(pattern) = named_children(node) + .into_iter() + .find(|child| !matches!(child.kind(), "block" | "comment")) + else { + return Vec::new(); + }; + if pattern.kind() != "as_pattern" { + return vec![pattern]; } + named_children(pattern) + .into_iter() + .find(|child| child.kind() != "as_pattern_target") + .into_iter() + .collect() } - "integer" => Some(self.wrap("INTEGER", Vec::new(), node)), - "float" | "float_literal" => Some(self.wrap("FLOAT", Vec::new(), node)), - "pair" => self.normalize_pair(node), - "simple_symbol" | "symbol" => Some(self.wrap( - "LIT", - vec![Child::Symbol( - node_text(node, self.source).trim_start_matches(':').to_string(), - )], - node, - )), + Self::TypeScript => Vec::new(), _ => { - let children = self.normalize_children(node); - Some(self.wrap(&kind_type(node.kind()), children, node)) + let Some(exceptions) = named_children(node) + .into_iter() + .find(|child| child.kind() == "exceptions") + else { + return Vec::new(); + }; + let text = node_text(exceptions, source).trim(); + if ruby_exception_constant_text(text) + || (named_children(exceptions).is_empty() && !text.is_empty()) + { + return vec![exceptions]; + } + named_children(exceptions) } } } - fn normalize_function(&mut 
self, node: TreeSitterNode<'_>) -> Option { - let name = self.function_name(node)?; - let args = self.normalize_parameters(self.parameters_child(node)); - let body = self.with_ruby_scope(node, true, |normalizer| { - let body_node = normalizer - .named_field(node, "body") - .or_else(|| normalizer.block_child(node))?; - let body = normalizer.normalize_body(body_node); - let body = normalizer.elide_tail_returns(body); - normalizer.elide_implicit_nil_body(body) - }); - let scope = self.scope(body, args, node); - Some(self.wrap( - "DEFN", - vec![Child::Symbol(name), Child::Node(Box::new(scope))], - node, - )) + fn rescue_clause_exceptions_source<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + match self { + Self::Python => self + .rescue_clause_exceptions(node, source) + .into_iter() + .next(), + Self::TypeScript => None, + _ => named_children(node) + .into_iter() + .find(|child| child.kind() == "exceptions"), + } } - fn normalize_singleton_function(&mut self, node: TreeSitterNode<'_>) -> Option { - let name = self.function_name(node)?; - let receiver = self - .named_children(node) - .into_iter() - .find(|child| matches!(child.kind(), "self" | "constant" | "identifier")) - .and_then(|child| self.normalize_node(child)) - .unwrap_or_else(|| self.wrap("SELF", Vec::new(), node)); - let args = self.normalize_parameters(self.parameters_child(node)); - let body = self.with_ruby_scope(node, true, |normalizer| { - let body_node = normalizer - .named_field(node, "body") - .or_else(|| normalizer.block_child(node))?; - let body = normalizer.normalize_body(body_node); - let body = normalizer.elide_tail_returns(body); - normalizer.elide_implicit_nil_body(body) - }); - let scope = self.scope(body, args, node); - Some(self.wrap( - "DEFS", - vec![ - Child::Node(Box::new(receiver)), - Child::Symbol(name), - Child::Node(Box::new(scope)), - ], - node, - )) + fn rescue_clause_exception_variable_name<'tree>( + self, + node: TreeSitterNode<'tree>, + ) -> 
Option> { + match self { + Self::Python => named_children(node) + .into_iter() + .find(|child| child.kind() == "as_pattern") + .and_then(|pattern| descendant(pattern, &["as_pattern_target"])), + Self::TypeScript => named_children(node) + .into_iter() + .find(|child| identifier_kind_name(child.kind())), + _ => named_children(node) + .into_iter() + .find(|child| child.kind() == "exception_variable") + .and_then(|variable| { + named_children(variable) + .into_iter() + .find(|child| identifier_kind_name(child.kind())) + }), + } } - fn normalize_class(&mut self, node: TreeSitterNode<'_>) -> Option { - let name = self.const_for( - self.named_field(node, "name") - .or_else(|| self.first_named(node)), - node, - ); - let body = self - .named_field(node, "body") - .or_else(|| self.block_child(node)) - .and_then(|body| self.normalize_body(body)); - Some(self.wrap( - "CLASS", - vec![ - Child::Node(Box::new(name)), - Child::Nil, - Child::Node(Box::new(self.scope(body, None, node))), - ], - node, - )) + fn rescue_clause_exception_variable_source<'tree>( + self, + node: TreeSitterNode<'tree>, + ) -> Option> { + match self { + Self::Python | Self::TypeScript => self.rescue_clause_exception_variable_name(node), + _ => named_children(node) + .into_iter() + .find(|child| child.kind() == "exception_variable"), + } } - fn normalize_python_nested_class_as_iter(&mut self, node: TreeSitterNode<'_>) -> Option { - let name_node = self - .named_field(node, "name") - .or_else(|| self.first_named(node))?; - let name = node_text(name_node, self.source).to_string(); - let header_end = node - .children(&mut node.walk()) - .find(|child| !child.is_named() && node_text(*child, self.source) == ":") - .unwrap_or(name_node); - let call = self.wrap_from_nodes( - "VCALL", - vec![Child::Symbol(name), Child::Nil], - node, - header_end, - ); - let body = self - .named_field(node, "body") - .or_else(|| self.block_child(node)) - .and_then(|body| self.normalize_body(body)); - let scope = self.scope(body, None, 
node); - Some(self.wrap( - "ITER", - vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], - node, - )) + fn rescue_clause_handler<'tree>( + self, + node: TreeSitterNode<'tree>, + ) -> Option> { + match self { + Self::Python => named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() == "block"), + Self::TypeScript => named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() == "statement_block"), + _ => named_children(node).into_iter().rev().find(|child| { + !matches!( + child.kind(), + "exceptions" | "exception_variable" | "comment" + ) + }), + } } - fn normalize_module(&mut self, node: TreeSitterNode<'_>) -> Option { - let name = self.const_for( - self.named_field(node, "name") - .or_else(|| self.first_named(node)), - node, - ); - let body = self - .named_field(node, "body") - .or_else(|| self.block_child(node)) - .and_then(|body| self.normalize_body(body)); - Some(self.wrap( - "MODULE", - vec![ - Child::Node(Box::new(name)), - Child::Node(Box::new(self.scope(body, None, node))), - ], - node, - )) + fn rescue_clause(self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "rescue" } - fn normalize_singleton_class(&mut self, node: TreeSitterNode<'_>) -> Option { - let named = self.named_children(node); - let receiver = named - .first() - .and_then(|receiver| self.normalize_node(*receiver)); - let body = named.get(1).and_then(|body| self.normalize_body(*body)); - Some(self.wrap( - "SCLASS", - vec![ - optional_node(receiver), - Child::Node(Box::new(self.scope(body, None, node))), - ], - node, - )) + fn ensure_body_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.ensure_clause(node, source).is_some() } - fn normalize_lambda(&mut self, node: TreeSitterNode<'_>) -> Option { - let body_node = self - .named_field(node, "body") - .or_else(|| self.block_child(node)) - .or_else(|| self.named_children(node).into_iter().last())?; - let body = self.with_ruby_scope(node, false, |normalizer| { - 
normalizer.normalize_body(body_node).map(dynamic_scope) - }); - let scope = self.scope(body, None, node); - Some(self.wrap("LAMBDA", vec![Child::Node(Box::new(scope))], node)) - } + fn ensure_body_target<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + match self { + Self::Python => { + if node.kind() == "try_statement" { + return Some(node); + } + if node.kind() == "block" { + let raw_named = named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "try_statement" + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + } + } + Self::TypeScript => { + if node.kind() == "try_statement" { + return Some(node); + } + if node.kind() == "statement_block" { + let raw_named = named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "try_statement" + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + } + } + _ => {} + } - fn normalize_yield(&mut self, node: TreeSitterNode<'_>) -> Node { - let args_node = self - .named_children(node) - .into_iter() - .find(|child| child.kind() == "argument_list"); - let args = args_node - .map(|args| { - self.named_children(args) - .into_iter() - .filter_map(|child| self.normalize_node(child)) - .collect::>() - }) - .unwrap_or_else(|| { - self.named_children(node) - .into_iter() - .filter(|child| child.kind() != "yield") - .filter_map(|child| self.normalize_node(child)) - .collect() - }); - self.wrap( - "YIELD", - vec![list_or_nil(args, args_node.unwrap_or(node), self)], - node, - ) + if ENSURE_BODY_WRAPPER_KINDS.contains(&node.kind()) { + Some(node) + } else { + None + } } - fn normalize_yield_statement(&mut self, node: TreeSitterNode<'_>) -> Node { - let args_node = self - .named_children(node) - .into_iter() - .find(|child| child.kind() == "argument_list"); - let args = args_node - .map(|args| self.yield_argument_nodes(args)) - .unwrap_or_else(|| { - 
self.named_children(node) - .into_iter() - .filter(|child| child.kind() != "yield") - .filter_map(|child| self.normalize_node(child)) - .collect() - }); - self.wrap( - "YIELD", - vec![list_or_nil(args, args_node.unwrap_or(node), self)], - node, - ) + fn ensure_body_nodes<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let Some(target) = self.ensure_body_target(node, source) else { + return Vec::new(); + }; + let named = named_children(target); + let ensure_kind = match self { + Self::Python | Self::TypeScript => "finally_clause", + _ => "ensure", + }; + let Some(index) = named.iter().position(|child| child.kind() == ensure_kind) else { + return Vec::new(); + }; + named[..index].to_vec() } - fn normalize_super_statement(&mut self, node: TreeSitterNode<'_>) -> Node { - let raw = self.raw_named_children(node); - let children = if raw.len() == 1 && raw[0].kind() == "call" { - self.raw_named_children(raw[0]) - } else { - raw + fn ensure_clause<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let target = self.ensure_body_target(node, source)?; + let ensure_kind = match self { + Self::Python | Self::TypeScript => "finally_clause", + _ => "ensure", }; - let args_node = children + named_children(target) .into_iter() - .find(|child| child.kind() == "argument_list"); - let args = args_node - .map(|args| self.yield_argument_nodes(args)) - .unwrap_or_default(); - self.wrap( - "SUPER", - vec![list_or_nil(args, args_node.unwrap_or(node), self)], - node, - ) + .find(|child| child.kind() == ensure_kind) } - fn normalize_body(&mut self, node: TreeSitterNode<'_>) -> Option { - if self.language == Language::Python && node.kind() == "block" { - let raw_children = self.raw_named_children(node); - if raw_children.len() == 1 - && raw_children[0].kind() == "class_definition" - && node - .parent() - .map(|parent| parent.kind() == "class_definition") - .unwrap_or(false) - { - return 
self.normalize_python_nested_class_as_iter(raw_children[0]); - } - } - if self.leading_if_statement(node) { - return self.normalize_leading_if_statement(node); - } - if self.ternary_statement(node) { - return self.normalize_ternary_statement(node); - } - if if_kind(node.kind()) { - return self.normalize_if(node); - } - if self.modifier_statement(node) { - return self.normalize_modifier_statement(node); - } - if self.statement_call_with_block(node) { - return self.normalize_statement_call_with_block(node); - } - if self.command_call_statement(node) { - return self.normalize_command_call_statement(node); + fn ensure_clause_body<'tree>( + self, + node: TreeSitterNode<'tree>, + ) -> Option> { + match self { + Self::Python => named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() == "block"), + Self::TypeScript => named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() == "statement_block"), + _ => None, } - if self.yield_statement(node) { - return Some(self.normalize_yield_statement(node)); + } + + fn array_literal_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.array_literal_target(node, source).is_some() + } + + fn array_literal_target<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if self == Self::Lua { + if let Some(target) = lua_positional_table_target(node, source) { + return Some(target); + } } - if self.unary_not_statement(node) { - return self.normalize_unary_not(node); + + if ARRAY_LITERAL_NODE_KINDS.contains(&node.kind()) { + return Some(node); } - if self.dotted_expression(node) { - return self.normalize_dotted_expression(node); + if !ARRAY_LITERAL_WRAPPER_KINDS.contains(&node.kind()) { + return None; } - if self.infix_statement(node) { - return self.normalize_infix_statement(node); + if bracketed(node, source, "[", "]") { + return Some(node); } - if self.boolean_expression(node) { - return self.normalize_boolean(node); + + let named = named_children(node); + 
let child = *named.first()?; + if named.len() == 1 { + if ARRAY_LITERAL_NODE_KINDS.contains(&child.kind()) { + return Some(child); + } + + if matches!(child.kind(), "expression_statement" | "statement") + && node_text(child, source).trim() == node_text(node, source).trim() + { + return self.array_literal_target(child, source); + } + + let stripped = node_text(node, source).trim(); + if stripped == node_text(child, source) + || stripped == format!("{};", node_text(child, source)) + { + if ARRAY_LITERAL_NODE_KINDS.contains(&child.kind()) { + return Some(child); + } + } } - if self.block_kind(node.kind()) { - let children = self.normalize_children(node); - if children.is_empty() { - let text = node_text(node, self.source).trim(); - if bare_identifier_text(text) { - return Some(self.wrap("VCALL", vec![Child::Symbol(text.to_string())], node)); + None + } + + fn array_literal_values<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.array_literal_target(node, source).unwrap_or(node); + if self == Self::Lua { + if target.kind() == "arguments" { + if let Some(table) = named_children(target) + .into_iter() + .find(|child| child.kind() == "table_constructor") + { + if node_text(target, source).trim() == node_text(table, source).trim() { + return named_children(table); + } } - return None; } - if children.len() == 1 { - return child_node(children.into_iter().next().unwrap()); + if target.kind() == "table_constructor" { + return named_children(target); } - - return Some(self.wrap("BLOCK", children, node)); } - self.normalize_node(node) + named_children(target) } - fn normalize_if(&mut self, node: TreeSitterNode<'_>) -> Option { - if matches!(node.kind(), "if_modifier" | "unless_modifier") { - let named = self.named_children(node); - let action = *named.first()?; - let condition = *named.get(1)?; - let node_type = if node.kind().starts_with("unless") { - "UNLESS" - } else { - "IF" - }; - let condition = 
optional_node(self.normalize_node(condition)); - let action = optional_node(self.normalize_modifier_action(action)); - return Some(self.wrap(node_type, vec![condition, action, Child::Nil], node)); + fn element_reference_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.element_reference_target(node, source).is_some() + } + + fn element_reference_target<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if ELEMENT_REFERENCE_NODE_KINDS.contains(&node.kind()) { + return Some(node); + } + if !ELEMENT_REFERENCE_WRAPPER_KINDS.contains(&node.kind()) { + return None; } - let condition_raw = self - .named_field(node, "condition") - .or_else(|| self.named_field(node, "predicate")) - .or_else(|| self.first_named(node))?; - let condition = optional_node(self.normalize_node(condition_raw)); - let positive_raw = self - .named_field(node, "consequence") - .or_else(|| self.named_field(node, "body")) - .or_else(|| { - self.named_children(node) - .into_iter() - .find(|child| child.kind() == "then") - }) - .or_else(|| self.branch_child(node, condition_raw, 0)); - let negative_raw = self - .named_field(node, "alternative") - .or_else(|| self.explicit_alternative(node)); - let positive = optional_node(positive_raw.and_then(|child| self.normalize_body(child))); - let negative = - optional_node(negative_raw.and_then(|child| self.normalize_else_or_branch(child))); - let node_type = if node.kind().starts_with("unless") { - "UNLESS" + let named = named_children(node); + if named.len() == 1 + && ELEMENT_REFERENCE_WRAPPER_KINDS.contains(&named[0].kind()) + && node_text(named[0], source).trim() == node_text(node, source).trim() + { + return self.element_reference_target(named[0], source); + } + if named.len() == 1 && ELEMENT_REFERENCE_NODE_KINDS.contains(&named[0].kind()) { + let stripped = node_text(node, source).trim(); + let child_text = node_text(named[0], source); + if stripped == child_text || stripped == format!("{child_text};") { + 
return Some(named[0]); + } + } + + if element_reference_shape(node, source) { + Some(node) } else { - "IF" - }; - Some(self.wrap(node_type, vec![condition, positive, negative], node)) + None + } } - fn normalize_loop(&mut self, node: TreeSitterNode<'_>, node_type: &str) -> Option { - if matches!(node.kind(), "while_modifier" | "until_modifier") { - let named = self.named_children(node); - let action = *named.first()?; - let condition = *named.get(1)?; - let condition = optional_node(self.normalize_node(condition)); - let action = optional_node(self.normalize_modifier_action(action)); - return Some(self.wrap( - node_type, - vec![condition, action, Child::String("true".to_string())], - node, - )); - } + fn element_reference_receiver<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let target = self.element_reference_target(node, source).unwrap_or(node); + named_children(target).first().copied() + } - let condition = self - .named_field(node, "condition") - .or_else(|| self.first_named(node)); - let body = self - .named_field(node, "body") - .or_else(|| self.named_field(node, "consequence")) - .or_else(|| self.block_child(node)); - let condition = - optional_node(condition.and_then(|condition| self.normalize_node(condition))); - let body = optional_node(body.and_then(|body| self.normalize_body(body))); - Some(self.wrap(node_type, vec![condition, body], node)) + fn element_reference_arguments<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.element_reference_target(node, source).unwrap_or(node); + named_children(target).into_iter().skip(1).collect() } - fn normalize_else_or_branch(&mut self, node: TreeSitterNode<'_>) -> Option { - if self.language == Language::Python && node.kind() == "else_clause" { - if let Some(block) = self - .raw_named_children(node) - .into_iter() - .find(|child| child.kind() == "block") - { - if let Some(normalized) = self.normalize_python_else_if_block(block) { - 
return Some(self.wrap( - "ELSE_CLAUSE", - vec![Child::Node(Box::new(normalized))], - node, - )); - } - } - } - if node.kind() != "else" { - return self.normalize_body(node); - } - if let Some(call) = self.first_dotted_call_descendant(node) { - let trailing = self - .source - .get(call.end_byte()..node.end_byte()) - .unwrap_or("") - .trim(); - if trailing.is_empty() { - return self.normalize_node(call); + fn hash_literal_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.hash_literal_target(node, source).is_some() + } + + fn hash_literal_target<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if self == Self::Lua { + if let Some(target) = lua_keyed_table_target(node, source) { + return Some(target); } } - self.normalize_body_nodes(self.named_children(node), node) - } - fn normalize_python_else_if_block(&mut self, node: TreeSitterNode<'_>) -> Option { - let statements = self - .raw_named_children(node) - .into_iter() - .filter(|child| child.kind() != "comment") - .collect::>(); - if statements.len() != 1 || statements[0].kind() != "if_statement" { - return None; + if HASH_LITERAL_NODE_KINDS.contains(&node.kind()) { + return Some(node); } - let if_node = statements[0]; - let condition = self - .named_field(if_node, "condition") - .or_else(|| self.named_field(if_node, "predicate")) - .or_else(|| self.first_named(if_node))?; - if self.identifier_kind(condition.kind()) { - return self.normalize_python_if_statement_as_iter(if_node); + if !HASH_LITERAL_WRAPPER_KINDS.contains(&node.kind()) { + return None; } - let consequence = self - .named_field(if_node, "consequence") - .or_else(|| self.named_field(if_node, "body")) - .or_else(|| self.branch_child(if_node, condition, 0)); - let alternative = self.explicit_alternative(if_node); - let mut children = Vec::new(); - if let Some(condition) = self.normalize_node(condition) { - children.push(Child::Node(Box::new(condition))); + if statement_block_wrapper(node) { + return 
None; } - if let Some(consequence) = consequence.and_then(|child| { - self.normalize_python_else_if_block(child) - .or_else(|| self.normalize_body(child)) - }) { - children.push(Child::Node(Box::new(consequence))); + if bracketed(node, source, "{", "}") { + return Some(node); } - if let Some(alternative) = - alternative.and_then(|child| self.normalize_else_or_branch(child)) - { - children.push(Child::Node(Box::new(alternative))); + + let named = named_children(node); + if named.len() != 1 { + return None; } - Some(self.wrap("BLOCK", children, node)) - } - fn normalize_python_if_statement_as_iter(&mut self, node: TreeSitterNode<'_>) -> Option { - let condition = self - .named_field(node, "condition") - .or_else(|| self.named_field(node, "predicate")) - .or_else(|| self.first_named(node))?; - let body = self - .named_field(node, "consequence") - .or_else(|| self.named_field(node, "body")) - .or_else(|| self.branch_child(node, condition, 0))?; - let call_source = self.source_before_child(node, body); - let call = self.wrap_from_source_node( - "VCALL", - vec![ - Child::Symbol(node_text(condition, self.source).to_string()), - Child::Nil, - ], - &call_source, - ); - let body = self.with_ruby_scope(body, false, |normalizer| { - normalizer.normalize_body(body).map(dynamic_scope) - }); - let scope = self.scope(body, None, node); - Some(self.wrap( - "ITER", - vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], - node, - )) - } + let child = named[0]; + if node.kind() == "parenthesized_expression" { + return self.hash_literal_target(child, source); + } - fn normalize_case(&mut self, node: TreeSitterNode<'_>) -> Option { - let value_raw = self.case_value(node); - let value = value_raw.and_then(|value| self.normalize_node(value)); - let whens = self - .case_arms(node) - .into_iter() - .filter_map(|arm| self.normalize_when(arm)) - .collect::>(); - let fallback = self.case_else_body(node); - let chain = self.link_when_chain(whens, fallback); - if value_raw.is_none() 
{ - Some(self.wrap("CASE2", vec![optional_node(chain)], node)) - } else { - Some(self.wrap( - "CASE", - vec![optional_node(value), optional_node(chain)], - node, - )) + let stripped = node_text(node, source).trim(); + let child_text = node_text(child, source); + if stripped == child_text || stripped == format!("{child_text};") { + if HASH_LITERAL_NODE_KINDS.contains(&child.kind()) { + return Some(child); + } + if HASH_LITERAL_WRAPPER_KINDS.contains(&child.kind()) { + return self.hash_literal_target(child, source); + } } - } - fn normalize_when(&mut self, node: TreeSitterNode<'_>) -> Option { - let patterns = self.normalize_patterns(node); - let body = self - .when_body(node) - .and_then(|body| self.normalize_body(body)); - Some(self.wrap( - "WHEN", - vec![ - list_or_nil(patterns, node, self), - optional_node(body), - Child::Nil, - ], - node, - )) + None } - fn normalize_patterns(&mut self, node: TreeSitterNode<'_>) -> Vec { - let body = self.when_body(node); - let mut patterns = Vec::new(); - for child in self.named_children(node) { - if Some(child) == body - || self.block_kind(child.kind()) - || self.statement_node(child.kind()) - || self.when_kind(child.kind()) - { - continue; + fn hash_literal_values<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.hash_literal_target(node, source).unwrap_or(node); + if self == Self::Lua { + if target.kind() == "arguments" { + if let Some(table) = named_children(target) + .into_iter() + .find(|child| child.kind() == "table_constructor") + { + return named_children(table); + } + return named_children(target); } - if let Some(pattern) = self.normalize_node(child) { - patterns.push(pattern); + if target.kind() == "table_constructor" { + return named_children(target); } } - patterns - } - fn link_when_chain(&self, whens: Vec, fallback: Option) -> Option { - whens - .into_iter() - .rev() - .fold(fallback, |next_when, mut current| { - while current.children.len() <= 2 { - 
current.children.push(Child::Nil); - } - current.children[2] = optional_node(next_when); - Some(current) - }) + named_children(target) } - fn case_else_body(&mut self, node: TreeSitterNode<'_>) -> Option { - self.named_children(node) - .into_iter() - .find(|child| child.kind() == "else") - .and_then(|else_node| self.normalize_else_or_branch(else_node)) - } + fn empty_body_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { + if EMPTY_BODY_WRAPPER_KINDS.contains(&node.kind()) + && named_children(node).is_empty() + && node_text(node, source).trim().is_empty() + { + return true; + } - fn normalize_body_nodes( - &mut self, - nodes: Vec>, - source: TreeSitterNode<'_>, - ) -> Option { - let mut children = Vec::new(); - let mut index = 0; - while index < nodes.len() { - if index + 1 < nodes.len() { - if let Some(call) = self.normalize_flat_dotted_nodes(&nodes[index..=index + 1]) { - children.push(Child::Node(Box::new(call))); - index += 2; - continue; + match self { + Self::Python => { + if node.kind() == "pass_statement" { + return true; } + if node.kind() == "block" && node_text(node, source).trim() == "pass" { + let named = named_children(node); + return named.is_empty() + || named.iter().all(|child| child.kind() == "pass_statement"); + } + false } - if let Some(child) = self.normalize_body(nodes[index]) { - children.push(Child::Node(Box::new(child))); + Self::TypeScript => { + node.kind() == "statement_block" + && named_children(node).is_empty() + && node_text(node, source).trim() == "{}" } - index += 1; - } - if children.is_empty() { - None - } else if children.len() == 1 { - child_node(children.into_iter().next().unwrap()) - } else { - Some(self.wrap("BLOCK", children, source)) + _ => false, } } - fn normalize_return(&mut self, node: TreeSitterNode<'_>) -> Option { - self.normalize_return_node(node) + fn heredoc_body_statement(self, node: TreeSitterNode<'_>) -> bool { + self == Self::Ruby + && HEREDOC_BODY_WRAPPER_KINDS.contains(&node.kind()) + && 
named_children(node) + .iter() + .any(|child| child.kind() == "heredoc_body") } - fn normalize_super(&mut self, node: TreeSitterNode<'_>) -> Node { - let args_node = self - .named_children(node) - .into_iter() - .find(|child| child.kind() == "argument_list"); - let args = args_node - .map(|args| { - self.named_children(args) - .into_iter() - .filter_map(|child| self.normalize_node(child)) - .collect::>() - }) - .unwrap_or_default(); - self.wrap( - "SUPER", - vec![list_or_nil(args, args_node.unwrap_or(node), self)], - node, - ) + fn heredoc_call_for_body(self, node: TreeSitterNode<'_>, source: &str) -> bool { + if self != Self::Ruby { + return false; + } + if node.kind() == "heredoc_beginning" { + return true; + } + if matches!(node.kind(), "call" | "argument_list") + && heredoc_marker_text(node_text(node, source)) + { + return true; + } + + named_children(node).into_iter().any(|child| { + if named_children(child) + .into_iter() + .any(|grandchild| grandchild.kind() == "heredoc_body") + { + return false; + } + + self.heredoc_call_for_body(child, source) + }) } - fn normalize_return_node(&mut self, node: TreeSitterNode<'_>) -> Option { - let children = self - .named_children(node) - .into_iter() - .filter_map(|child| self.normalize_return_value(child)) - .map(|child| Child::Node(Box::new(child))) - .collect::>(); - Some(self.wrap(return_kind(node.kind()), children, node)) + fn interpolated_statement( + self, + node: TreeSitterNode<'_>, + children: &[TreeSitterNode<'_>], + ) -> bool { + INTERPOLATED_STATEMENT_WRAPPER_KINDS.contains(&node.kind()) + && children.iter().any(|child| child.kind() == "interpolation") } - fn normalize_return_value(&mut self, node: TreeSitterNode<'_>) -> Option { - if node.kind() != "argument_list" { - return self.normalize_node(node); - } - if self.boolean_expression(node) { - return self.normalize_boolean(node); - } - if self.ternary_statement(node) { - return self.normalize_ternary_statement(node); + fn concatenated_string_statement( + 
self, + node: TreeSitterNode<'_>, + children: &[TreeSitterNode<'_>], + ) -> bool { + if concatenated_string_node(node).is_some() { + return true; } - if self.dotted_expression(node) { - return self.normalize_dotted_expression(node); + let wrapper_kinds = match self { + Self::Python => PYTHON_CONCATENATED_STRING_WRAPPER_KINDS, + _ => CONCATENATED_STRING_WRAPPER_KINDS, + }; + if !wrapper_kinds.contains(&node.kind()) { + return false; } - if self.infix_statement(node) { - return self.normalize_infix_statement(node); + if children.len() > 1 && children.iter().all(|child| child.kind() == "string") { + return true; } - let values = self - .named_children(node) - .into_iter() - .filter_map(|child| self.normalize_node(child)) - .collect::>(); - if values.len() == 1 { - values.into_iter().next() - } else if values.is_empty() { - None - } else { - Some(self.list(values, node)) + children.len() == 1 && concatenated_string_target(children[0]).is_some() + } + + fn zero_child_identifier_call(self, node: TreeSitterNode<'_>, source: &str) -> bool { + if self != Self::Ruby + || node.kind() != "call" + || !ruby_variable_name_text(node_text(node, source)) + { + return false; } + let named = named_children(node); + named.is_empty() + || (named.len() == 1 + && identifier_kind_name(named[0].kind()) + && node_text(named[0], source) == node_text(node, source)) } - fn normalize_ternary_statement(&mut self, node: TreeSitterNode<'_>) -> Option { - let (question_byte, colon_byte) = self.ternary_separator_bytes(node)?; - let named = self.named_children(node); - let condition = *named.first()?; - let positive_nodes = named - .iter() - .copied() - .filter(|child| child.start_byte() > question_byte && child.end_byte() <= colon_byte) - .collect::>(); - let negative_nodes = named - .iter() - .copied() - .filter(|child| child.start_byte() > colon_byte) - .collect::>(); - let condition = optional_node(self.normalize_node(condition)); - let positive = 
optional_node(self.normalize_ternary_branch(&positive_nodes)); - let negative = optional_node(self.normalize_ternary_branch(&negative_nodes)); - Some(self.wrap("IF", vec![condition, positive, negative], node)) + fn boolean_expression_kind(self, node: TreeSitterNode<'_>) -> bool { + BOOLEAN_EXPRESSION_KINDS.contains(&node.kind()) + || (self == Self::Lua && node.kind() == "expression_list") } - fn normalize_boolean(&mut self, node: TreeSitterNode<'_>) -> Option { - let operator = self.boolean_operator(node)?; - let node_type = if operator == "or" { "OR" } else { "AND" }; - let mut operands = Vec::new(); - for child in self.named_children(node) { - if let Some(normalized) = self.normalize_node(child) { - if normalized.r#type == node_type { - operands.extend(normalized.children); - } else { - operands.push(Child::Node(Box::new(normalized))); - } + fn comparison_expression_kind(self, node: TreeSitterNode<'_>) -> bool { + COMPARISON_EXPRESSION_KINDS.contains(&node.kind()) + || (self == Self::Lua && node.kind() == "expression_list") + } + + fn dotted_expression_wrapper(self, node: TreeSitterNode<'_>) -> bool { + let kinds = match self { + Self::Python => PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS, + _ => DOTTED_EXPRESSION_WRAPPER_KINDS, + }; + kinds.contains(&node.kind()) + } + + fn unary_not_expression(self, node: TreeSitterNode<'_>, source: &str) -> bool { + matches!(node.kind(), "unary" | "unary_expression") + && node_text(node, source).trim_start().starts_with('!') + } + + fn unary_minus_expression(self, node: TreeSitterNode<'_>, source: &str) -> bool { + match self { + Self::Python => { + matches!(node.kind(), "unary" | "unary_expression" | "unary_operator") + && node_text(node, source).trim_start().starts_with('-') + } + Self::Lua => { + (matches!(node.kind(), "unary" | "unary_expression") + && node_text(node, source).trim_start().starts_with('-')) + || (node.kind() == "expression_list" + && node + .children(&mut node.walk()) + .next() + .map(|child| node_text(child, 
source) == "-") + .unwrap_or(false) + && named_children(node).len() == 1) + } + _ => { + matches!(node.kind(), "unary" | "unary_expression") + && node_text(node, source).trim_start().starts_with('-') } } - Some(self.wrap(node_type, operands, node)) } - fn normalize_comparison(&mut self, node: TreeSitterNode<'_>) -> Option { - let operands = self.named_children(node); - let left = operands.first().and_then(|left| self.normalize_node(*left)); - let right_raw = operands.get(1).copied()?; - let right = self.normalize_node(right_raw); - Some(self.wrap( - "OPCALL", - vec![ - optional_node(left), - Child::Symbol(self.comparison_operator(node)?), - list_or_nil(right.into_iter().collect(), right_raw, self), - ], - node, - )) - } + fn binary_operator(self, node: TreeSitterNode<'_>, source: &str) -> Option { + if let Some(operator) = direct_binary_operator(node, source) { + return Some(operator.to_string()); + } - fn normalize_operator_call(&mut self, node: TreeSitterNode<'_>) -> Option { - let operands = self.named_children(node); - let left = operands.first().and_then(|left| self.normalize_node(*left)); - let right_raw = operands.get(1).copied()?; - let right = self.normalize_node(right_raw); - Some(self.wrap( - "OPCALL", - vec![ - optional_node(left), - Child::Symbol(self.binary_operator(node)?), - list_or_nil(right.into_iter().collect(), right_raw, self), - ], - node, - )) + let raw_named = raw_named_children(node); + if raw_named.len() == 1 + && BINARY_WRAPPER_KINDS.contains(&raw_named[0].kind()) + && node_text(node, source) == node_text(raw_named[0], source) + { + return self.binary_operator(raw_named[0], source); + } + + None } - fn normalize_infix_statement(&mut self, node: TreeSitterNode<'_>) -> Option { - let (left_raw, operator, right_raw) = self.infix_statement_parts(node)?; - let left = self.normalize_node(left_raw); - let right = self.normalize_node(right_raw); - Some(self.wrap( - "OPCALL", - vec![ - optional_node(left), - Child::Symbol(operator), - 
list_or_nil(right.into_iter().collect(), right_raw, self), - ], - node, - )) + fn class_node(self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "class" | "class_definition" | "class_declaration" | "class_specifier" + ) } - fn normalize_unary_not(&mut self, node: TreeSitterNode<'_>) -> Option { - let operand = self.named_children(node).into_iter().next()?; - let operand = optional_node(self.normalize_node(operand)); - Some(self.wrap( - "OPCALL", - vec![operand, Child::Symbol("!".to_string()), Child::Nil], - node, - )) + fn identifier_text_node(self, node: TreeSitterNode<'_>, source: &str) -> bool { + self == Self::Lua + && matches!(node.kind(), "variable_list" | "expression_list") + && bare_identifier_text(node_text(node, source)) } - fn normalize_ternary_branch(&mut self, nodes: &[TreeSitterNode<'_>]) -> Option { - if nodes.is_empty() { - return None; - } - if nodes.len() == 1 { - return self.normalize_node(nodes[0]); + fn member_assignment_target(self, node: TreeSitterNode<'_>, source: &str) -> bool { + if self != Self::Lua || node.kind() != "variable_list" { + return false; } - if let Some(call) = self.normalize_flat_dotted_nodes(nodes) { - return Some(call); + + let raw_named = raw_named_children(node); + let target = if raw_named.len() == 1 + && raw_named[0].kind() == "dot_index_expression" + && node_text(node, source) == node_text(raw_named[0], source) + { + raw_named[0] + } else { + node + }; + + raw_named_children(target).len() == 2 + && target + .children(&mut target.walk()) + .any(|child| !child.is_named() && node_text(child, source) == ".") + } + + fn instance_variable(self, node: TreeSitterNode<'_>, source: &str) -> bool { + if node.kind() == "instance_variable" { + return true; } - self.normalize_body_nodes(nodes.to_vec(), nodes[0]) + + self == Self::Ruby + && node_text(node, source) + .strip_prefix('@') + .map(ruby_variable_name_text) + .unwrap_or(false) } - fn normalize_flat_dotted_nodes(&mut self, nodes: &[TreeSitterNode<'_>]) -> 
Option { - let receiver = *nodes.first()?; - let method = *nodes.get(1)?; - let connector = self - .source - .get(receiver.end_byte()..method.start_byte()) - .unwrap_or("") - .trim(); - if !matches!(connector, "." | "&.") { - return None; + fn global_variable(self, node: TreeSitterNode<'_>, source: &str) -> bool { + if node.kind() == "global_variable" { + return true; } - let node_type = if connector == "&." { "QCALL" } else { "CALL" }; - let receiver_node = optional_node(self.normalize_node(receiver)); - Some(self.wrap_from_nodes( - node_type, - vec![ - receiver_node, - Child::Symbol(node_text(method, self.source).trim_end_matches('=').to_string()), - Child::Nil, - ], - receiver, - method, - )) + + self == Self::Ruby + && node_text(node, source) + .strip_prefix('$') + .map(ruby_variable_name_text) + .unwrap_or(false) } - fn ternary_separator_bytes(&self, node: TreeSitterNode<'_>) -> Option<(usize, usize)> { - let mut question = None; - let mut colon = None; - for child in node.children(&mut node.walk()) { - if child.is_named() { - continue; - } - let text = node_text(child, self.source); - if text == "?" 
&& question.is_none() { - question = Some(child.start_byte()); - } else if text == ":" && question.is_some() { - colon = Some(child.start_byte()); - break; - } + fn assignment_operator(self, text: &str) -> bool { + match self { + Self::Ruby => matches!( + text, + "=" | "+=" + | "-=" + | "*=" + | "/=" + | "%=" + | "**=" + | "&&=" + | "||=" + | "&=" + | "|=" + | "^=" + | "<<=" + | ">>=" + ), + Self::Python => matches!( + text, + "=" | "+=" + | "-=" + | "*=" + | "/=" + | "%=" + | "//=" + | "**=" + | "@=" + | "&=" + | "|=" + | "^=" + | "<<=" + | ">>=" + | ":=" + ), + Self::Lua => text == "=", + Self::TypeScript => matches!( + text, + "=" | "+=" + | "-=" + | "*=" + | "/=" + | "%=" + | "**=" + | "<<=" + | ">>=" + | ">>>=" + | "&=" + | "|=" + | "^=" + | "&&=" + | "||=" + | "??=" + ), + Self::Default => matches!(text, "=" | "+=" | "-=" | "*=" | "/=" | "%="), } - Some((question?, colon?)) } - fn normalize_assignment(&mut self, node: TreeSitterNode<'_>) -> Option { - let left = self.assignment_left(node)?; - let right = self - .assignment_right(node) - .and_then(|right| self.normalize_node(right)); - if left.kind() == "left_assignment_list" { - return Some(self.normalize_multiple_assignment(left, right, node)); + fn unwrap_node(self, node: TreeSitterNode<'_>, source: &str, named_child_count: usize) -> bool { + if matches!( + node.kind(), + "parenthesized_expression" + | "parenthesized_statements" + | "expression_statement" + | "statement" + | "case_pattern" + | "match_pattern" + | "pattern" + ) && named_child_count == 1 + { + return true; } - if let Some(target) = self.assignment_target(left, right.clone(), node) { - return Some(target); + + if self != Self::Lua || node.kind() != "expression_list" || named_child_count != 1 { + return false; } - Some(self.wrap( - "LASGN", - vec![Child::String(self.target_name(left)), optional_node(right)], - node, - )) - } - fn normalize_operator_assignment(&mut self, node: TreeSitterNode<'_>) -> Option { - let left = 
self.assignment_left(node)?; - let right_raw = self.assignment_right(node); - let right = right_raw.and_then(|right| self.normalize_node(right)); - let operator = self.operator_assignment_operator(node); + let raw_named = raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "parenthesized_expression" + && node_text(raw_named[0], source) == node_text(node, source) + { + return true; + } - if left.kind() == "element_reference" { - let named = self.named_children(left); - let receiver = *named.first()?; - let args = named - .iter() - .skip(1) - .filter_map(|arg| self.normalize_node(*arg)) - .collect::>(); - let receiver = optional_node(self.normalize_node(receiver)); - return Some(self.wrap( - "OP_ASGN1", - vec![ - receiver, - Child::Symbol(operator), - list_or_nil(args, left, self), - optional_node(right), - ], - node, - )); - } + let mut cursor = node.walk(); + let raw_children = node.children(&mut cursor).collect::>(); + raw_children + .first() + .map(|child| node_text(*child, source) == "(") + .unwrap_or(false) + && raw_children + .last() + .map(|child| node_text(*child, source) == ")") + .unwrap_or(false) + } - if self.member_read_node(left) { - let (receiver, method) = self.member_parts(left)?; - let receiver = optional_node(self.normalize_node(receiver)); - return Some(self.wrap( - "OP_ASGN2", - vec![ - receiver, - Child::Nil, - Child::Symbol(method), - Child::Symbol(operator), - optional_node(right), - ], - node, - )); + fn interpolated_string( + self, + node: TreeSitterNode<'_>, + children: &[TreeSitterNode<'_>], + ) -> bool { + if node.kind() == "string" && children.iter().any(|child| child.kind() == "interpolation") { + return true; } - if let Some(logical) = - self.normalize_logical_operator_assignment(left, &operator, right.clone(), node) - { - return Some(logical); + self == Self::TypeScript + && node.kind() == "template_string" + && children + .iter() + .any(|child| child.kind() == "template_substitution") + } + + fn 
lambda_expression(self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.lambda_target(node, source).is_some() + } + + fn lambda_target<'tree>( + self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if node.kind() == "lambda" { + return Some(node); } - if left.kind() == "instance_variable" - || left.kind() == "global_variable" - || node_text(left, self.source).starts_with('@') - || node_text(left, self.source).starts_with('$') + if self == Self::TypeScript + && matches!(node.kind(), "arrow_function" | "function_expression") { - let value = self.augmented_assignment_value(left, &operator, right_raw, node); - return self.assignment_target(left, Some(value), node); + return Some(node); } - let value = self.augmented_assignment_value(left, &operator, right_raw, node); - self.assignment_target(left, Some(value.clone()), node) - .or_else(|| { - Some(self.wrap( - "LASGN", - vec![ - Child::String(self.target_name(left)), - Child::Node(Box::new(value)), - ], - node, - )) - }) - } + if self == Self::Lua { + if node.kind() == "function_definition" { + return Some(node); + } - fn normalize_logical_operator_assignment( - &mut self, - left: TreeSitterNode<'_>, - operator: &str, - right: Option, - source: TreeSitterNode<'_>, - ) -> Option { - if self.language != Language::Ruby || !matches!(operator, "||" | "&&") { - return None; - } - if !self.identifier_kind(left.kind()) { - return None; + if node.kind() == "expression_list" { + let named = named_children(node); + if named.len() == 1 + && named[0].kind() == "function_definition" + && node_text(named[0], source) == node_text(node, source) + { + return Some(named[0]); + } + } } - let name = self.target_name(left); - let node_type = if operator == "||" { - "OP_ASGN_OR" - } else { - "OP_ASGN_AND" - }; - let receiver = self.wrap("LVAR", vec![Child::String(name.clone())], left); - let assignment = self.wrap( - "LASGN", - vec![Child::String(name), optional_node(right)], - source, - ); - Some(self.wrap( - 
node_type, - vec![ - Child::Node(Box::new(receiver)), - Child::Symbol(operator.to_string()), - Child::Node(Box::new(assignment)), - ], - source, - )) - } - fn augmented_assignment_value( - &mut self, - left: TreeSitterNode<'_>, - operator: &str, - right_raw: Option>, - source: TreeSitterNode<'_>, - ) -> Node { - let receiver = optional_node(self.assignment_receiver(left)); - let right = right_raw.and_then(|right| self.normalize_node(right)); - self.wrap( - "CALL", - vec![ - receiver, - Child::Symbol(operator.to_string()), - list_or_nil(right.into_iter().collect(), right_raw.unwrap_or(left), self), - ], - source, - ) + None } - fn assignment_receiver(&mut self, left: TreeSitterNode<'_>) -> Option { - if self.identifier_kind(left.kind()) { - return Some(self.wrap( - "LVAR", - vec![Child::String(node_text(left, self.source).to_string())], - left, - )); - } - if left.kind() == "instance_variable" || node_text(left, self.source).starts_with('@') { - return Some(self.wrap( - "IVAR", - vec![Child::String(node_text(left, self.source).to_string())], - left, - )); - } - if left.kind() == "global_variable" || node_text(left, self.source).starts_with('$') { - return Some(self.wrap( - "GVAR", - vec![Child::String(node_text(left, self.source).to_string())], - left, - )); - } - if self.const_kind(left.kind()) { - return Some(self.normalize_const(left)); - } - self.normalize_node(left) + fn interpolation_node(self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "interpolation" + || (self == Self::TypeScript && node.kind() == "template_substitution") } - fn normalize_multiple_assignment( - &self, - left: TreeSitterNode<'_>, - right: Option, - source: TreeSitterNode<'_>, - ) -> Node { - let targets = self - .named_children(left) + fn explicit_alternative<'tree>( + self, + node: TreeSitterNode<'tree>, + ) -> Option> { + let alternatives: &[&str] = match self { + Self::Ruby => &["elsif", "else"], + Self::Python => &["elif_clause", "else", "else_clause"], + Self::Lua => 
&["elseif_statement", "else", "else_statement"], + Self::TypeScript => &["else", "else_clause"], + Self::Default => &["else", "else_clause", "else_statement"], + }; + named_children(node) .into_iter() - .map(|child| { - let node_type = if child.kind() == "global_variable" - || node_text(child, self.source).starts_with('$') - { - "GASGN" - } else { - "LASGN" - }; - self.wrap( - node_type, - vec![Child::String(self.target_name(child)), Child::Nil], - child, - ) - }) - .collect::>(); - self.wrap( - "MASGN", - vec![optional_node(right), list_or_nil(targets, left, self)], - source, - ) + .find(|child| alternatives.contains(&child.kind())) } +} - fn normalize_declaration(&mut self, node: TreeSitterNode<'_>) -> Option { - let mut assignments = Vec::new(); - for entry in self.declaration_entries(node) { - let Some(name) = self.declaration_name(entry) else { - continue; - }; - let right = self - .declaration_value(entry) - .and_then(|value| self.normalize_node(value)); - assignments.push(self.wrap( - "LASGN", - vec![Child::String(self.target_name(name)), optional_node(right)], - entry, - )); - } +fn direct_binary_operator<'source>( + node: TreeSitterNode<'_>, + source: &'source str, +) -> Option<&'source str> { + node.children(&mut node.walk()) + .find(|child| !child.is_named() && !matches!(node_text(*child, source), "(" | ")")) + .map(|child| node_text(child, source)) +} - if assignments.is_empty() { - None - } else if assignments.len() == 1 { - assignments.into_iter().next() - } else { - Some( - self.wrap( - "BLOCK", - assignments - .into_iter() - .map(|assignment| Child::Node(Box::new(assignment))) - .collect(), - node, - ), - ) +fn question_colon_ternary_parts<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kinds: &[&str], +) -> Option> { + if !kinds.contains(&node.kind()) { + return None; + } + let Some((question_byte, colon_byte)) = ternary_separator_bytes(node, source) else { + let raw_named = raw_named_children(node); + if raw_named.len() == 1 && 
node_text(raw_named[0], source) == node_text(node, source) { + return question_colon_ternary_parts(raw_named[0], source, kinds); } + return None; + }; + let named = named_children(node); + let condition = *named.first()?; + let positive = named + .iter() + .copied() + .filter(|child| child.start_byte() > question_byte && child.end_byte() <= colon_byte) + .collect::>(); + let negative = named + .iter() + .copied() + .filter(|child| child.start_byte() > colon_byte) + .collect::>(); + + if positive.is_empty() || negative.is_empty() { + return None; } - fn normalize_call(&mut self, node: TreeSitterNode<'_>) -> Option { - if self.call_block(node).is_some() { - return self.normalize_call_with_block(node); + Some(TernaryParts { + condition, + positive, + negative, + }) +} + +fn ternary_separator_bytes(node: TreeSitterNode<'_>, source: &str) -> Option<(usize, usize)> { + let mut question = None; + let mut colon = None; + for child in node.children(&mut node.walk()) { + if child.is_named() { + continue; } - if self.visibility_inline_def_call(node) { - return self.normalize_visibility_inline_def(node); + let text = node_text(child, source); + if text == "?" 
&& question.is_none() { + question = Some(child.start_byte()); + } else if text == ":" && question.is_some() { + colon = Some(child.start_byte()); + break; } - self.normalize_call_without_block(node, None) } + Some((question?, colon?)) +} - fn normalize_member_read(&mut self, node: TreeSitterNode<'_>) -> Option { - let Some((receiver, method)) = self.member_parts(node) else { - let children = self.normalize_children(node); - return Some(self.wrap(&kind_type(node.kind()), children, node)); - }; - let receiver = optional_node(self.normalize_node(receiver)); - Some(self.wrap( - "CALL", - vec![receiver, Child::Symbol(method), Child::Nil], - node, - )) +fn named_children<'tree>(node: TreeSitterNode<'tree>) -> Vec> { + node.children(&mut node.walk()) + .filter(|child| child.is_named()) + .collect() +} + +fn raw_named_children<'tree>(node: TreeSitterNode<'tree>) -> Vec> { + node.children(&mut node.walk()) + .filter(|child| child.is_named()) + .collect() +} + +fn identifier_kind_name(kind: &str) -> bool { + matches!( + kind, + "identifier" + | "simple_identifier" + | "property_identifier" + | "field_identifier" + | "shorthand_property_identifier" + ) +} + +fn case_arm_descendant(node: TreeSitterNode<'_>) -> bool { + let mut stack = named_children(node); + while let Some(child) = stack.pop() { + if CASE_ARGUMENT_WHEN_KINDS.contains(&child.kind()) { + return true; + } + stack.extend(named_children(child)); } + false +} - fn normalize_call_with_block(&mut self, node: TreeSitterNode<'_>) -> Option { - let block = self.call_block(node); - let call = self.normalize_call_without_block(node, block)?; - let args = self.normalize_block_parameters(block); - let body = block.and_then(|block| { - self.with_ruby_scope(block, false, |normalizer| { - let body_node = normalizer - .named_field(block, "body") - .or_else(|| normalizer.block_child(block)) - .unwrap_or(block); - normalizer.normalize_body(body_node).map(dynamic_scope) - }) - }); - let scope = self.scope(body, args, node); - 
Some(self.wrap( - "ITER", - vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], - node, - )) +fn descendant<'tree>(node: TreeSitterNode<'tree>, kinds: &[&str]) -> Option> { + let mut stack = named_children(node); + while let Some(child) = stack.pop() { + if kinds.contains(&child.kind()) { + return Some(child); + } + stack.extend(named_children(child)); } + None +} - fn normalize_statement_call_with_block(&mut self, node: TreeSitterNode<'_>) -> Option { - let block = self.call_block(node); - let call_source = if self.dotted_call(node) { - node - } else { - self.named_children(node).into_iter().find(|child| { - Some(*child) != block - && (self.call_kind(child.kind()) || self.member_read_node(*child)) - })? - }; - let call = self.normalize_call_without_block(call_source, block)?; - let args = self.normalize_block_parameters(block); - let body = block.and_then(|block| { - self.with_ruby_scope(block, false, |normalizer| { - let body_node = normalizer - .named_field(block, "body") - .or_else(|| normalizer.block_child(block)) - .unwrap_or(block); - normalizer.normalize_body(body_node).map(dynamic_scope) - }) - }); - let scope = self.scope(body, args, node); - Some(self.wrap( - "ITER", - vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], - node, - )) +fn concatenated_string_node<'tree>(node: TreeSitterNode<'tree>) -> Option> { + if !CONCATENATED_STRING_NODE_KINDS.contains(&node.kind()) { + return None; + } + let children = named_children(node); + if children.len() > 1 && children.iter().all(|child| child.kind() == "string") { + Some(node) + } else { + None } +} - fn normalize_dotted_expression(&mut self, node: TreeSitterNode<'_>) -> Option { - let block = self.call_block(node); - let call = self.normalize_dotted_call_expression(node)?; - let Some(block) = block else { - return Some(call); - }; - let args = self.normalize_block_parameters(Some(block)); - let body = self.with_ruby_scope(block, false, |normalizer| { - let body_node = normalizer - 
.named_field(block, "body") - .or_else(|| normalizer.block_child(block)) - .unwrap_or(block); - normalizer.normalize_body(body_node).map(dynamic_scope) - }); - let scope = self.scope(body, args, node); - Some(self.wrap( - "ITER", - vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], - node, - )) +fn concatenated_string_target<'tree>(node: TreeSitterNode<'tree>) -> Option> { + if let Some(target) = concatenated_string_node(node) { + return Some(target); } + let children = named_children(node); + if children.len() == 1 { + return concatenated_string_target(children[0]); + } + None +} - fn normalize_call_without_block( - &mut self, - node: TreeSitterNode<'_>, - block: Option>, - ) -> Option { - let call_source = block.map(|block| self.source_before_child(node, block)); - if self.dotted_call(node) { - let (receiver, method) = self.dotted_call_parts(node, block)?; - let args = self.call_arguments(node, None); - let node_type = if self.safe_navigation_call(node) { - "QCALL" - } else { - "CALL" - }; - let receiver = optional_node(self.normalize_node(receiver)); - let args = list_or_nil(args, node, self); - if let Some(source) = call_source.as_ref() { - return Some(self.wrap_from_source_node( - node_type, - vec![receiver, Child::Symbol(method), args], - source, - )); - } - return Some(self.wrap(node_type, vec![receiver, Child::Symbol(method), args], node)); - } +fn bracketed(node: TreeSitterNode<'_>, source: &str, opening: &str, closing: &str) -> bool { + let children = node.children(&mut node.walk()).collect::>(); + children + .first() + .map(|child| node_text(*child, source) == opening) + .unwrap_or(false) + && children + .last() + .map(|child| node_text(*child, source) == closing) + .unwrap_or(false) +} - let function = self - .named_field(node, "function") - .or_else(|| self.named_field(node, "call")) - .or_else(|| { - self.named_children(node) - .into_iter() - .find(|child| Some(*child) != block) - })?; - let args = self.call_arguments(node, 
Some(function)); - if self.identifier_kind(function.kind()) { - let node_type = if args.is_empty() { "VCALL" } else { "FCALL" }; - return Some(self.wrap( - node_type, - vec![ - Child::Symbol(node_text(function, self.source).to_string()), - list_or_nil(args, node, self), - ], - node, - )); - } - if self.language == Language::Ruby && self.const_kind(function.kind()) { - return Some(self.wrap( - "FCALL", - vec![ - Child::Symbol(node_text(function, self.source).to_string()), - list_or_nil(args, node, self), - ], - node, - )); +fn statement_block_wrapper(node: TreeSitterNode<'_>) -> bool { + node.kind() == "block" + && node + .parent() + .map(|parent| STATEMENT_BLOCK_PARENT_KINDS.contains(&parent.kind())) + .unwrap_or(false) +} + +fn element_reference_shape(node: TreeSitterNode<'_>, source: &str) -> bool { + let children = node.children(&mut node.walk()).collect::>(); + children + .first() + .map(|child| node_text(*child, source) != "[") + .unwrap_or(false) + && children + .iter() + .any(|child| !child.is_named() && node_text(*child, source) == "[") + && children + .iter() + .any(|child| !child.is_named() && node_text(*child, source) == "]") + && named_children(node).len() >= 2 + && named_children(node) + .iter() + .all(|child| !matches!(child.kind(), "block" | "do_block")) +} + +fn lua_positional_table_target<'tree>( + node: TreeSitterNode<'tree>, + source: &str, +) -> Option> { + if node.kind() == "block" { + let named = named_children(node); + if named.len() == 1 && named[0].kind() == "function_call" { + return lua_positional_table_target(named[0], source); } - if self.member_read_node(function) { - let (receiver, method) = self.member_parts(function)?; - let receiver = optional_node(self.normalize_node(receiver)); - let args = list_or_nil(args, node, self); - return Some(self.wrap("CALL", vec![receiver, Child::Symbol(method), args], node)); + } + + if node.kind() == "function_call" { + let named = named_children(node); + if named.len() == 2 + && named[0].kind() == 
"identifier" + && node_text(named[0], source).is_empty() + { + return lua_positional_table_target(named[1], source); } - let function = optional_node(self.normalize_node(function)); - let args = list_or_nil(args, node, self); - Some(self.wrap( - "CALL", - vec![function, Child::Symbol("call".to_string()), args], - node, - )) } - fn normalize_element_reference(&mut self, node: TreeSitterNode<'_>) -> Option { - let named = self.named_children(node); - let receiver = *named.first()?; - let args = named - .iter() - .skip(1) - .filter_map(|arg| self.normalize_node(*arg)) - .collect::>(); - if receiver.kind() == "self" { - return Some(self.wrap( - "FCALL", - vec![ - Child::Symbol("[]".to_string()), - list_or_nil(args, node, self), - ], - node, - )); + if node.kind() == "arguments" { + let table = named_children(node) + .into_iter() + .find(|child| child.kind() == "table_constructor")?; + if node_text(node, source).trim() == node_text(table, source).trim() { + return lua_positional_table_target(table, source).map(|_| node); } - let receiver = optional_node(self.normalize_node(receiver)); - let args = list_or_nil(args, node, self); - Some(self.wrap( - "CALL", - vec![receiver, Child::Symbol("[]".to_string()), args], - node, - )) + return None; } - fn normalize_rescue_modifier(&mut self, node: TreeSitterNode<'_>) -> Option { - let named = self.named_children(node); - let body = named.first().and_then(|body| self.normalize_node(*body)); - let handler = named - .get(1) - .and_then(|handler| self.normalize_node(*handler)); - let resbody = self.wrap( - "RESBODY", - vec![Child::Nil, optional_node(handler), Child::Nil], - node, - ); - Some(self.wrap( - "RESCUE", - vec![ - optional_node(body), - Child::Node(Box::new(resbody)), - Child::Nil, - ], - node, - )) + if node.kind() == "table_constructor" { + let fields = named_children(node); + if fields.is_empty() { + return None; + } + if fields.iter().all(|field| { + field.kind() == "field" && { + let named = named_children(*field); + 
named.len() <= 1 + } + }) { + return Some(node); + } } - fn normalize_ensure_clause(&mut self, node: TreeSitterNode<'_>) -> Option { - if self.dotted_call(node) { - return self.normalize_dotted_call_expression(node); + None +} + +fn lua_keyed_table_target<'tree>( + node: TreeSitterNode<'tree>, + source: &str, +) -> Option> { + if node.kind() == "block" { + let named = named_children(node); + if named.len() == 1 && node_text(named[0], source).trim() == node_text(node, source).trim() + { + return lua_keyed_table_target(named[0], source); } - if let Some(call) = self.first_dotted_call_descendant(node) { - return self.normalize_node(call); + if named.len() == 2 + && named[0].kind() == "identifier" + && node_text(named[0], source).is_empty() + { + return lua_keyed_table_target(named[1], source); } - self.normalize_body_nodes(self.named_children(node), node) } - fn normalize_dotted_call_expression(&mut self, node: TreeSitterNode<'_>) -> Option { - let (receiver_raw, method) = self.dotted_call_parts(node, None)?; - let args = self.call_arguments(node, None); - let args = list_or_nil(args, node, self); - let receiver = optional_node(self.normalize_node(receiver_raw)); - let node_type = if self.safe_navigation_call(node) { - "QCALL" - } else { - "CALL" - }; - let source_end = self - .named_children(node) - .into_iter() - .filter(|child| !matches!(child.kind(), "block" | "do_block")) - .last() - .unwrap_or(receiver_raw); - Some(self.wrap_from_nodes( - node_type, - vec![receiver, Child::Symbol(method), args], - receiver_raw, - source_end, - )) + if node.kind() == "function_call" { + let named = named_children(node); + if named.len() == 2 + && named[0].kind() == "identifier" + && node_text(named[0], source).is_empty() + { + return lua_keyed_table_target(named[1], source); + } } - fn normalize_begin(&mut self, node: TreeSitterNode<'_>) -> Option { - let named = self.named_children(node); - let rescue_nodes = named - .iter() - .copied() - .filter(|child| child.kind() == 
"rescue") - .collect::>(); - let ensure_node = named.iter().copied().find(|child| child.kind() == "ensure"); - if rescue_nodes.is_empty() { - let Some(ensure_node) = ensure_node else { - let children = self.normalize_children(node); - return Some(self.wrap("BEGIN", children, node)); - }; - let body_nodes = named + if node.kind() == "arguments" { + if bracketed(node, source, "{", "}") { + let fields = named_children(node); + if fields.is_empty() { + return Some(node); + } + if fields .iter() - .copied() - .take_while(|child| child.kind() != "ensure") - .collect::>(); - let body = - self.normalize_body_nodes(body_nodes.clone(), *body_nodes.first().unwrap_or(&node)); - let ensure_body = self.normalize_body(ensure_node); - return Some(self.wrap( - "ENSURE", - vec![optional_node(body), optional_node(ensure_body)], - node, - )); + .any(|field| field.kind() != "field" || named_children(*field).len() > 1) + { + return Some(node); + } + return None; } - let body_nodes = named + let table = named_children(node) + .into_iter() + .find(|child| child.kind() == "table_constructor")?; + if node_text(node, source).trim() == node_text(table, source).trim() { + return lua_keyed_table_target(table, source).map(|_| node); + } + return None; + } + + if node.kind() == "table_constructor" { + let fields = named_children(node); + if fields.is_empty() { + return Some(node); + } + if fields .iter() - .copied() - .take_while(|child| child.kind() != "rescue") - .collect::>(); - let body = - self.normalize_body_nodes(body_nodes.clone(), *body_nodes.first().unwrap_or(&node)); - let resbodies = rescue_nodes - .iter() - .filter_map(|child| self.normalize_rescue_clause(*child)) - .collect::>(); - let rescued = self.wrap( - "RESCUE", - vec![ - optional_node(body), - optional_node(self.link_rescue_chain(resbodies)), - Child::Nil, - ], - node, - ); - let Some(ensure_node) = ensure_node else { - return Some(rescued); - }; - let ensure_body = self.normalize_body(ensure_node); - Some(self.wrap( - 
"ENSURE", - vec![Child::Node(Box::new(rescued)), optional_node(ensure_body)], - node, - )) - } - - fn normalize_rescue_clause(&mut self, node: TreeSitterNode<'_>) -> Option { - let exceptions = self - .named_children(node) - .into_iter() - .find(|child| child.kind() == "exceptions"); - let exception_nodes = exceptions - .map(|exceptions| { - self.named_children(exceptions) - .into_iter() - .filter_map(|child| self.normalize_node(child)) - .collect::>() - }) - .unwrap_or_default(); - let exception_variable = self.rescue_exception_variable(node); - let handler = self.named_children(node).into_iter().rev().find(|child| { - !matches!( - child.kind(), - "exceptions" | "exception_variable" | "comment" - ) - }); - let normalized_handler = handler.and_then(|handler| self.normalize_body(handler)); - let body = self.prepend_rescue_exception_assignment(normalized_handler, exception_variable); - Some(self.wrap( - "RESBODY", - vec![ - list_or_nil(exception_nodes, exceptions.unwrap_or(node), self), - optional_node(body), - Child::Nil, - ], - node, - )) - } - - fn link_rescue_chain(&self, mut resbodies: Vec) -> Option { - let mut next = None; - while let Some(mut current) = resbodies.pop() { - while current.children.len() <= 2 { - current.children.push(Child::Nil); - } - current.children[2] = optional_node(next); - next = Some(current); + .any(|field| field.kind() != "field" || named_children(*field).len() > 1) + { + return Some(node); } - next } - fn rescue_exception_variable(&self, node: TreeSitterNode<'_>) -> Option { - let variable = self - .named_children(node) - .into_iter() - .find(|child| child.kind() == "exception_variable")?; - let name = self - .named_children(variable) - .into_iter() - .find(|child| self.identifier_kind(child.kind()))?; - let errinfo = self.wrap("ERRINFO", Vec::new(), variable); - Some(self.wrap( - "LASGN", - vec![ - Child::String(node_text(name, self.source).to_string()), - Child::Node(Box::new(errinfo)), - ], - variable, - )) - } + None +} - fn 
prepend_rescue_exception_assignment( - &self, - body: Option, - assignment: Option, - ) -> Option { - let Some(assignment) = assignment else { - return body; - }; - let Some(mut body) = body else { - return Some(assignment); - }; - if body.r#type == "BLOCK" { - let mut children = vec![Child::Node(Box::new(assignment))]; - children.extend(body.children); - body.children = children; - Some(body) - } else { - let first_lineno = assignment.first_lineno; - let first_column = assignment.first_column; - let last_lineno = body.last_lineno; - let last_column = body.last_column; - let text = if assignment.text.is_empty() { - body.text.clone() - } else if body.text.is_empty() { - assignment.text.clone() - } else { - format!("{} {}", assignment.text, body.text) - }; - Some(Node { - r#type: "BLOCK".to_string(), - children: vec![ - Child::Node(Box::new(assignment)), - Child::Node(Box::new(body)), - ], - first_lineno, - first_column, - last_lineno, - last_column, - text, - }) - } - } +struct TreeSitterNormalizer<'source> { + source: &'source str, + language: Language, + normalization_adapter: TreeSitterNormalizationAdapter, + local_stack: Vec>, + root_span: Option, + current_heredoc_body_span: Option, +} - fn normalize_modifier_statement(&mut self, node: TreeSitterNode<'_>) -> Option { - let named = self.named_children(node); - let action = *named.first()?; - let condition = *named.last()?; - let condition = optional_node(self.normalize_node(condition)); - let action = optional_node(self.normalize_node(action)); - Some(self.wrap("IF", vec![condition, action, Child::Nil], node)) +impl<'source> TreeSitterNormalizer<'source> { + fn new(source: &'source str, language: Language) -> Self { + Self { + source, + language, + normalization_adapter: TreeSitterNormalizationAdapter::for_language(language), + local_stack: Vec::new(), + root_span: None, + current_heredoc_body_span: None, + } } - fn normalize_modifier_action(&mut self, node: TreeSitterNode<'_>) -> Option { - 
self.normalize_node(node) + fn normalize(mut self, root: TreeSitterNode<'_>) -> Node { + self.root_span = Some(span(root)); + let children = if self.ruby() { + self.with_ruby_scope(root, true, |normalizer| normalizer.normalize_children(root)) + } else { + self.normalize_children(root) + }; + self.wrap("ROOT", children, root) } - fn normalize_command_call_statement(&mut self, node: TreeSitterNode<'_>) -> Option { - let function = self.named_children(node).into_iter().next()?; - if self.visibility_inline_def_statement(node, function) { - let method = self.inline_def_from_statement(node); + fn normalize_node(&mut self, node: TreeSitterNode<'_>) -> Option { + if node.kind() == "comment" { + return None; + } + if self.assignment_lhs(node) { + return self.normalize_assignment_lhs(node); + } + if self.infix_statement(node) { + return self.normalize_infix_statement(node); + } + if self.ternary_statement(node) { + return self.normalize_ternary_statement(node); + } + if self.leading_function_statement(node) { + return self.normalize_leading_function_statement(node); + } + if self.leading_owner_statement(node) { + return self.normalize_leading_owner_statement(node); + } + if self.leading_if_statement(node) { + return self.normalize_leading_if_statement(node); + } + if node.kind() == "elsif" { + return Some(self.normalize_elsif(node)); + } + if self.ensure_body_statement(node) { + return self.normalize_ensure_body_statement(node); + } + if self.rescue_body_statement(node) { + return self.normalize_rescue_body_statement(node); + } + if if_kind(node.kind()) { + return self.normalize_if(node); + } + if self.leading_case_statement(node) { + return self.normalize_leading_case_statement(node); + } + if self.leading_loop_statement(node) { + return self.normalize_leading_loop_statement(node); + } + if let Some(loop_type) = loop_kind(node.kind()) { + return self.normalize_loop(node, loop_type); + } + if self.case_kind(node.kind()) || self.hidden_match(node) { + return 
self.normalize_case(node); + } + if self.hash_literal_statement(node) { + return self.normalize_hash_literal_statement(node); + } + if self.array_literal_statement(node) { + return self.normalize_array_literal_statement(node); + } + if self.element_reference_statement(node) { + return self.normalize_element_reference_statement(node); + } + if self.concatenated_string_statement(node) { + return Some(self.normalize_concatenated_string_statement(node)); + } + if self.interpolated_statement(node) { + return Some(self.normalize_interpolated_statement(node)); + } + if self.wrapped_return_statement(node) { + return self.normalize_wrapped_return_statement(node); + } + if self.heredoc_body_statement(node) { + return self.normalize_heredoc_body_statement(node); + } + if self.empty_body_statement(node) { + return None; + } + if self.terminal_statement(node) { + return Some(self.normalize_terminal_statement(node)); + } + if self.modifier_statement(node) { + return self.normalize_modifier_statement(node); + } + if self.statement_call_with_block(node) { + return self.normalize_statement_call_with_block(node); + } + if self.super_statement(node) { + return Some(self.normalize_super_statement(node)); + } + if self.command_call_statement(node) { + return self.normalize_command_call_statement(node); + } + if self.yield_statement(node) { + return Some(self.normalize_yield_statement(node)); + } + if self.yield_argument_list(node) { + return Some(self.normalize_yield_argument_list(node)); + } + if self.super_statement(node) { + return Some(self.normalize_super_statement(node)); + } + if self.unary_not_statement(node) { + return self.normalize_unary_not_statement(node); + } + if self.dotted_expression(node) { + return self.normalize_dotted_expression(node); + } + if self.unary_minus_expression(node) { + return self.normalize_unary_minus(node); + } + if self.unary_not_expression(node) { + return self.normalize_unary_not(node); + } + if self.boolean_expression(node) { + return 
self.normalize_boolean(node); + } + if self.operator_call_expression(node) { + return self.normalize_operator_call(node); + } + if self.comparison_expression(node) { + return self.normalize_comparison(node); + } + if self.self_node(node) { + return Some(self.wrap("SELF", Vec::new(), node)); + } + if self.instance_variable(node) { return Some(self.wrap( - "FCALL", - vec![ - Child::Symbol(node_text(function, self.source).to_string()), - list_or_nil(method.into_iter().collect(), node, self), - ], + "IVAR", + vec![Child::String(node_text(node, self.source).to_string())], node, )); } - let args_node = self - .named_children(node) - .into_iter() - .find(|child| matches!(child.kind(), "argument_list" | "arguments")); - let args = args_node - .map(|args| self.command_arguments(args)) - .unwrap_or_default(); - let block = self.call_block(node); - let call_source = block.map(|block| self.source_before_child(node, block)); - if node_text(function, self.source) == "yield" { - let children = vec![list_or_nil(args, args_node.unwrap_or(node), self)]; - if let Some(source) = call_source.as_ref() { - return Some(self.wrap_from_source_node("YIELD", children, source)); - } - return Some(self.wrap("YIELD", children, node)); + if self.global_variable(node) { + return Some(self.normalize_global_variable(node)); } - let call_type = if args.is_empty() { "VCALL" } else { "FCALL" }; - let call_children = vec![ - Child::Symbol(node_text(function, self.source).to_string()), - list_or_nil(args, args_node.unwrap_or(node), self), - ]; - let call = if let Some(source) = call_source.as_ref() { - self.wrap_from_source_node(call_type, call_children, source) - } else { - self.wrap(call_type, call_children, node) - }; - let Some(block) = block else { - return Some(call); - }; - let block_args = self.normalize_block_parameters(Some(block)); - let body = self.with_ruby_scope(block, false, |normalizer| { + if self.class_node(node) { + return self.normalize_class(node); + } + if self.module_node(node) { + 
return self.normalize_module(node); + } + if self.lambda_expression(node) { + return self.normalize_lambda(node); + } + + match node.kind() { + "program" => { + let children = self.normalize_children(node); + Some(self.wrap("ROOT", children, node)) + } + "method" + | "function_definition" + | "function_declaration" + | "method_definition" + | "method_declaration" + | "function_item" => self.normalize_function(node), + "impl_item" => self.normalize_impl(node), + "singleton_method" => self.normalize_singleton_function(node), + _ if self.block_kind(node.kind()) => { + let children = self.normalize_children(node); + Some(self.wrap("BLOCK", children, node)) + } + "ensure" => self.normalize_ensure_clause(node), + "begin" => self.normalize_begin(node), + "subshell" => Some(self.normalize_subshell(node)), + "block_argument" => self.normalize_block_argument(node), + "singleton_class" => self.normalize_singleton_class(node), + "yield" => Some(self.normalize_yield(node)), + "operator_assignment" => self.normalize_operator_assignment(node), + "assignment" | "assignment_expression" | "assignment_statement" => { + self.normalize_assignment(node) + } + "variable_declarator" if !self.has_assignment_operator_child(node) => { + Some(self.wrap(&kind_type(node.kind()), Vec::new(), node)) + } + "expression_list" if self.single_short_var_lhs(node) => { + Some(self.wrap(&kind_type(node.kind()), Vec::new(), node)) + } + "call" | "call_expression" | "method_call" | "method_call_expression" => { + self.normalize_call(node) + } + _ if self.member_read_node(node) => self.normalize_member_read(node), + _ if self.unwrap_node(node) => self + .named_children(node) + .into_iter() + .next() + .and_then(|child| self.normalize_node(child)), + "element_reference" => self.normalize_element_reference(node), + "rescue_modifier" => self.normalize_rescue_modifier(node), + "super" => Some(self.normalize_super(node)), + "return" | "return_statement" | "return_expression" | "break" | "break_statement" + | 
"break_expression" | "next" | "continue_statement" => self.normalize_return(node), + "nil" | "none" | "null" => Some(self.wrap("NIL", Vec::new(), node)), + "true" => Some(self.wrap("TRUE", Vec::new(), node)), + "false" => Some(self.wrap("FALSE", Vec::new(), node)), + "instance_variable" => Some(self.wrap( + "IVAR", + vec![Child::String(node_text(node, self.source).to_string())], + node, + )), + "identifier" + | "simple_identifier" + | "property_identifier" + | "field_identifier" + | "shorthand_property_identifier" => Some(self.normalize_identifier(node)), + "constant" | "scope_resolution" | "type_identifier" | "scoped_type_identifier" => { + Some(self.normalize_const(node)) + } + "self" | "this" => Some(self.wrap("SELF", Vec::new(), node)), + "global_variable" => Some(self.normalize_global_variable(node)), + "array" => Some(self.normalize_array_literal(node)), + _ if self.interpolation_node(node) => self.normalize_interpolation(node), + "heredoc_beginning" => Some(self.normalize_heredoc_beginning(node)), + "chained_string" | "concatenated_string" => Some(self.normalize_chained_string(node)), + "string" + | "string_content" + | "string_literal" + | "interpreted_string_literal" + | "raw_string_literal" => { + if self.interpolated_string(node) { + Some(self.normalize_interpolated_string(node)) + } else if let Some(content) = self.lua_no_paren_string_argument_content(node) { + Some(self.wrap( + "STR", + vec![Child::String(node_text(content, self.source).to_string())], + content, + )) + } else { + Some(self.wrap( + "STR", + vec![Child::String(node_text(node, self.source).to_string())], + node, + )) + } + } + "integer" => Some(self.wrap("INTEGER", Vec::new(), node)), + "float" | "float_literal" => Some(self.wrap("FLOAT", Vec::new(), node)), + "pair" => self.normalize_pair(node), + "simple_symbol" | "symbol" => Some(self.wrap( + "LIT", + vec![Child::Symbol( + node_text(node, self.source).trim_start_matches(':').to_string(), + )], + node, + )), + _ => { + let children = 
self.normalize_children(node); + Some(self.wrap(&kind_type(node.kind()), children, node)) + } + } + } + + fn normalize_function(&mut self, node: TreeSitterNode<'_>) -> Option { + if node.kind() == "singleton_method" { + return self.normalize_singleton_function(node); + } + + let name = self.function_name(node)?; + let args = self.normalize_parameters(self.parameters_child(node)); + let body = self.with_ruby_scope(node, true, |normalizer| { let body_node = normalizer - .named_field(block, "body") - .or_else(|| normalizer.block_child(block)) - .unwrap_or(block); - normalizer.normalize_body(body_node).map(dynamic_scope) + .named_field(node, "body") + .or_else(|| normalizer.block_child(node))?; + let body = normalizer.normalize_body(body_node); + let body = normalizer.elide_tail_returns(body); + let body = normalizer.prepend_inline_parameter_begin(node, body); + normalizer.elide_implicit_nil_body(body) }); + let scope = self.scope(body, args, node); Some(self.wrap( - "ITER", - vec![ - Child::Node(Box::new(call)), - Child::Node(Box::new(self.scope(body, block_args, node))), - ], + "DEFN", + vec![Child::Symbol(name), Child::Node(Box::new(scope))], node, )) } - fn normalize_visibility_inline_def(&mut self, node: TreeSitterNode<'_>) -> Option { - let message = - node_text(self.named_children(node).into_iter().next()?, self.source).to_string(); - let args = self - .named_children(node) - .into_iter() - .find(|child| child.kind() == "argument_list"); - let method = args.and_then(|args| self.inline_def_from_source(args)); + fn normalize_leading_function_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.leading_function_target(node)?; + if function_kind(target.kind()) { + return self.normalize_function(target); + } + let name = self + .leading_function_name(target) + .map(|name| node_text(name, self.source).to_string())?; + let body_node = self.leading_function_body(target); + let body = self.with_ruby_scope(target, true, |normalizer| { + let body 
= body_node.and_then(|body| normalizer.normalize_body(body)); + normalizer.elide_tail_returns(body) + }); Some(self.wrap( - "FCALL", + "DEFN", vec![ - Child::Symbol(message), - list_or_nil(method.into_iter().collect(), args.unwrap_or(node), self), + Child::Symbol(name), + Child::Node(Box::new(self.scope(body, None, target))), ], - node, + target, )) } - fn normalize_const(&mut self, node: TreeSitterNode<'_>) -> Node { - if matches!(node.kind(), "scope_resolution" | "scoped_type_identifier") { - let parts = self.named_children(node); - let base = parts - .first() - .map(|part| self.normalize_const(*part)) - .map(|part| Child::Node(Box::new(part))) - .unwrap_or(Child::Nil); - let name = self - .named_field(node, "name") - .or_else(|| parts.last().copied()) - .map(|name| node_text(name, self.source).to_string()) - .unwrap_or_default(); - return self.wrap("COLON2", vec![base, Child::Symbol(name)], node); - } - - self.wrap( - "CONST", - vec![Child::Symbol(node_text(node, self.source).to_string())], + fn normalize_singleton_function(&mut self, node: TreeSitterNode<'_>) -> Option { + let name = self.function_name(node)?; + let receiver = self + .singleton_receiver(node) + .and_then(|child| self.normalize_node(child)) + .unwrap_or_else(|| self.wrap("SELF", Vec::new(), node)); + let args = self.normalize_parameters(self.parameters_child(node)); + let body = self.with_ruby_scope(node, true, |normalizer| { + let body_node = normalizer + .named_field(node, "body") + .or_else(|| normalizer.block_child(node))?; + let body = normalizer.normalize_body(body_node); + let body = normalizer.elide_tail_returns(body); + let body = normalizer.prepend_inline_parameter_begin(node, body); + normalizer.elide_implicit_nil_body(body) + }); + let scope = self.scope(body, args, node); + Some(self.wrap( + "DEFS", + vec![ + Child::Node(Box::new(receiver)), + Child::Symbol(name), + Child::Node(Box::new(scope)), + ], node, - ) + )) } - fn const_for(&mut self, node: Option>, source: 
TreeSitterNode<'_>) -> Node { - let Some(node) = node else { - return self.wrap( - "CONST", - vec![Child::Symbol("(anonymous)".to_string())], - source, - ); - }; - if matches!( - node.kind(), - "constant" | "scope_resolution" | "type_identifier" | "scoped_type_identifier" - ) { - return self.normalize_const(node); - } - self.wrap( - "CONST", - vec![Child::Symbol(node_text(node, self.source).to_string())], + fn normalize_class(&mut self, node: TreeSitterNode<'_>) -> Option { + let name = self.const_for( + self.named_field(node, "name") + .or_else(|| self.first_named(node)), node, - ) - } - - fn normalize_global_variable(&self, node: TreeSitterNode<'_>) -> Node { - let text = node_text(node, self.source).to_string(); - if let Some(number) = text - .strip_prefix('$') - .and_then(|value| value.parse::().ok()) - { - return self.wrap("NTH_REF", vec![Child::String(number.to_string())], node); - } - self.wrap("GVAR", vec![Child::String(text)], node) - } - - fn normalize_array_literal(&mut self, node: TreeSitterNode<'_>) -> Node { - let values = self - .named_children(node) - .into_iter() - .filter_map(|child| self.normalize_node(child)) - .collect::>(); - if values.is_empty() { - self.wrap("ZLIST", Vec::new(), node) - } else { - self.list(values, node) - } - } - - fn normalize_pair(&mut self, node: TreeSitterNode<'_>) -> Option { - let named = self.named_children(node); - let key = *named.first()?; - let value = named.get(1).and_then(|value| self.normalize_node(*value)); - let key_text = node_text(key, self.source) - .trim_end_matches(':') - .trim_start_matches(':') - .to_string(); - let key_lit = self.wrap("LIT", vec![Child::Symbol(key_text)], key); + ); + let body = self + .named_field(node, "body") + .or_else(|| self.block_child(node)) + .and_then(|body| self.normalize_body(body)); Some(self.wrap( - "HASH", - vec![Child::Node(Box::new(key_lit)), optional_node(value)], + "CLASS", + vec![ + Child::Node(Box::new(name)), + Child::Nil, + Child::Node(Box::new(self.scope(body, 
None, node))), + ], node, )) } - fn normalize_block_argument(&mut self, node: TreeSitterNode<'_>) -> Option { - let value = self - .named_children(node) - .into_iter() - .next() - .and_then(|child| self.normalize_node(child)); - Some(self.wrap("BLOCK_PASS", vec![Child::Nil, optional_node(value)], node)) + fn normalize_impl(&mut self, node: TreeSitterNode<'_>) -> Option { + let type_node = self.named_field(node, "type").or_else(|| { + self.named_children(node).into_iter().find(|child| { + matches!( + child.kind(), + "type_identifier" | "scoped_type_identifier" | "identifier" + ) + }) + }); + let name = self.const_for(type_node, node); + let body = self + .named_field(node, "body") + .or_else(|| self.block_child(node)) + .or(Some(node)) + .and_then(|body| self.normalize_body(body)); + Some(self.wrap( + "CLASS", + vec![ + Child::Node(Box::new(name)), + Child::Nil, + Child::Node(Box::new(self.scope(body, None, node))), + ], + node, + )) } - fn normalize_interpolated_string(&mut self, node: TreeSitterNode<'_>) -> Node { - let children = self.normalize_children(node); - self.wrap("DSTR", children, node) + fn normalize_python_nested_class_as_iter(&mut self, node: TreeSitterNode<'_>) -> Option { + let name_node = self + .named_field(node, "name") + .or_else(|| self.first_named(node))?; + let name = node_text(name_node, self.source).to_string(); + let header_end = node + .children(&mut node.walk()) + .find(|child| !child.is_named() && node_text(*child, self.source) == ":") + .unwrap_or(name_node); + let call = self.wrap_from_nodes( + "VCALL", + vec![Child::Symbol(name), Child::Nil], + node, + header_end, + ); + let body = self + .named_field(node, "body") + .or_else(|| self.block_child(node)) + .and_then(|body| self.normalize_body(body)); + let scope = self.scope(body, None, node); + Some(self.wrap( + "ITER", + vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], + node, + )) } - fn normalize_subshell(&mut self, node: TreeSitterNode<'_>) -> Node { - let children 
= self - .named_children(node) - .into_iter() - .filter_map(|child| match child.kind() { - "interpolation" => self - .normalize_interpolation(child) - .map(|node| Child::Node(Box::new(node))), - "string_content" => Some(Child::Node(Box::new(self.wrap( - "STR", - vec![Child::String(node_text(child, self.source).to_string())], - child, - )))), - _ => None, - }) - .collect::>(); - let node_type = if children - .iter() - .any(|child| matches!(child, Child::Node(node) if node.r#type == "EVSTR")) - { - "DXSTR" - } else { - "XSTR" - }; - self.wrap(node_type, children, node) + fn normalize_module(&mut self, node: TreeSitterNode<'_>) -> Option { + let name = self.const_for( + self.named_field(node, "name") + .or_else(|| self.first_named(node)), + node, + ); + let body = self + .named_field(node, "body") + .or_else(|| self.block_child(node)) + .and_then(|body| self.normalize_body(body)); + Some(self.wrap( + "MODULE", + vec![ + Child::Node(Box::new(name)), + Child::Node(Box::new(self.scope(body, None, node))), + ], + node, + )) } - fn normalize_chained_string(&mut self, node: TreeSitterNode<'_>) -> Node { - let mut parts = Vec::new(); - let mut dynamic_source = None; - let mut first_child = None; - for child in self.named_children(node) { - first_child.get_or_insert(child); - let Some(normalized) = self.normalize_node(child) else { - continue; - }; - if normalized.r#type == "DSTR" { - if dynamic_source.is_none() - && normalized - .children - .iter() - .filter_map(self::node) - .any(|part| part.r#type == "EVSTR") - { - dynamic_source = Some(child); - } - parts.extend(normalized.children); - } else { - parts.push(Child::Node(Box::new(normalized))); - } - } - self.wrap( - "DSTR", - parts, - dynamic_source.or(first_child).unwrap_or(node), - ) + fn normalize_singleton_class(&mut self, node: TreeSitterNode<'_>) -> Option { + let named = self.named_children(node); + let receiver = named + .first() + .and_then(|receiver| self.normalize_node(*receiver)); + let body = 
named.get(1).and_then(|body| self.normalize_body(*body)); + Some(self.wrap( + "SCLASS", + vec![ + optional_node(receiver), + Child::Node(Box::new(self.scope(body, None, node))), + ], + node, + )) } - fn normalize_interpolated_statement(&mut self, node: TreeSitterNode<'_>) -> Node { - let children = self.normalize_children(node); - self.wrap("DSTR", children, node) + fn normalize_lambda(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.lambda_target(node).unwrap_or(node); + let body_node = self + .named_field(target, "body") + .or_else(|| self.block_child(target)) + .or_else(|| self.named_children(target).into_iter().last())?; + let body = self.with_ruby_scope(target, false, |normalizer| { + normalizer.normalize_body(body_node).map(dynamic_scope) + }); + let scope = self.scope(body, None, target); + Some(self.wrap("LAMBDA", vec![Child::Node(Box::new(scope))], target)) } - fn normalize_interpolation(&mut self, node: TreeSitterNode<'_>) -> Option { - let exprs = self + fn normalize_yield(&mut self, node: TreeSitterNode<'_>) -> Node { + let args_node = self .named_children(node) .into_iter() - .filter_map(|child| self.normalize_node(child)) - .collect::>(); - let body = if exprs.len() == 1 { - exprs.into_iter().next() - } else if exprs.is_empty() { - None - } else { - Some(self.list(exprs, node)) - }; - Some( - self.wrap( - "EVSTR", - body.into_iter() - .map(|node| Child::Node(Box::new(node))) - .collect(), - node, - ), + .find(|child| child.kind() == "argument_list"); + let args = args_node + .map(|args| self.yield_argument_nodes(args)) + .unwrap_or_else(|| self.yield_inline_arguments(node)); + self.wrap( + "YIELD", + vec![list_or_nil(args, args_node.unwrap_or(node), self)], + node, ) } - fn normalize_heredoc_beginning(&mut self, node: TreeSitterNode<'_>) -> Node { - let heredoc_body = - node.parent() - .and_then(|parent| parent.parent()) - .and_then(|body_statement| { - self.named_children(body_statement) - .into_iter() - .find(|child| child.kind() 
== "heredoc_body") - }); - let children = heredoc_body - .map(|body| self.normalize_heredoc_children(body)) - .unwrap_or_default(); - self.wrap("DSTR", children, node) + fn normalize_yield_statement(&mut self, node: TreeSitterNode<'_>) -> Node { + let args_node = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list"); + let args = args_node + .map(|args| self.yield_argument_nodes(args)) + .unwrap_or_else(|| self.yield_inline_arguments(node)); + self.wrap( + "YIELD", + vec![list_or_nil(args, args_node.unwrap_or(node), self)], + node, + ) } - fn normalize_heredoc_children(&mut self, node: TreeSitterNode<'_>) -> Vec { - self.named_children(node) - .into_iter() - .filter_map(|child| match child.kind() { - "interpolation" => self.normalize_interpolation(child), - "heredoc_content" => { - let text = node_text(child, self.source).to_string(); - if text.is_empty() { - None - } else { - Some(self.wrap("STR", vec![Child::String(text)], child)) - } - } - _ => None, - }) - .map(|child| Child::Node(Box::new(child))) - .collect() + fn normalize_yield_argument_list(&mut self, node: TreeSitterNode<'_>) -> Node { + let args = self.yield_argument_nodes(node); + let source = self.parent_node(node).unwrap_or(node); + self.wrap("YIELD", vec![list_or_nil(args, node, self)], source) } - fn normalize_identifier(&mut self, node: TreeSitterNode<'_>) -> Node { - let name = node_text(node, self.source).to_string(); - if self.ruby_vcall_identifier(node, &name) || self.vcall_identifier(node, &name) { - self.wrap("VCALL", vec![Child::Symbol(name)], node) + fn normalize_super_statement(&mut self, node: TreeSitterNode<'_>) -> Node { + let raw = self.raw_named_children(node); + let children = if raw.len() == 1 && raw[0].kind() == "call" { + self.raw_named_children(raw[0]) } else { - self.wrap("LVAR", vec![Child::String(name)], node) - } + raw + }; + let args_node = children + .into_iter() + .find(|child| child.kind() == "argument_list"); + let args = args_node 
+ .map(|args| self.yield_argument_nodes(args)) + .unwrap_or_default(); + self.wrap( + "SUPER", + vec![list_or_nil(args, args_node.unwrap_or(node), self)], + node, + ) } - fn normalize_parameters(&mut self, node: Option>) -> Option { - if self.language != Language::Ruby { - return None; + fn normalize_body(&mut self, node: TreeSitterNode<'_>) -> Option { + if self.language == Language::Python && node.kind() == "block" { + let raw_children = self.raw_named_children(node); + if raw_children.len() == 1 + && raw_children[0].kind() == "class_definition" + && node + .parent() + .map(|parent| parent.kind() == "class_definition") + .unwrap_or(false) + { + return self.normalize_python_nested_class_as_iter(raw_children[0]); + } } - let node = node?; - let defaults = self - .named_children(node) - .into_iter() - .filter_map(|param| { - let name = self.named_field(param, "name")?; - let value = self.named_field(param, "value")?; - let value = optional_node(self.normalize_node(value)); - Some(self.wrap( - "LASGN", - vec![ - Child::Symbol(node_text(name, self.source).to_string()), - value, - ], - param, - )) - }) - .map(|node| Child::Node(Box::new(node))) - .collect::>(); - if defaults.is_empty() { - None - } else { - Some(self.wrap("ARGS", defaults, node)) + if self.leading_function_statement(node) { + return self.normalize_leading_function_statement(node); } - } - - fn normalize_block_parameters(&mut self, block: Option>) -> Option { - if self.language != Language::Ruby { - return None; + if self.leading_owner_statement(node) { + return self.normalize_leading_owner_statement(node); } - let block = block?; - let params = self - .named_children(block) - .into_iter() - .find(|child| child.kind() == "block_parameters")?; - let pre_init = self - .named_children(params) - .into_iter() - .filter(|param| param.kind() == "destructured_parameter") - .filter_map(|param| self.normalize_destructured_block_parameter(param)) - .map(|node| Child::Node(Box::new(node))) - .collect::>(); - if 
pre_init.is_empty() { - None - } else { - Some(self.wrap("ARGS", pre_init, params)) + if self.leading_if_statement(node) { + return self.normalize_leading_if_statement(node); } - } - - fn normalize_destructured_block_parameter( - &mut self, - param: TreeSitterNode<'_>, - ) -> Option { - let mut targets = Vec::new(); - self.collect_destructured_parameter_targets(param, &mut targets); - if targets.is_empty() { - return None; + if node.kind() == "elsif" { + return Some(self.normalize_elsif(node)); } - let dvar = self.wrap("DVAR", vec![Child::Nil], param); - Some(self.wrap( - "MASGN", - vec![ - Child::Node(Box::new(dvar)), - list_or_nil(targets, param, self), - Child::Nil, - ], - param, - )) - } - - fn collect_destructured_parameter_targets( - &mut self, - node: TreeSitterNode<'_>, - targets: &mut Vec, - ) { - if self.identifier_kind(node.kind()) { - targets.push(self.wrap( - "DASGN", - vec![ - Child::String(node_text(node, self.source).to_string()), - Child::Nil, - ], - node, - )); - return; + if self.ternary_statement(node) { + return self.normalize_ternary_statement(node); } - - for child in self.named_children(node) { - self.collect_destructured_parameter_targets(child, targets); + if if_kind(node.kind()) { + return self.normalize_if(node); } - } - - fn normalize_children(&mut self, node: TreeSitterNode<'_>) -> Vec { - let mut children = Vec::new(); - for child in self.named_children(node) { - if child.kind() == "heredoc_body" { - continue; - } - if self.assignment_rhs(child) { - continue; - } - if let Some(normalized) = self.normalize_node(child) { - children.push(Child::Node(Box::new(normalized))); - } + if self.leading_case_statement(node) { + return self.normalize_leading_case_statement(node); } - children - } - - fn scope(&self, body: Option, args: Option, source: TreeSitterNode<'_>) -> Node { - let source_node = body.as_ref().or(args.as_ref()).cloned(); - let children = vec![Child::Nil, optional_node(args), optional_node(body)]; - if let Some(source_node) = 
source_node { - self.wrap_from_source_node("SCOPE", children, &source_node) - } else if let Some(root_span) = self.root_span { - self.wrap_from_span_text("SCOPE", children, root_span, self.source) - } else { - self.wrap("SCOPE", children, source) + if self.leading_loop_statement(node) { + return self.normalize_leading_loop_statement(node); } - } - - fn list(&self, children: Vec, source: TreeSitterNode<'_>) -> Node { - self.wrap( - "LIST", - children - .into_iter() - .map(|child| Child::Node(Box::new(child))) - .collect(), - source, - ) - } - - fn wrap(&self, node_type: &str, children: Vec, source: TreeSitterNode<'_>) -> Node { - let node_span = span(source); - Node { - r#type: node_type.to_string(), - children, - first_lineno: node_span[0], - first_column: node_span[1], - last_lineno: node_span[2], - last_column: node_span[3], - text: node_text(source, self.source).to_string(), + if self.ensure_body_statement(node) { + return self.normalize_ensure_body_statement(node); } - } - - fn wrap_from_nodes( - &self, - node_type: &str, - children: Vec, - first: TreeSitterNode<'_>, - last: TreeSitterNode<'_>, - ) -> Node { - let first_span = span(first); - let last_span = span(last); - let text = self - .source - .get(first.start_byte()..last.end_byte()) - .unwrap_or("") - .to_string(); - Node { - r#type: node_type.to_string(), - children, - first_lineno: first_span[0], - first_column: first_span[1], - last_lineno: last_span[2], - last_column: last_span[3], - text, + if self.rescue_body_statement(node) { + return self.normalize_rescue_body_statement(node); } - } - - fn wrap_from_source_node(&self, node_type: &str, children: Vec, source: &Node) -> Node { - Node { - r#type: node_type.to_string(), - children, - first_lineno: source.first_lineno, - first_column: source.first_column, - last_lineno: source.last_lineno, - last_column: source.last_column, - text: source.text.clone(), + if self.hash_literal_statement(node) { + return self.normalize_hash_literal_statement(node); } - } 
- - fn wrap_from_span_text( - &self, - node_type: &str, - children: Vec, - node_span: Span, - text: &str, - ) -> Node { - Node { - r#type: node_type.to_string(), - children, - first_lineno: node_span[0], - first_column: node_span[1], - last_lineno: node_span[2], - last_column: node_span[3], - text: text.to_string(), + if self.array_literal_statement(node) { + return self.normalize_array_literal_statement(node); } - } - - fn with_ruby_scope( - &mut self, - node: TreeSitterNode<'_>, - reset: bool, - f: impl FnOnce(&mut Self) -> T, - ) -> T { - if self.language != Language::Ruby { - return f(self); + if self.element_reference_statement(node) { + return self.normalize_element_reference_statement(node); } - let previous = self.local_stack.clone(); - if reset { - self.local_stack.clear(); + if self.interpolated_statement(node) { + return Some(self.normalize_interpolated_statement(node)); } - self.local_stack.push(self.ruby_scope_locals(node)); - let result = f(self); - self.local_stack = previous; - result - } - - fn ruby_scope_locals(&self, node: TreeSitterNode<'_>) -> BTreeSet { - let mut locals = BTreeSet::new(); - self.collect_ruby_scope_locals(node, &mut locals, true); - locals - } - - fn collect_ruby_scope_locals( - &self, - node: TreeSitterNode<'_>, - locals: &mut BTreeSet, - root: bool, - ) { - if !root && self.ruby_scope_boundary(node) { - return; + if self.wrapped_return_statement(node) { + return self.normalize_wrapped_return_statement(node); } - if matches!( - node.kind(), - "method_parameters" - | "parameters" - | "parameter_list" - | "formal_parameters" - | "function_value_parameters" - | "parameter" - | "block_parameters" - | "lambda_parameters" - ) { - if node.kind() == "parameter" { - self.collect_parameter_names(node, locals); - } else { - for child in self.named_children(node) { - if child.kind() == "parameter" { - self.collect_parameter_names(child, locals); - } else { - self.collect_identifier_names(child, locals); - } - } - } + if 
self.heredoc_body_statement(node) { + return self.normalize_heredoc_body_statement(node); } - if matches!(node.kind(), "assignment" | "operator_assignment") { - if let Some(left) = self.assignment_left(node) { - self.collect_assignment_target_names(left, locals); - } + if self.empty_body_statement(node) { + return None; } - for target in self.declaration_entries(node) { - if let Some(name) = self.declaration_name(target) { - self.collect_assignment_target_names(name, locals); - } + if self.modifier_statement(node) { + return self.normalize_modifier_statement(node); } - for child in self.named_children(node) { - if !self.ruby_scope_boundary(child) { - self.collect_ruby_scope_locals(child, locals, false); + if self.statement_call_with_block(node) { + return self.normalize_statement_call_with_block(node); + } + if self.command_call_statement(node) { + return self.normalize_command_call_statement(node); + } + if self.yield_statement(node) { + return Some(self.normalize_yield_statement(node)); + } + if self.unary_not_statement(node) { + return self.normalize_unary_not_statement(node); + } + if self.operator_assignment_statement(node) { + return self.normalize_operator_assignment_statement(node); + } + if self.dotted_expression(node) { + return self.normalize_dotted_expression(node); + } + if self.unary_minus_expression(node) { + return self.normalize_unary_minus(node); + } + if self.argument_list_unary_not(node) { + return self.normalize_argument_list_unary_not(node); + } + if self.infix_statement(node) { + return self.normalize_infix_statement(node); + } + if self.boolean_expression(node) { + return self.normalize_boolean(node); + } + if self.block_kind(node.kind()) { + let children = self.normalize_children(node); + if children.is_empty() { + let text = node_text(node, self.source).trim(); + if bare_identifier_text(text) { + return Some(self.wrap("VCALL", vec![Child::Symbol(text.to_string())], node)); + } + return None; + } + if children.len() == 1 { + return 
child_node(children.into_iter().next().unwrap()); } + + return Some(self.wrap("BLOCK", children, node)); } + + self.normalize_node(node) } - fn collect_assignment_target_names( - &self, - node: TreeSitterNode<'_>, - locals: &mut BTreeSet, - ) { - if self.identifier_kind(node.kind()) { - locals.insert( - node_text(node, self.source) - .trim_start_matches('*') - .to_string(), - ); - return; - } - if matches!( - node.kind(), - "left_assignment_list" - | "expression_list" - | "splat" - | "splat_parameter" - | "rest_assignment" - | "pattern" - ) { - for child in self.named_children(node) { - self.collect_assignment_target_names(child, locals); - } + fn normalize_if(&mut self, node: TreeSitterNode<'_>) -> Option { + if matches!(node.kind(), "if_modifier" | "unless_modifier") { + let named = self.named_children(node); + let action = *named.first()?; + let condition = *named.get(1)?; + let node_type = if node.kind().starts_with("unless") { + "UNLESS" + } else { + "IF" + }; + let condition = optional_node(self.normalize_node(condition)); + let action = optional_node(self.normalize_modifier_action(action)); + return Some(self.wrap(node_type, vec![condition, action, Child::Nil], node)); } - } - fn collect_identifier_names(&self, node: TreeSitterNode<'_>, locals: &mut BTreeSet) { - if self.identifier_kind(node.kind()) { - locals.insert( - node_text(node, self.source) - .trim_start_matches('*') - .to_string(), - ); - } - for child in self.named_children(node) { - self.collect_identifier_names(child, locals); - } + let condition_raw = self + .named_field(node, "condition") + .or_else(|| self.named_field(node, "predicate")) + .or_else(|| self.first_named(node))?; + let condition = optional_node(self.normalize_node(condition_raw)); + let positive_raw = self + .named_field(node, "consequence") + .or_else(|| self.named_field(node, "body")) + .or_else(|| { + self.named_children(node) + .into_iter() + .find(|child| child.kind() == "then") + }) + .or_else(|| self.branch_child(node, 
condition_raw, 0)); + let negative_raw = self + .named_field(node, "alternative") + .or_else(|| self.explicit_alternative(node)) + .or_else(|| { + if self.ruby() { + None + } else { + self.branch_child(node, condition_raw, 1) + } + }); + let positive = optional_node(positive_raw.and_then(|child| self.normalize_body(child))); + let negative = + optional_node(negative_raw.and_then(|child| self.normalize_else_or_branch(child))); + let node_type = if node.kind().starts_with("unless") { + "UNLESS" + } else { + "IF" + }; + Some(self.wrap(node_type, vec![condition, positive, negative], node)) } - fn collect_parameter_names(&self, node: TreeSitterNode<'_>, locals: &mut BTreeSet) { - if let Some(name) = self.named_field(node, "name") { - self.collect_identifier_names(name, locals); - return; - } - if let Some(name) = self + fn normalize_elsif(&mut self, node: TreeSitterNode<'_>) -> Node { + let condition = self .named_children(node) .into_iter() - .find(|child| self.identifier_kind(child.kind())) - { - locals.insert( - node_text(name, self.source) - .trim_start_matches('*') - .to_string(), - ); - } + .find(|child| !matches!(child.kind(), "comment" | "then" | "elsif" | "else")); + let positive = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "then"); + let negative = self + .named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "elsif" | "else")); + let condition = optional_node(condition.and_then(|child| self.normalize_node(child))); + let positive = optional_node(positive.and_then(|child| self.normalize_body(child))); + let negative = + optional_node(negative.and_then(|child| self.normalize_else_or_branch(child))); + + self.wrap("IF", vec![condition, positive, negative], node) } - fn ruby_scope_boundary(&self, node: TreeSitterNode<'_>) -> bool { - if node.kind() == "block" - && node - .parent() - .map(|parent| function_kind(parent.kind())) - .unwrap_or(false) - { - return false; - } - if node.kind() == "block" - && node - 
.parent() - .and_then(|parent| parent.parent()) - .map(|grandparent| function_kind(grandparent.kind())) - .unwrap_or(false) - { - return false; - } - if matches!(node.kind(), "block" | "do_block") - && node - .parent() - .map(|parent| parent.kind() == "lambda") - .unwrap_or(false) - { - return false; + fn normalize_loop(&mut self, node: TreeSitterNode<'_>, node_type: &str) -> Option { + if matches!(node.kind(), "while_modifier" | "until_modifier") { + let named = self.named_children(node); + let action = *named.first()?; + let condition = *named.get(1)?; + let condition = optional_node(self.normalize_node(condition)); + let action = optional_node(self.normalize_modifier_action(action)); + return Some(self.wrap(node_type, vec![condition, action, Child::Bool(true)], node)); } - matches!( - node.kind(), - "method" - | "function_definition" - | "function_declaration" - | "method_definition" - | "method_declaration" - | "function_item" - | "singleton_method" - | "class" - | "module" - | "singleton_class" - | "lambda" - | "block" - | "do_block" - ) - } - fn ruby_vcall_identifier(&self, node: TreeSitterNode<'_>, name: &str) -> bool { - self.language == Language::Ruby - && !self.assignment_lhs(node) - && !self.ruby_definition_identifier(node) - && !self - .local_stack - .iter() - .rev() - .any(|scope| scope.contains(name)) + let condition = self + .named_field(node, "condition") + .or_else(|| self.first_named(node)); + let body = self + .named_field(node, "body") + .or_else(|| self.named_field(node, "consequence")) + .or_else(|| self.block_child(node)); + let condition = + optional_node(condition.and_then(|condition| self.normalize_node(condition))); + let body = optional_node(body.and_then(|body| self.normalize_body(body))); + Some(self.wrap(node_type, vec![condition, body], node)) } - fn vcall_identifier(&self, node: TreeSitterNode<'_>, name: &str) -> bool { - if self.language == Language::Ruby - && self - .local_stack - .iter() - .rev() - .any(|scope| 
scope.contains(name)) - { - return false; - } - let Some(parent) = node.parent() else { - return false; - }; - if matches!( - parent.kind(), - "method" | "method_parameters" | "parameter_list" | "argument_list" | "arguments" - ) { - return false; + fn normalize_else_or_branch(&mut self, node: TreeSitterNode<'_>) -> Option { + if self.language == Language::Python && node.kind() == "else_clause" { + if let Some(block) = self + .raw_named_children(node) + .into_iter() + .find(|child| child.kind() == "block") + { + if let Some(normalized) = self.normalize_python_else_if_block(block) { + return Some(self.wrap( + "ELSE_CLAUSE", + vec![Child::Node(Box::new(normalized))], + node, + )); + } + } } - if self.member_read_node(parent) { - return false; + if node.kind() != "else" { + return self.normalize_body(node); } - if self.assignment_lhs(node) || self.assignment_rhs(node) { - return false; + if let Some(call) = self.first_dotted_call_descendant(node) { + let trailing = self + .source + .get(call.end_byte()..node.end_byte()) + .unwrap_or("") + .trim(); + if trailing.is_empty() { + return self.normalize_node(call); + } } + self.normalize_body_nodes(self.named_children(node), node) + } - if matches!(parent.kind(), "body_statement" | "block_body" | "then") - && self - .named_children(parent) - .into_iter() - .any(|child| child == node) - { - return true; - } - if matches!(parent.kind(), "if_modifier" | "unless_modifier") - && self - .named_children(parent) - .into_iter() - .next() - .map(|child| child == node) - .unwrap_or(false) - { - return true; + fn normalize_python_else_if_block(&mut self, node: TreeSitterNode<'_>) -> Option { + let statements = self + .raw_named_children(node) + .into_iter() + .filter(|child| child.kind() != "comment") + .collect::>(); + if statements.len() != 1 || statements[0].kind() != "if_statement" { + return None; } - - false + let if_node = statements[0]; + self.normalize_if(if_node) } - fn ruby_definition_identifier(&self, node: 
TreeSitterNode<'_>) -> bool { - let Some(parent) = node.parent() else { - return false; - }; - if matches!(parent.kind(), "method" | "singleton_method") { - return self - .named_field(parent, "name") - .map(|name| name == node) - .unwrap_or(false); + fn normalize_case(&mut self, node: TreeSitterNode<'_>) -> Option { + let value_raw = self.case_value(node); + let value = value_raw.and_then(|value| self.normalize_node(value)); + let whens = self + .case_arms(node) + .into_iter() + .filter_map(|arm| self.normalize_when(arm)) + .collect::>(); + let fallback = self.case_else_body(node); + let chain = self.link_when_chain(whens, fallback); + if value_raw.is_none() { + Some(self.wrap("CASE2", vec![optional_node(chain)], node)) + } else { + Some(self.wrap( + "CASE", + vec![optional_node(value), optional_node(chain)], + node, + )) } - matches!( - parent.kind(), - "method_parameters" - | "parameters" - | "parameter_list" - | "formal_parameters" - | "function_value_parameters" - | "block_parameters" - | "lambda_parameters" - | "parameter" - | "optional_parameter" - | "keyword_parameter" - | "block_parameter" - ) } - fn self_node(&self, node: TreeSitterNode<'_>) -> bool { - matches!(node.kind(), "self" | "this") - || matches!(node_text(node, self.source), "self" | "this") + fn normalize_when(&mut self, node: TreeSitterNode<'_>) -> Option { + let patterns = self.normalize_patterns(node); + let body = self + .when_body(node) + .and_then(|body| self.normalize_body(body)); + Some(self.wrap( + "WHEN", + vec![ + list_or_nil(patterns, node, self), + optional_node(body), + Child::Nil, + ], + node, + )) } - fn assignment_lhs(&self, node: TreeSitterNode<'_>) -> bool { - if self.lua_single_assignment_block_child(node) { - return false; - } - if node - .prev_sibling() - .map(|sibling| node_text(sibling, self.source) == ":") - .unwrap_or(false) - { - return false; + fn normalize_patterns(&mut self, node: TreeSitterNode<'_>) -> Vec { + let mut patterns = self + .raw_named_children(node) + 
.into_iter() + .filter(|child| { + matches!( + child.kind(), + "pattern" + | "case_pattern" + | "match_pattern" + | "switch_pattern" + | "when_condition" + ) + }) + .collect::>(); + if patterns.is_empty() { + if let Some(value) = self.named_field(node, "value") { + patterns.push(value); + } } - if self.literal_fragment_assignment_context(node) { - return false; + if patterns.is_empty() { + if let Some(pattern) = self + .named_children(node) + .into_iter() + .find(|child| !self.block_kind(child.kind()) && !self.statement_node(child.kind())) + { + patterns.push(pattern); + } } - node.next_sibling() - .map(|sibling| assignment_operator(node_text(sibling, self.source))) - .unwrap_or(false) - } - fn literal_fragment_assignment_context(&self, node: TreeSitterNode<'_>) -> bool { - let Some(parent) = node.parent() else { - return false; - }; - if matches!( - parent.kind(), - "string" | "delimited_symbol" | "regex" | "regex_literal" - ) { - return true; + let mut normalized = Vec::new(); + for pattern in patterns { + let pattern_text = node_text(pattern, self.source).to_string(); + let pattern_wrapper = matches!( + pattern.kind(), + "pattern" + | "case_pattern" + | "match_pattern" + | "switch_pattern" + | "when_condition" + | "expression_list" + ); + let pattern_children = self.named_children(pattern); + if pattern_text.contains("::") { + normalized.push(self.wrap("CONST", vec![Child::Symbol(pattern_text)], pattern)); + } else if pattern_wrapper && pattern_children.is_empty() && integer_text(&pattern_text) + { + normalized.push(self.wrap("INTEGER", Vec::new(), pattern)); + } else if self.ruby() + && pattern_wrapper + && pattern_children.is_empty() + && ruby_constant_text(&pattern_text) + { + normalized.push(self.wrap("CONST", vec![Child::Symbol(pattern_text)], pattern)); + } else if self.ruby() + && pattern_wrapper + && pattern_children.is_empty() + && bare_identifier_text(&pattern_text) + { + normalized.push(self.local_or_call_for_name(&pattern_text, pattern)); + } else if 
pattern_wrapper { + normalized.extend( + pattern_children + .into_iter() + .filter_map(|child| self.normalize_node(child)), + ); + } else if let Some(pattern) = self.normalize_node(pattern) { + normalized.push(pattern); + } } + normalized + } - matches!( - node.kind(), - "string_content" | "escape_sequence" | "interpolation" - ) && parent - .parent() - .map(|grandparent| { - matches!( - grandparent.kind(), - "string" | "delimited_symbol" | "regex" | "regex_literal" - ) + fn link_when_chain(&self, whens: Vec, fallback: Option) -> Option { + whens + .into_iter() + .rev() + .fold(fallback, |next_when, mut current| { + while current.children.len() <= 2 { + current.children.push(Child::Nil); + } + current.children[2] = optional_node(next_when); + Some(current) }) - .unwrap_or(false) } - fn assignment_rhs(&self, node: TreeSitterNode<'_>) -> bool { - if self.lua_single_assignment_block_child(node) { - return false; - } - if self.literal_fragment_assignment_context(node) { - return false; + fn case_else_body(&mut self, node: TreeSitterNode<'_>) -> Option { + let else_node = self + .normalization_adapter + .case_else_node(node, self.source)?; + if self + .normalization_adapter + .case_else_arm(else_node, self.source) + || else_node.kind() == "switch_default" + { + if let Some(body) = self.when_body(else_node) { + return self.normalize_body(body); + } } - node.prev_sibling() - .map(|sibling| assignment_operator(node_text(sibling, self.source))) - .unwrap_or(false) + self.normalize_else_or_branch(else_node) } - fn lua_single_assignment_block_child(&self, node: TreeSitterNode<'_>) -> bool { - if self.language != Language::Lua { - return false; + fn normalize_body_nodes( + &mut self, + nodes: Vec>, + source: TreeSitterNode<'_>, + ) -> Option { + let mut children = Vec::new(); + let mut index = 0; + while index < nodes.len() { + if index + 1 < nodes.len() { + if let Some(call) = self.normalize_flat_dotted_nodes(&nodes[index..=index + 1]) { + 
children.push(Child::Node(Box::new(call))); + index += 2; + continue; + } + } + if let Some(child) = self.normalize_body(nodes[index]) { + children.push(Child::Node(Box::new(child))); + } + index += 1; } - let Some(parent) = node.parent() else { - return false; - }; - if parent.kind() != "assignment_statement" { - return false; + if children.is_empty() { + None + } else if children.len() == 1 { + child_node(children.into_iter().next().unwrap()) + } else { + Some(self.wrap("BLOCK", children, source)) } - let Some(grandparent) = parent.parent() else { - return false; - }; - grandparent.kind() == "block" - && node_text(grandparent, self.source) == node_text(parent, self.source) - && self.raw_named_children(grandparent).len() == 1 } - fn has_assignment_operator_child(&self, node: TreeSitterNode<'_>) -> bool { - node.children(&mut node.walk()) - .any(|child| !child.is_named() && assignment_operator(node_text(child, self.source))) + fn normalize_return(&mut self, node: TreeSitterNode<'_>) -> Option { + self.normalize_return_node(node) } - fn single_short_var_lhs(&self, node: TreeSitterNode<'_>) -> bool { - let Some(parent) = node.parent() else { - return false; - }; - if parent.kind() != "short_var_declaration" { - return false; - } - if self.named_children(node).len() != 1 { - return false; - } - self.named_children(parent) + fn normalize_super(&mut self, node: TreeSitterNode<'_>) -> Node { + let args_node = self + .named_children(node) .into_iter() - .next() - .map(|child| child == node) - .unwrap_or(false) - } - - fn modifier_statement(&self, node: TreeSitterNode<'_>) -> bool { - let named = self.named_children(node); - matches!(node.kind(), "body_statement" | "block_body" | "statement") - && self.modifier_keyword(node).is_some() - && named.len() == 2 + .find(|child| child.kind() == "argument_list"); + let args = args_node + .map(|args| { + self.named_children(args) + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>() + }) + 
.unwrap_or_default(); + self.wrap( + "SUPER", + vec![list_or_nil(args, args_node.unwrap_or(node), self)], + node, + ) } - fn leading_if_statement(&self, node: TreeSitterNode<'_>) -> bool { - let first_child = node.children(&mut node.walk()).next(); - let single_named_if_block = matches!(self.language, Language::Python | Language::Lua) - && node.kind() == "block" - && self.raw_named_children(node).len() == 1 - && first_child - .map(|child| child.kind() == "if_statement") - .unwrap_or(false); - if single_named_if_block { - return true; - } - matches!( - node.kind(), - "body_statement" | "block" | "block_body" | "statement" - ) && (first_child - .map(|child| matches!(child.kind(), "if" | "unless")) - .unwrap_or(false)) - && self.named_children(node).len() >= 2 - && self - .named_children(node) - .first() - .map(|child| !if_kind(child.kind())) - .unwrap_or(false) + fn normalize_return_node(&mut self, node: TreeSitterNode<'_>) -> Option { + self.normalize_return_node_with_elide_symbol(node, false) } - fn normalize_leading_if_statement(&mut self, node: TreeSitterNode<'_>) -> Option { - if self.language == Language::Python && node.kind() == "block" { - if let Some(if_node) = self - .raw_named_children(node) - .into_iter() - .find(|child| child.kind() == "if_statement") - { - let condition = self - .named_field(if_node, "condition") - .or_else(|| self.named_field(if_node, "predicate")) - .or_else(|| self.first_named(if_node))?; - let consequence = self - .named_field(if_node, "consequence") - .or_else(|| self.named_field(if_node, "body")) - .or_else(|| self.branch_child(if_node, condition, 0)); - let condition = optional_node(self.normalize_node(condition)); - let consequence = - optional_node(consequence.and_then(|child| self.normalize_body(child))); - return Some(self.wrap("IF", vec![condition, consequence, Child::Nil], if_node)); - } - } - if self.language == Language::Lua && node.kind() == "block" { - if let Some(if_node) = self - .raw_named_children(node) - 
.into_iter() - .find(|child| child.kind() == "if_statement") - { - return self.normalize_if(if_node); - } - } - let keyword = node - .children(&mut node.walk()) - .next() - .map(|child| child.kind().to_string())?; - let condition = self + fn normalize_return_node_with_elide_symbol( + &mut self, + node: TreeSitterNode<'_>, + elide_symbol: bool, + ) -> Option { + let children = self .named_children(node) .into_iter() - .find(|child| !matches!(child.kind(), "comment" | "then" | "elsif" | "else"))?; - let consequence = self - .named_children(node) + .filter_map(|child| self.normalize_return_value(child)) + .collect::>(); + if elide_symbol + && self.ruby() + && children.len() == 1 + && self.symbol_literal_node(children.first()) + { + return children.into_iter().next(); + } + let children = children .into_iter() - .find(|child| child.kind() == "then") - .or_else(|| self.branch_child(node, condition, 0)); - let alternative = self.explicit_alternative(node); - let node_type = if keyword == "unless" { "UNLESS" } else { "IF" }; - let condition = optional_node(self.normalize_node(condition)); - let consequence = optional_node(consequence.and_then(|child| self.normalize_body(child))); - let alternative = - optional_node(alternative.and_then(|child| self.normalize_else_or_branch(child))); - Some(self.wrap(node_type, vec![condition, consequence, alternative], node)) + .map(|child| Child::Node(Box::new(child))) + .collect::>(); + Some(self.wrap(return_kind(node.kind()), children, node)) } - fn command_call_statement(&self, node: TreeSitterNode<'_>) -> bool { + fn wrapped_return_statement(&self, node: TreeSitterNode<'_>) -> bool { matches!( node.kind(), - "body_statement" | "block" | "block_body" | "statement" - ) && !self.dotted_call(node) - && self - .named_children(node) - .into_iter() + "body_statement" | "block_body" | "statement" | "block" + ) && !node_text(node, self.source).contains('\n') + && node + .children(&mut node.walk()) .next() - .map(|child| 
self.identifier_kind(child.kind())) + .map(|child| { + return_statement_kind(child.kind()) + && (!child.is_named() + || node_text(node, self.source) == node_text(child, self.source)) + }) .unwrap_or(false) - && (self - .named_children(node) - .into_iter() - .any(|child| matches!(child.kind(), "argument_list" | "arguments")) - || self.call_block(node).is_some()) } - fn visibility_inline_def_call(&self, node: TreeSitterNode<'_>) -> bool { - if node.kind() != "call" { - return false; - } - let Some(message) = self.named_children(node).into_iter().next() else { - return false; - }; - if !inline_def_wrapper_mid(node_text(message, self.source)) { - return false; + fn normalize_wrapped_return_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let keyword = node.children(&mut node.walk()).next()?; + if keyword.is_named() + && return_statement_kind(keyword.kind()) + && node_text(node, self.source) == node_text(keyword, self.source) + { + return self.normalize_return_node(keyword); } - self.named_children(node) + let children = self + .named_children(node) .into_iter() - .find(|child| child.kind() == "argument_list") - .map(|args| { - node_text(args, self.source) - .trim_start() - .starts_with("def ") - }) - .unwrap_or(false) - } - - fn visibility_inline_def_statement( - &self, - node: TreeSitterNode<'_>, - function: TreeSitterNode<'_>, - ) -> bool { - inline_def_wrapper_mid(node_text(function, self.source)) - && node_text(node, self.source).contains("def ") - } - - fn inline_def_from_statement(&mut self, node: TreeSitterNode<'_>) -> Option { - let source = self - .named_children(node) - .into_iter() - .find(|child| child.kind() == "argument_list") - .unwrap_or(node); - self.inline_def_from_source(source) + .filter_map(|child| self.normalize_return_value(child)) + .map(|child| Child::Node(Box::new(child))) + .collect::>(); + Some(self.wrap(return_kind(keyword.kind()), children, node)) } - fn inline_def_from_source(&mut self, source: TreeSitterNode<'_>) -> Option { 
- if let Some(method) = self - .named_children(source) - .into_iter() - .find(|child| matches!(child.kind(), "method" | "singleton_method")) + fn normalize_return_value(&mut self, node: TreeSitterNode<'_>) -> Option { + if node.kind() != "argument_list" { + return self.normalize_node(node); + } + if self.named_children(node).is_empty() { + return self.scalar_argument_list_value(node); + } + if self.argument_list_element_reference(node) { + return self.normalize_argument_list_element_reference(node); + } + if self.boolean_expression(node) { + return self.normalize_boolean(node); + } + if self.ternary_statement(node) { + return self.normalize_ternary_statement(node); + } + if self.case_argument_list(node) { + return self.normalize_case(node); + } + if self.argument_list_call_with_block(node) { + return self.normalize_argument_list_call_with_block(node); + } + if self.dotted_expression(node) { + return self.normalize_dotted_expression(node); + } + if self.argument_list_unary_not(node) { + return self.normalize_argument_list_unary_not(node); + } + if self.infix_statement(node) { + return self.normalize_infix_statement(node); + } + let children = self.named_children(node); + if children.len() == 1 + && self.call_kind(children[0].kind()) + && node_text(children[0], self.source) == node_text(node, self.source) { - return if method.kind() == "singleton_method" { - self.normalize_singleton_function(method) - } else { - self.normalize_function(method) - }; + if let Some(call) = self.normalize_return_value_call(children[0]) { + return Some(call); + } } - let body = self.inline_def_body(source); - let receiver = self.inline_def_receiver(source); - let normalized_body = self.with_ruby_scope(source, true, |normalizer| { - let body = body.and_then(|body| normalizer.normalize_body(body)); - normalizer.elide_tail_returns(body) - }); - if let Some(receiver) = receiver { - let name = self.inline_def_name_after_receiver(source, receiver)?; - if name.is_empty() { - return None; + if 
let (Some(function), Some(nested_args)) = (children.first(), children.get(1)) { + if self.identifier_kind(function.kind()) && nested_args.kind() == "argument_list" { + let args = self + .named_children(*nested_args) + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>(); + let args_source = self + .parenthesized_source(*nested_args) + .or_else(|| self.parenthesized_source(node)); + let args_child = if let Some(source) = args_source { + self.list_or_nil_from_source_node(args, &source) + } else { + list_or_nil(args, *nested_args, self) + }; + return Some(self.wrap( + "FCALL", + vec![ + Child::Symbol(node_text(*function, self.source).to_string()), + args_child, + ], + node, + )); } - let receiver = self.normalize_node(receiver)?; - return Some(self.wrap( - "DEFS", - vec![ - Child::Node(Box::new(receiver)), - Child::Symbol(name), - Child::Node(Box::new(self.scope(normalized_body, None, source))), - ], - source, - )); } - - let name = self - .named_children(source) + let values = self + .named_children(node) .into_iter() - .find(|child| self.identifier_kind(child.kind())) - .map(|child| node_text(child, self.source).to_string())?; - if name.is_empty() { + .filter_map(|child| self.normalize_node(child)) + .collect::>(); + if values.len() == 1 { + values.into_iter().next() + } else if values.is_empty() { + None + } else { + Some(self.list_node(values, node)) + } + } + + fn normalize_return_value_call(&mut self, node: TreeSitterNode<'_>) -> Option { + let function = self + .named_field(node, "function") + .or_else(|| self.named_field(node, "call")) + .or_else(|| self.named_children(node).into_iter().next())?; + if !self.identifier_kind(function.kind()) { return None; } + + let args_node = self + .named_field(node, "arguments") + .or_else(|| self.named_field(node, "argument")) + .or_else(|| { + self.named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "argument_list" | "arguments")) + }); + let args = args_node + 
.map(|args_node| { + self.named_children(args_node) + .into_iter() + .filter(|child| *child != function) + .filter_map(|child| self.normalize_node(child)) + .collect::>() + }) + .unwrap_or_default(); + let args_child = if let Some(args_node) = args_node { + if let Some(source) = self + .parenthesized_source(args_node) + .or_else(|| self.parenthesized_source(node)) + { + self.list_or_nil_from_source_node(args, &source) + } else { + list_or_nil(args, args_node, self) + } + } else { + Child::Nil + }; + Some(self.wrap( - "DEFN", + "FCALL", vec![ - Child::Symbol(name), - Child::Node(Box::new(self.scope(normalized_body, None, source))), + Child::Symbol(node_text(function, self.source).to_string()), + args_child, ], - source, + node, )) } - fn inline_def_receiver<'tree>( - &self, - source: TreeSitterNode<'tree>, - ) -> Option> { - let text = node_text(source, self.source); - if !text.contains("def ") || !text.split_whitespace().nth(1).unwrap_or("").contains('.') { - return None; - } - self.named_children(source).into_iter().find(|child| { - matches!( - child.kind(), - "self" | "this" | "constant" | "scope_resolution" - ) - }) - } - - fn inline_def_name_after_receiver( - &self, - source: TreeSitterNode<'_>, - receiver: TreeSitterNode<'_>, - ) -> Option { - let children = self.named_children(source); - let index = children.iter().position(|child| *child == receiver)?; - children - .into_iter() - .skip(index + 1) - .find(|child| self.identifier_kind(child.kind())) - .map(|child| node_text(child, self.source).to_string()) + fn normalize_ternary_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let parts = self.ternary_parts(node)?; + let condition = optional_node(self.normalize_node(parts.condition)); + let positive = optional_node(self.normalize_ternary_branch(&parts.positive)); + let negative = optional_node(self.normalize_ternary_branch(&parts.negative)); + Some(self.wrap("IF", vec![condition, positive, negative], node)) } - fn inline_def_body<'tree>(&self, 
node: TreeSitterNode<'tree>) -> Option> { - let mut stack = self - .named_children(node) - .into_iter() - .rev() - .collect::>(); - while let Some(child) = stack.pop() { - if child.kind() == "body_statement" { - return Some(child); + fn normalize_boolean(&mut self, node: TreeSitterNode<'_>) -> Option { + let operator = self.boolean_operator(node)?; + let node_type = if operator == "or" { "OR" } else { "AND" }; + let mut operands = Vec::new(); + for child in self.named_children(node) { + if let Some(normalized) = self.normalize_node(child) { + if normalized.r#type == node_type { + operands.extend(normalized.children); + } else { + operands.push(Child::Node(Box::new(normalized))); + } } - stack.extend(self.named_children(child).into_iter().rev()); } - None + Some(self.wrap(node_type, operands, node)) } - fn modifier_keyword(&self, node: TreeSitterNode<'_>) -> Option { - let mut seen_named = false; - for child in node.children(&mut node.walk()) { - seen_named = seen_named || child.is_named(); - if seen_named - && !child.is_named() - && matches!(child.kind(), "if" | "unless" | "while" | "until") - { - return Some(child.kind().to_string()); - } - } - None + fn normalize_comparison(&mut self, node: TreeSitterNode<'_>) -> Option { + let raw_named = self.raw_named_children(node); + let target = if raw_named.len() == 1 + && BINARY_WRAPPER_KINDS.contains(&raw_named[0].kind()) + && node_text(node, self.source) == node_text(raw_named[0], self.source) + { + raw_named[0] + } else { + node + }; + let operands = self.named_children(target); + let left = operands.first().and_then(|left| self.normalize_node(*left)); + let right_raw = operands.get(1).copied()?; + let right = self.normalize_node(right_raw); + Some(self.wrap( + "OPCALL", + vec![ + optional_node(left), + Child::Symbol(self.comparison_operator(node)?), + list_or_nil(right.into_iter().collect(), right_raw, self), + ], + node, + )) } - fn ternary_statement(&self, node: TreeSitterNode<'_>) -> bool { - matches!( - 
node.kind(), - "body_statement" | "block_body" | "statement" | "argument_list" | "conditional" - ) && self.named_children(node).len() >= 3 - && node - .children(&mut node.walk()) - .any(|child| !child.is_named() && node_text(child, self.source) == "?") - && node - .children(&mut node.walk()) - .any(|child| !child.is_named() && node_text(child, self.source) == ":") - } + fn normalize_operator_call(&mut self, node: TreeSitterNode<'_>) -> Option { + let operands = self.named_children(node); + let direct_parts = match ( + operands.first().copied(), + self.binary_operator(node), + operands.get(1).copied(), + ) { + (Some(left), Some(operator), Some(right)) => Some((left, operator, right)), + _ => None, + }; + let (left_raw, operator, right_raw) = + direct_parts.or_else(|| self.infix_statement_parts(node))?; + let left = self.normalize_node(left_raw); + let right = self.normalize_node(right_raw); + if self.ruby() && operator == "=~" && self.regex_literal(Some(right_raw)) { + return Some(self.wrap( + "MATCH3", + vec![optional_node(right), optional_node(left)], + node, + )); + } else if self.ruby() && operator == "=~" { + return Some(self.wrap( + "CALL", + vec![ + optional_node(left), + Child::Symbol("=~".to_string()), + list_or_nil(right.into_iter().collect(), right_raw, self), + ], + node, + )); + } - fn boolean_expression(&self, node: TreeSitterNode<'_>) -> bool { - (matches!( - node.kind(), - "binary" | "binary_expression" | "boolean_operator" - ) || self.boolean_statement(node)) - && matches!(self.boolean_operator(node).as_deref(), Some("and" | "or")) + Some(self.wrap( + "OPCALL", + vec![ + optional_node(left), + Child::Symbol(operator), + list_or_nil(right.into_iter().collect(), right_raw, self), + ], + node, + )) } - fn boolean_statement(&self, node: TreeSitterNode<'_>) -> bool { - if !matches!( - node.kind(), - "body_statement" | "block_body" | "statement" | "argument_list" - ) { - return false; - } - if !matches!( - self.binary_operator(node).as_deref(), - 
Some("&&" | "||" | "and" | "or") - ) { - return false; - } - if self.named_children(node).len() < 2 { - return false; + fn normalize_infix_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let (left_raw, operator, right_raw) = self.infix_statement_parts(node)?; + let left = self.normalize_node(left_raw); + let right = self.normalize_node(right_raw); + if self.ruby() && operator == "=~" && self.regex_literal(Some(right_raw)) { + return Some(self.wrap( + "MATCH3", + vec![optional_node(right), optional_node(left)], + node, + )); + } else if self.ruby() && operator == "=~" { + return Some(self.wrap( + "CALL", + vec![ + optional_node(left), + Child::Symbol("=~".to_string()), + list_or_nil(right.into_iter().collect(), right_raw, self), + ], + node, + )); } - node.children(&mut node.walk()).all(|child| { - child.is_named() - || matches!( - node_text(child, self.source), - "&&" | "||" | "and" | "or" | "(" | ")" - ) - }) - } - - fn operator_call_expression(&self, node: TreeSitterNode<'_>) -> bool { - matches!(node.kind(), "binary" | "binary_expression") - && self.named_children(node).len() >= 2 - && self - .binary_operator(node) - .map(|operator| OPERATOR_CALL_OPERATORS.contains(&operator.as_str())) - .unwrap_or(false) - } - - fn comparison_expression(&self, node: TreeSitterNode<'_>) -> bool { - matches!( - node.kind(), - "binary" | "binary_expression" | "comparison_operator" - ) && self.named_children(node).len() >= 2 - && self - .comparison_operator(node) - .map(|operator| COMPARISON_OPERATORS.contains(&operator.as_str())) - .unwrap_or(false) - } - - fn infix_statement(&self, node: TreeSitterNode<'_>) -> bool { - self.infix_statement_parts(node).is_some() + Some(self.wrap( + "OPCALL", + vec![ + optional_node(left), + Child::Symbol(operator), + list_or_nil(right.into_iter().collect(), right_raw, self), + ], + node, + )) } - fn argument_list_unary_not(&self, node: TreeSitterNode<'_>) -> bool { - if node.kind() != "argument_list" { - return false; - } - let named 
= self.named_children(node); - if node - .children(&mut node.walk()) - .next() - .map(|child| node_text(child, self.source) == "!") - .unwrap_or(false) - && named.len() == 1 - { - return true; - } - - let raw_named = self.raw_named_children(node); - if raw_named.len() != 1 || raw_named[0].kind() != "unary" { - return false; - } - node_text(node, self.source) == node_text(raw_named[0], self.source) - && self.unary_not_expression(raw_named[0]) - && self.raw_named_children(raw_named[0]).len() == 1 + fn normalize_unary_not(&mut self, node: TreeSitterNode<'_>) -> Option { + let operand = self.named_children(node).into_iter().next()?; + let operand = optional_node(self.normalize_node(operand)); + Some(self.wrap( + "OPCALL", + vec![operand, Child::Symbol("!".to_string()), Child::Nil], + node, + )) } - fn unary_not_statement(&self, node: TreeSitterNode<'_>) -> bool { - if !matches!( - node.kind(), - "body_statement" | "block_body" | "statement" | "argument_list" - ) { - return false; - } - let named = self.named_children(node); - if node - .children(&mut node.walk()) - .next() - .map(|child| node_text(child, self.source) == "!") - .unwrap_or(false) - && named.len() == 1 - { - return true; - } - + fn normalize_unary_not_statement(&mut self, node: TreeSitterNode<'_>) -> Option { let raw_named = self.raw_named_children(node); - raw_named.len() == 1 - && raw_named[0].kind() == "unary" - && node_text(node, self.source) == node_text(raw_named[0], self.source) + let target = if raw_named.len() == 1 + && node_text(raw_named[0], self.source) == node_text(node, self.source) && self.unary_not_expression(raw_named[0]) - && self.raw_named_children(raw_named[0]).len() == 1 - } - - fn unary_not_expression(&self, node: TreeSitterNode<'_>) -> bool { - matches!(node.kind(), "unary" | "unary_expression") - && node_text(node, self.source).trim_start().starts_with('!') - } - - fn unary_minus_expression(&self, node: TreeSitterNode<'_>) -> bool { - if matches!(node.kind(), "unary" | 
"unary_expression" | "unary_operator") - && node_text(node, self.source).trim_start().starts_with('-') - { - return true; - } - - if node.kind() != "expression_list" { - return false; - } - let named = self.named_children(node); - if node - .children(&mut node.walk()) - .next() - .map(|child| node_text(child, self.source) == "-") - .unwrap_or(false) - && named.len() == 1 { - return true; - } - - let raw_named = self.raw_named_children(node); - raw_named.len() == 1 - && node_text(node, self.source) == node_text(raw_named[0], self.source) - && self.unary_minus_expression(raw_named[0]) + raw_named[0] + } else { + node + }; + let operand = self.named_children(target).into_iter().next()?; + let operand = optional_node(self.normalize_node(operand)); + Some(self.wrap( + "OPCALL", + vec![operand, Child::Symbol("!".to_string()), Child::Nil], + node, + )) } - fn infix_statement_parts<'tree>( - &self, - node: TreeSitterNode<'tree>, - ) -> Option<(TreeSitterNode<'tree>, String, TreeSitterNode<'tree>)> { - if !matches!( - node.kind(), - "body_statement" | "block_body" | "statement" | "argument_list" - ) { - return None; - } + fn normalize_unary_minus(&mut self, node: TreeSitterNode<'_>) -> Option { let raw_named = self.raw_named_children(node); let target = if raw_named.len() == 1 - && matches!( - raw_named[0].kind(), - "binary" | "binary_expression" | "comparison_operator" - ) - && node_text(node, self.source) == node_text(raw_named[0], self.source) + && node_text(raw_named[0], self.source) == node_text(node, self.source) + && self.unary_minus_expression(raw_named[0]) { raw_named[0] } else { node }; - let mut named_index = 0usize; - let mut left = None; - let mut right = None; - let mut operator = None; - for child in target.children(&mut target.walk()) { - if child.is_named() { - left.get_or_insert(child); - if operator.is_some() { - right = Some(child); - } - named_index += 1; - } else { - let text = node_text(child, self.source); - if COMPARISON_OPERATORS.contains(&text) || 
OPERATOR_CALL_OPERATORS.contains(&text) { - operator = Some(text.to_string()); - } + let operand = self.named_children(target).into_iter().next()?; + if operand.kind() == "integer" { + if let Ok(value) = node_text(operand, self.source).parse::() { + return Some(self.wrap("INTEGER", vec![Child::Integer(-value)], operand)); } } - if named_index == 2 { - Some((left?, operator?, right?)) - } else { - None - } + let operand = optional_node(self.normalize_node(operand)); + Some(self.wrap( + "OPCALL", + vec![operand, Child::Symbol("-@".to_string()), Child::Nil], + node, + )) } - fn boolean_operator(&self, node: TreeSitterNode<'_>) -> Option { - let direct = self.binary_operator(node)?; - if matches!(direct.as_str(), "&&" | "and") { - Some("and".to_string()) - } else if matches!(direct.as_str(), "||" | "or") { - Some("or".to_string()) - } else { - None + fn normalize_ternary_branch(&mut self, nodes: &[TreeSitterNode<'_>]) -> Option { + if nodes.is_empty() { + return None; + } + if nodes.len() == 1 { + return self.normalize_node(nodes[0]); + } + if let Some(call) = self.normalize_flat_dotted_nodes(nodes) { + return Some(call); } + self.normalize_body_nodes(nodes.to_vec(), nodes[0]) } - fn comparison_operator(&self, node: TreeSitterNode<'_>) -> Option { - self.binary_operator(node) - .or_else(|| comparison_operator_from_text(node_text(node, self.source))) + fn normalize_flat_dotted_nodes(&mut self, nodes: &[TreeSitterNode<'_>]) -> Option { + let receiver = *nodes.first()?; + let method = *nodes.get(1)?; + let connector = self + .source + .get(receiver.end_byte()..method.start_byte()) + .unwrap_or("") + .trim(); + if !matches!(connector, "." | "&.") { + return None; + } + let node_type = if connector == "&." 
{ "QCALL" } else { "CALL" }; + let receiver_node = optional_node(self.normalize_node(receiver)); + Some(self.wrap_from_nodes( + node_type, + vec![ + receiver_node, + Child::Symbol(node_text(method, self.source).trim_end_matches('=').to_string()), + Child::Nil, + ], + receiver, + method, + )) } - fn binary_operator(&self, node: TreeSitterNode<'_>) -> Option { - if let Some(operator) = node - .children(&mut node.walk()) - .find(|child| !child.is_named() && !matches!(node_text(*child, self.source), "(" | ")")) - .map(|child| node_text(child, self.source).to_string()) - { - return Some(operator); + fn normalize_assignment(&mut self, node: TreeSitterNode<'_>) -> Option { + let left = self.assignment_left(node)?; + let right = self + .assignment_right(node) + .and_then(|right| self.normalize_node(right)); + if left.kind() == "left_assignment_list" { + return Some(self.normalize_multiple_assignment(left, right, node)); } - - let raw_named = self.raw_named_children(node); - if raw_named.len() == 1 - && matches!( - raw_named[0].kind(), - "binary" - | "binary_expression" - | "binary_operator" - | "boolean_operator" - | "comparison_operator" - ) - && node_text(node, self.source) == node_text(raw_named[0], self.source) - { - return self.binary_operator(raw_named[0]); + if let Some(target) = self.assignment_target(left, right.clone(), node) { + return Some(target); } - - None + Some(self.wrap( + "LASGN", + vec![Child::String(self.target_name(left)), optional_node(right)], + node, + )) } - fn interpolated_statement(&self, node: TreeSitterNode<'_>) -> bool { - matches!( - node.kind(), - "body_statement" | "block_body" | "statement" | "argument_list" - ) && self - .named_children(node) - .into_iter() - .any(|child| child.kind() == "interpolation") - } - - fn interpolated_string(&self, node: TreeSitterNode<'_>) -> bool { - node.kind() == "string" - && self - .named_children(node) - .into_iter() - .any(|child| child.kind() == "interpolation") - } - - fn 
statement_call_with_block(&self, node: TreeSitterNode<'_>) -> bool { - matches!(node.kind(), "body_statement" | "block_body" | "statement") - && self.call_block(node).is_some() - && (self.dotted_call(node) - || self - .named_children(node) - .into_iter() - .any(|child| self.call_kind(child.kind()) || self.member_read_node(child))) - } + fn normalize_operator_assignment(&mut self, node: TreeSitterNode<'_>) -> Option { + let left = self.assignment_left(node)?; + let right_raw = self.assignment_right(node); + let right = right_raw.and_then(|right| self.normalize_node(right)); + let operator = self.operator_assignment_operator(node); - fn yield_statement(&self, node: TreeSitterNode<'_>) -> bool { - if !matches!( - node.kind(), - "body_statement" | "block" | "block_body" | "expression_statement" | "statement" - ) { - return false; + if left.kind() == "element_reference" { + let named = self.named_children(left); + let receiver = *named.first()?; + let args = named + .iter() + .skip(1) + .filter_map(|arg| self.normalize_node(*arg)) + .collect::>(); + let receiver = optional_node(self.normalize_node(receiver)); + return Some(self.wrap( + "OP_ASGN1", + vec![ + receiver, + Child::Symbol(operator), + list_or_nil(args, left, self), + optional_node(right), + ], + node, + )); } - let Some(first) = node.children(&mut node.walk()).next() else { - return false; - }; - if node_text(first, self.source) == "yield" { - return true; + + if self.member_read_node(left) { + let (receiver, method) = self.member_parts(left)?; + let receiver = optional_node(self.normalize_node(receiver)); + return Some(self.wrap( + "OP_ASGN2", + vec![ + receiver, + Child::Bool(false), + Child::Symbol(method), + Child::Symbol(operator), + optional_node(right), + ], + node, + )); } - if matches!( - node.kind(), - "body_statement" | "block_body" | "expression_statement" | "statement" - ) && first.kind() == "yield" + if let Some(logical) = + self.normalize_logical_operator_assignment(left, &operator, 
right.clone(), node) { - let Some(keyword) = first.children(&mut first.walk()).next() else { - return false; - }; - return node_text(keyword, self.source) == "yield"; + return Some(logical); } - false + if self.instance_variable(left) || self.global_variable(left) { + let value = self.augmented_assignment_value(left, &operator, right_raw, node); + return self.assignment_target(left, Some(value), node); + } + + let value = self.augmented_assignment_value(left, &operator, right_raw, node); + self.assignment_target(left, Some(value.clone()), node) + .or_else(|| { + Some(self.wrap( + "LASGN", + vec![ + Child::String(self.target_name(left)), + Child::Node(Box::new(value)), + ], + node, + )) + }) } - fn super_statement(&self, node: TreeSitterNode<'_>) -> bool { - if !matches!( - node.kind(), - "body_statement" | "block" | "block_body" | "call" | "statement" - ) { - return false; - } - if node_text(node, self.source).trim() == "super" { - return true; - } - let raw = self.raw_named_children(node); - let named = if raw.len() == 1 && raw[0].kind() == "call" { - self.raw_named_children(raw[0]) - } else { - raw - }; - named - .first() - .map(|child| child.kind() == "super") - .unwrap_or(false) - && named + fn normalize_operator_assignment_statement( + &mut self, + node: TreeSitterNode<'_>, + ) -> Option { + let (left, operator, right_raw) = self.operator_assignment_statement_parts(node)?; + let right = self.normalize_node(right_raw); + + if left.kind() == "element_reference" { + let named = self.named_children(left); + let receiver = *named.first()?; + let args = named .iter() .skip(1) - .all(|child| child.kind() == "argument_list") - } + .filter_map(|arg| self.normalize_node(*arg)) + .collect::>(); + let receiver = optional_node(self.normalize_node(receiver)); + return Some(self.wrap( + "OP_ASGN1", + vec![ + receiver, + Child::Symbol(operator), + list_or_nil(args, left, self), + optional_node(right), + ], + node, + )); + } - fn argument_list_element_reference(&self, node: 
TreeSitterNode<'_>) -> bool { - if node.kind() != "argument_list" { - return false; + if self.member_read_node(left) { + let (receiver, method) = self.member_parts(left)?; + let receiver = optional_node(self.normalize_node(receiver)); + return Some(self.wrap( + "OP_ASGN2", + vec![ + receiver, + Child::Bool(false), + Child::Symbol(method), + Child::Symbol(operator), + optional_node(right), + ], + node, + )); } - let named = self.named_children(node); - if named - .iter() - .any(|child| matches!(child.kind(), "block" | "do_block")) + + if let Some(logical) = + self.normalize_logical_operator_assignment(left, &operator, right.clone(), node) { - return false; + return Some(logical); } - let children = node.children(&mut node.walk()).collect::>(); - let direct_bracket_shape = children - .first() - .map(|child| node_text(*child, self.source) != "[") - .unwrap_or(false) - && children - .iter() - .any(|child| !child.is_named() && node_text(*child, self.source) == "[") - && children - .iter() - .any(|child| !child.is_named() && node_text(*child, self.source) == "]") - && named.len() >= 2; - if direct_bracket_shape { - return true; + if self.instance_variable(left) || self.global_variable(left) { + let value = self.augmented_assignment_value(left, &operator, Some(right_raw), node); + return self.assignment_target(left, Some(value), node); } - if named.len() != 1 || named[0].kind() != "element_reference" { - return false; + if let Some(target) = self.assignment_target(left, right, node) { + return Some(target); } - let reference = named[0]; - let reference_named = self.raw_named_children(reference); - if reference_named.len() < 2 - || reference_named - .iter() - .any(|child| matches!(child.kind(), "block" | "do_block")) + + let value = self.augmented_assignment_value(left, &operator, Some(right_raw), node); + Some(self.wrap( + "LASGN", + vec![ + Child::String(self.target_name(left)), + Child::Node(Box::new(value)), + ], + node, + )) + } + + fn 
operator_assignment_statement_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option<(TreeSitterNode<'tree>, String, TreeSitterNode<'tree>)> { + let mut left = None; + let mut operator = None; + let mut right = None; + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if child.is_named() { + if left.is_none() { + left = Some(child); + } + if operator.is_some() { + right = Some(child); + } + } else if let Some(found_operator) = + operator_assignment_statement_operator(node_text(child, self.source)) + { + operator = Some(found_operator); + } + } + + if let (Some(left), Some(operator), Some(right)) = (left, operator, right) { + return Some((left, operator, right)); + } + + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && node_text(raw_named[0], self.source) == node_text(node, self.source) { - return false; + return self.operator_assignment_statement_parts(raw_named[0]); } - let reference_children = reference - .children(&mut reference.walk()) - .collect::>(); - reference_children - .first() - .map(|child| node_text(*child, self.source) != "[") - .unwrap_or(false) - && reference_children - .iter() - .any(|child| !child.is_named() && node_text(*child, self.source) == "[") - && reference_children - .iter() - .any(|child| !child.is_named() && node_text(*child, self.source) == "]") - } - fn dotted_expression(&self, node: TreeSitterNode<'_>) -> bool { - matches!( - node.kind(), - "body_statement" | "block_body" | "statement" | "argument_list" - ) && self.dotted_call(node) + None } - fn dotted_call(&self, node: TreeSitterNode<'_>) -> bool { - if !node - .children(&mut node.walk()) - .any(|child| matches!(node_text(child, self.source), "." 
| "&.")) - { + fn operator_assignment_statement(&self, node: TreeSitterNode<'_>) -> bool { + if !matches!(node.kind(), "body_statement" | "block_body" | "statement") { return false; } - let callable = self - .named_children(node) - .into_iter() - .filter(|child| { - !matches!( - child.kind(), - "block" | "do_block" | "argument_list" | "arguments" - ) - }) - .collect::>(); - callable.len() >= 2 - } + if self.operator_assignment_statement_parts(node).is_some() { + return true; + } - fn safe_navigation_call(&self, node: TreeSitterNode<'_>) -> bool { - node.children(&mut node.walk()) - .any(|child| !child.is_named() && node_text(child, self.source) == "&.") + let raw_named = self.raw_named_children(node); + raw_named.len() == 1 + && node_text(raw_named[0], self.source) == node_text(node, self.source) + && self + .operator_assignment_statement_parts(raw_named[0]) + .is_some() } - fn dotted_call_parts<'tree>( - &self, - node: TreeSitterNode<'tree>, - block: Option>, - ) -> Option<(TreeSitterNode<'tree>, String)> { - let callable = self - .named_children(node) - .into_iter() - .filter(|child| Some(*child) != block) - .filter(|child| { - !matches!( - child.kind(), - "block" | "do_block" | "argument_list" | "arguments" - ) - }) - .collect::>(); - let receiver = *callable.first()?; - let method = node_text(*callable.get(1)?, self.source) - .trim_end_matches('=') - .to_string(); - Some((receiver, method)) - } - - fn member_read_node(&self, node: TreeSitterNode<'_>) -> bool { - if self.language == Language::Lua && node.kind() == "field" { - return false; - } - matches!( - node.kind(), - "attribute" - | "member_expression" - | "member_access_expression" - | "field" - | "field_access" - | "selector_expression" - | "field_expression" - | "navigation_expression" - | "directly_assignable_expression" - | "expression_list" - ) - } - - fn member_parts<'tree>( - &self, - node: TreeSitterNode<'tree>, - ) -> Option<(TreeSitterNode<'tree>, String)> { - if node.kind() == "expression_list" 
- && !(self.named_field(node, "operand").is_some() - && self.named_field(node, "field").is_some()) - { + fn normalize_logical_operator_assignment( + &mut self, + left: TreeSitterNode<'_>, + operator: &str, + right: Option, + source: TreeSitterNode<'_>, + ) -> Option { + if self.language != Language::Ruby || !matches!(operator, "||" | "&&") { return None; } - if self.dotted_call(node) { - return self.dotted_call_parts(node, None); + if !self.identifier_kind(left.kind()) { + return None; } - let receiver = self - .named_field(node, "receiver") - .or_else(|| self.named_field(node, "object")) - .or_else(|| self.named_field(node, "operand")) - .or_else(|| self.named_field(node, "value")) - .or_else(|| self.named_field(node, "expression")) - .or_else(|| { - self.named_children(node) - .into_iter() - .find(|child| child.kind() != "navigation_suffix") - })?; - let method = self - .named_field(node, "method") - .or_else(|| self.named_field(node, "field")) - .or_else(|| self.named_field(node, "property")) - .or_else(|| self.named_field(node, "suffix")) - .or_else(|| self.named_children(node).into_iter().last())?; - (receiver != method).then(|| { - ( - receiver, - node_text(method, self.source) - .trim_start_matches(['.', '?']) - .trim_end_matches('=') - .to_string(), - ) - }) + let name = self.target_name(left); + let node_type = if operator == "||" { + "OP_ASGN_OR" + } else { + "OP_ASGN_AND" + }; + let receiver = self.wrap("LVAR", vec![Child::String(name.clone())], left); + let assignment = self.wrap( + "LASGN", + vec![Child::String(name), optional_node(right)], + source, + ); + Some(self.wrap( + node_type, + vec![ + Child::Node(Box::new(receiver)), + Child::Symbol(operator.to_string()), + Child::Node(Box::new(assignment)), + ], + source, + )) } - fn call_arguments( + fn augmented_assignment_value( &mut self, - node: TreeSitterNode<'_>, - function: Option>, - ) -> Vec { - let Some(args) = self - .named_field(node, "arguments") - .or_else(|| self.named_field(node, 
"argument")) - .or_else(|| { - self.named_children(node) - .into_iter() - .find(|child| matches!(child.kind(), "argument_list" | "arguments")) - }) - else { - return Vec::new(); - }; - self.named_children(args) - .into_iter() - .filter(|child| Some(*child) != function) - .filter_map(|child| self.normalize_node(child)) - .collect() + left: TreeSitterNode<'_>, + operator: &str, + right_raw: Option>, + source: TreeSitterNode<'_>, + ) -> Node { + let receiver = optional_node(self.assignment_receiver(left)); + let right = right_raw.and_then(|right| self.normalize_node(right)); + self.wrap( + "CALL", + vec![ + receiver, + Child::Symbol(operator.to_string()), + list_or_nil(right.into_iter().collect(), right_raw.unwrap_or(left), self), + ], + source, + ) } - fn command_arguments(&mut self, args: TreeSitterNode<'_>) -> Vec { - let children = self.named_children(args); - if children.is_empty() { - return self.scalar_argument_list_value(args).into_iter().collect(); + fn assignment_receiver(&mut self, left: TreeSitterNode<'_>) -> Option { + if self.identifier_kind(left.kind()) { + return Some(self.wrap( + "LVAR", + vec![Child::String(node_text(left, self.source).to_string())], + left, + )); } - if self.dotted_expression(args) { - return self.normalize_dotted_expression(args).into_iter().collect(); + if self.instance_variable(left) { + return Some(self.wrap( + "IVAR", + vec![Child::String(node_text(left, self.source).to_string())], + left, + )); } - if children.len() == 1 - && self.call_kind(children[0].kind()) - && self.call_block(children[0]).is_some() - { - return self - .normalize_call_with_block(children[0]) - .into_iter() - .collect(); + if self.global_variable(left) { + return Some(self.normalize_global_variable(left)); } - children - .into_iter() - .filter_map(|child| self.normalize_node(child)) - .collect() + if self.const_kind(left.kind()) { + return Some(self.normalize_const(left)); + } + self.normalize_node(left) } - fn yield_argument_nodes(&mut self, node: 
TreeSitterNode<'_>) -> Vec { - let children = self.named_children(node); - if children.is_empty() { - return self.scalar_argument_list_value(node).into_iter().collect(); - } - children + fn normalize_multiple_assignment( + &self, + left: TreeSitterNode<'_>, + right: Option, + source: TreeSitterNode<'_>, + ) -> Node { + let targets = self + .named_children(left) .into_iter() - .filter_map(|child| self.normalize_node(child)) - .collect() + .map(|child| { + let node_type = if child.kind() == "global_variable" + || node_text(child, self.source).starts_with('$') + { + "GASGN" + } else { + "LASGN" + }; + self.wrap( + node_type, + vec![Child::String(self.target_name(child)), Child::Nil], + child, + ) + }) + .collect::>(); + self.wrap( + "MASGN", + vec![optional_node(right), list_or_nil(targets, left, self)], + source, + ) } - fn scalar_argument_list_value(&mut self, node: TreeSitterNode<'_>) -> Option { - let text = node_text(node, self.source).trim(); - if text == "yield" { - return Some(self.wrap("YIELD", vec![Child::Nil], node)); + fn normalize_declaration(&mut self, node: TreeSitterNode<'_>) -> Option { + let mut assignments = Vec::new(); + for entry in self.declaration_entries(node) { + let Some(name) = self.declaration_name(entry) else { + continue; + }; + let right = self + .declaration_value(entry) + .and_then(|value| self.normalize_node(value)); + assignments.push(self.wrap( + "LASGN", + vec![Child::String(self.target_name(name)), optional_node(right)], + entry, + )); } - if text == "nil" { - return Some(self.wrap("NIL", Vec::new(), node)); + + if assignments.is_empty() { + None + } else if assignments.len() == 1 { + assignments.into_iter().next() + } else { + Some( + self.wrap( + "BLOCK", + assignments + .into_iter() + .map(|assignment| Child::Node(Box::new(assignment))) + .collect(), + node, + ), + ) } - if text == "true" { - return Some(self.wrap("TRUE", Vec::new(), node)); + } + + fn normalize_call(&mut self, node: TreeSitterNode<'_>) -> Option { + if 
self.zero_child_identifier_call(node) { + return Some(self.normalize_zero_child_call(node)); } - if text == "false" { - return Some(self.wrap("FALSE", Vec::new(), node)); + if self.call_block(node).is_some() { + return self.normalize_call_with_block(node); } - if let Some(symbol) = text.strip_prefix(':') { - if bare_identifier_text(symbol) { - return Some(self.wrap("LIT", vec![Child::Symbol(symbol.to_string())], node)); - } - } - if bare_identifier_text(text) { - if !self - .local_stack - .iter() - .rev() - .any(|scope| scope.contains(text)) - { - Some(self.wrap("VCALL", vec![Child::Symbol(text.to_string())], node)) - } else { - Some(self.wrap("LVAR", vec![Child::String(text.to_string())], node)) - } - } else { - None + if self.visibility_inline_def_call(node) { + return self.normalize_visibility_inline_def(node); } + self.normalize_call_without_block(node, None) } - fn assignment_left<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { - self.named_field(node, "left") - .or_else(|| self.named_children(node).into_iter().next()) - } - - fn assignment_right<'tree>( - &self, - node: TreeSitterNode<'tree>, - ) -> Option> { - self.named_field(node, "right") - .or_else(|| self.named_children(node).into_iter().nth(1)) + fn normalize_zero_child_call(&self, node: TreeSitterNode<'_>) -> Node { + self.wrap( + "VCALL", + vec![Child::Symbol(node_text(node, self.source).to_string())], + node, + ) } - fn operator_assignment_operator(&self, node: TreeSitterNode<'_>) -> String { - let mut cursor = node.walk(); - let raw = node - .children(&mut cursor) - .find(|child| !child.is_named() && node_text(*child, self.source).ends_with('=')) - .map(|child| node_text(child, self.source)) - .unwrap_or(""); - match raw { - "||=" => "||".to_string(), - "&&=" => "&&".to_string(), - _ => raw.trim_end_matches('=').to_string(), - } + fn normalize_member_read(&mut self, node: TreeSitterNode<'_>) -> Option { + let Some((receiver, method)) = self.member_parts(node) else { + let children = 
self.normalize_children(node); + return Some(self.wrap(&kind_type(node.kind()), children, node)); + }; + let receiver = optional_node(self.normalize_node(receiver)); + Some(self.wrap( + "CALL", + vec![receiver, Child::Symbol(method), Child::Nil], + node, + )) } - fn parameters_child<'tree>( - &self, - node: TreeSitterNode<'tree>, - ) -> Option> { - self.named_field(node, "parameters").or_else(|| { - self.named_children(node).into_iter().find(|child| { - matches!( - child.kind(), - "parameters" - | "parameter_list" - | "formal_parameters" - | "function_value_parameters" - | "method_parameters" - ) + fn normalize_call_with_block(&mut self, node: TreeSitterNode<'_>) -> Option { + let block = self.call_block(node); + let call_source = if self.language == Language::Ruby + && matches!(node.kind(), "body_statement" | "block_body" | "statement") + { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "call" + && node_text(node, self.source) == node_text(raw_named[0], self.source) + { + raw_named[0] + } else { + node + } + } else { + node + }; + let call = self.normalize_call_without_block(call_source, block)?; + let args = self.normalize_block_parameters(block); + let body = block.and_then(|block| { + self.with_ruby_scope(block, false, |normalizer| { + let body_node = normalizer + .named_field(block, "body") + .or_else(|| normalizer.block_child(block)) + .unwrap_or(block); + normalizer.normalize_body(body_node).map(dynamic_scope) }) - }) + }); + let scope = self.scope(body, args, node); + Some(self.wrap( + "ITER", + vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], + node, + )) } - fn declaration_entries<'tree>( - &self, - node: TreeSitterNode<'tree>, - ) -> Vec> { - if matches!(node.kind(), "local_variable_declaration") { - let entries = self - .named_children(node) - .into_iter() - .filter(|child| child.kind() == "variable_declarator") - .collect::>(); - if !entries.is_empty() { - return entries; - } - } - 
if matches!( - node.kind(), - "local_variable_declaration" - | "variable_declarator" - | "variable_declaration" - | "property_declaration" - ) { - vec![node] - } else { - Vec::new() + fn normalize_argument_list_call(&mut self, node: TreeSitterNode<'_>) -> Option { + if !self.ruby() || node.kind() != "argument_list" { + return None; } + let target = { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "call" + && node_text(raw_named[0], self.source) == node_text(node, self.source) + { + raw_named[0] + } else { + node + } + }; + let function = self.named_children(target).into_iter().next()?; + let args_node = self + .named_children(target) + .into_iter() + .find(|child| child.kind() == "argument_list"); + let args = args_node + .map(|args| { + self.named_children(args) + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>() + }) + .unwrap_or_default(); + Some(self.wrap( + "FCALL", + vec![ + Child::Symbol(node_text(function, self.source).to_string()), + list_or_nil(args, args_node.unwrap_or(node), self), + ], + node, + )) } - fn declaration_name<'tree>( - &self, - node: TreeSitterNode<'tree>, - ) -> Option> { - if let Some(name) = self.named_field(node, "name") { - return Some(name); + fn normalize_argument_list_element_reference( + &mut self, + node: TreeSitterNode<'_>, + ) -> Option { + if !self.ruby() || !self.argument_list_element_reference(node) { + return None; } - - for child in self.named_children(node) { - if child.kind() == "variable_declaration" { - if let Some(name) = self.declaration_name(child) { - return Some(name); - } - } - if matches!(child.kind(), "identifier" | "simple_identifier" | "pattern") { - return Some(child); + let target = { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "element_reference" + && node_text(raw_named[0], self.source) == node_text(node, self.source) + { + raw_named[0] + } else { + node } - } - 
None + }; + let named = self.named_children(target); + let recv = *named.first()?; + let args = named + .iter() + .skip(1) + .filter_map(|child| self.normalize_node(*child)) + .collect::>(); + let recv = self.normalize_node(recv)?; + Some(self.wrap( + "CALL", + vec![ + Child::Node(Box::new(recv)), + Child::Symbol("[]".to_string()), + list_or_nil(args, node, self), + ], + node, + )) } - fn declaration_value<'tree>( - &self, - node: TreeSitterNode<'tree>, - ) -> Option> { - if node.kind() == "property_declaration" { - let mut after_target = false; - for child in self.named_children(node) { - if !after_target && matches!(child.kind(), "variable_declaration" | "pattern") { - after_target = true; - continue; - } - if after_target && !declaration_metadata_kind(child.kind()) { - return Some(child); - } - } + fn normalize_argument_list_unary_not(&mut self, node: TreeSitterNode<'_>) -> Option { + if !self.ruby() || !self.argument_list_unary_not(node) { + return None; } - - self.named_field(node, "value").or_else(|| { - self.named_children(node).into_iter().find(|child| { - !declaration_metadata_kind(child.kind()) - && !matches!( - child.kind(), - "identifier" | "simple_identifier" | "pattern" | "variable_declaration" - ) - }) - }) + let target = { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "unary" + && node_text(raw_named[0], self.source) == node_text(node, self.source) + { + raw_named[0] + } else { + node + } + }; + let operand = self.named_children(target).into_iter().next()?; + let operand = optional_node(self.normalize_node(operand)); + Some(self.wrap( + "OPCALL", + vec![operand, Child::Symbol("!".to_string()), Child::Nil], + node, + )) } - fn assignment_target( + fn normalize_argument_list_call_with_block( &mut self, - left: TreeSitterNode<'_>, - right: Option, - source: TreeSitterNode<'_>, + node: TreeSitterNode<'_>, ) -> Option { - if left.kind() == "instance_variable" || node_text(left, 
self.source).starts_with('@') { - return Some(self.wrap( - "IASGN", - vec![ - Child::String(node_text(left, self.source).to_string()), - optional_node(right), - ], - source, - )); - } - if left.kind() == "global_variable" || node_text(left, self.source).starts_with('$') { - return Some(self.wrap( - "GASGN", - vec![ - Child::String(node_text(left, self.source).to_string()), - optional_node(right), - ], - source, - )); - } - if left.kind() == "element_reference" { - let named = self.named_children(left); - let receiver = *named.first()?; - let mut args = named - .iter() - .skip(1) - .filter_map(|arg| self.normalize_node(*arg)) - .collect::>(); - if let Some(right) = right { - args.push(right); - } - let receiver = optional_node(self.normalize_node(receiver)); - let args = list_or_nil(args, left, self); - return Some(self.wrap( - "ATTRASGN", - vec![receiver, Child::Symbol("[]=".to_string()), args], - source, - )); + if !self.ruby() || node.kind() != "argument_list" { + return None; } - if self.member_read_node(left) { - let (receiver, method) = self.member_parts(left)?; - let writer = if node_text(left, self.source).contains("&.") { - method + let target = { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "call" + && node_text(raw_named[0], self.source) == node_text(node, self.source) + { + raw_named[0] } else { - format!("{method}=") - }; - let receiver = optional_node(self.normalize_node(receiver)); - let args = list_or_nil(right.into_iter().collect(), left, self); - return Some(self.wrap( - "ATTRASGN", - vec![receiver, Child::Symbol(writer), args], - source, - )); - } - if left.kind() == "expression_list" { - return self - .named_children(left) - .into_iter() - .next() - .and_then(|child| self.assignment_target(child, right, source)); - } - None + node + } + }; + let block = self.call_block(target)?; + let call = self.normalize_argument_list_call(node)?; + let args = 
self.normalize_block_parameters(Some(block)); + let body = self.with_ruby_scope(block, false, |normalizer| { + let body_node = normalizer + .named_field(block, "body") + .or_else(|| normalizer.block_child(block)) + .unwrap_or(block); + normalizer.normalize_body(body_node).map(dynamic_scope) + }); + Some(self.wrap( + "ITER", + vec![ + Child::Node(Box::new(call)), + Child::Node(Box::new(self.scope(body, args, node))), + ], + node, + )) } - fn normalize_assignment_lhs(&mut self, node: TreeSitterNode<'_>) -> Option { - let right = node - .next_named_sibling() - .and_then(|sibling| self.normalize_node(sibling)); - let source = node.parent().unwrap_or(node); - self.assignment_target(node, right.clone(), source) - .or_else(|| { - Some(self.wrap( - "LASGN", - vec![Child::String(self.target_name(node)), optional_node(right)], - node, - )) + fn normalize_statement_call_with_block(&mut self, node: TreeSitterNode<'_>) -> Option { + let block = self.call_block(node); + let call_source = self.statement_block_call(node)?; + let call = self.normalize_call_without_block(call_source, block)?; + let args = self.normalize_block_parameters(block); + let body = block.and_then(|block| { + self.with_ruby_scope(block, false, |normalizer| { + let body_node = normalizer + .named_field(block, "body") + .or_else(|| normalizer.block_child(block)) + .unwrap_or(block); + normalizer.normalize_body(body_node).map(dynamic_scope) }) + }); + let scope = self.scope(body, args, node); + Some(self.wrap( + "ITER", + vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], + node, + )) } - fn target_name(&self, node: TreeSitterNode<'_>) -> String { - let text = node_text(node, self.source); - if self.identifier_kind(node.kind()) - || matches!(node.kind(), "splat" | "splat_parameter" | "rest_assignment") - { - text.trim_start_matches('*').to_string() + fn normalize_dotted_expression(&mut self, node: TreeSitterNode<'_>) -> Option { + let block = self.call_block(node); + let call_source = 
block.map(|block| self.source_before_child(node, block)); + let call = self.normalize_dotted_call_expression_with_source(node, call_source.as_ref())?; + let Some(block) = block else { + return Some(call); + }; + let args = self.normalize_block_parameters(Some(block)); + let body = self.with_ruby_scope(block, false, |normalizer| { + let body_node = normalizer + .named_field(block, "body") + .or_else(|| normalizer.block_child(block)) + .unwrap_or(block); + normalizer.normalize_body(body_node).map(dynamic_scope) + }); + let scope = self.scope(body, args, node); + Some(self.wrap( + "ITER", + vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], + node, + )) + } + + fn normalize_call_without_block( + &mut self, + node: TreeSitterNode<'_>, + block: Option>, + ) -> Option { + let call_source = block.map(|block| self.source_before_child(node, block)); + if self.dotted_call(node) { + let (receiver, method) = self.dotted_call_parts(node, block)?; + let args = self.call_arguments(node, None); + let node_type = if self.safe_navigation_call(node) { + "QCALL" + } else { + "CALL" + }; + let receiver = optional_node(self.normalize_node(receiver)); + let args = if let Some(source) = call_source.as_ref() { + self.list_or_nil_from_source_node(args, source) + } else { + list_or_nil(args, node, self) + }; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node( + node_type, + vec![receiver, Child::Symbol(method), args], + source, + )); + } + return Some(self.wrap(node_type, vec![receiver, Child::Symbol(method), args], node)); + } + + let function = self + .named_field(node, "function") + .or_else(|| self.named_field(node, "call")) + .or_else(|| { + self.named_children(node) + .into_iter() + .find(|child| Some(*child) != block) + })?; + let args = self.call_arguments(node, Some(function)); + if self.identifier_kind(function.kind()) { + let node_type = if block.is_some() || !args.is_empty() { + "FCALL" + } else { + "VCALL" + }; + let children = 
vec![ + Child::Symbol(node_text(function, self.source).to_string()), + if let Some(source) = call_source.as_ref() { + self.list_or_nil_from_source_node(args, source) + } else { + list_or_nil(args, node, self) + }, + ]; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node(node_type, children, source)); + } + return Some(self.wrap(node_type, children, node)); + } + if self.language == Language::Ruby && self.const_kind(function.kind()) { + let children = vec![ + Child::Symbol(node_text(function, self.source).to_string()), + if let Some(source) = call_source.as_ref() { + self.list_or_nil_from_source_node(args, source) + } else { + list_or_nil(args, node, self) + }, + ]; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node("FCALL", children, source)); + } + return Some(self.wrap("FCALL", children, node)); + } + if self.member_read_node(function) { + let (receiver, method) = self.member_parts(function)?; + let receiver = optional_node(self.normalize_node(receiver)); + let args = if let Some(source) = call_source.as_ref() { + self.list_or_nil_from_source_node(args, source) + } else { + list_or_nil(args, node, self) + }; + let children = vec![receiver, Child::Symbol(method), args]; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node("CALL", children, source)); + } + return Some(self.wrap("CALL", children, node)); + } + let function = optional_node(self.normalize_node(function)); + let args = if let Some(source) = call_source.as_ref() { + self.list_or_nil_from_source_node(args, source) } else { - text.to_string() + list_or_nil(args, node, self) + }; + let children = vec![function, Child::Symbol("call".to_string()), args]; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node("CALL", children, source)); } + Some(self.wrap("CALL", children, node)) } - fn function_name(&self, node: TreeSitterNode<'_>) -> Option { - Some( - 
self.named_field(node, "name") - .or_else(|| { - self.named_children(node).into_iter().find(|child| { - self.identifier_kind(child.kind()) || child.kind() == "constant" - }) - }) - .map(|name| node_text(name, self.source).to_string()) - .unwrap_or_default(), - ) + fn normalize_element_reference(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self + .normalization_adapter + .element_reference_target(node, self.source) + .unwrap_or(node); + let named = self.named_children(target); + let receiver = *named.first()?; + let args = named + .iter() + .skip(1) + .filter_map(|arg| self.normalize_node(*arg)) + .collect::>(); + if self.self_node(receiver) { + return Some(self.wrap( + "FCALL", + vec![ + Child::Symbol("[]".to_string()), + list_or_nil(args, node, self), + ], + node, + )); + } + let receiver = optional_node(self.normalize_node(receiver)); + let args = list_or_nil(args, node, self); + Some(self.wrap( + "CALL", + vec![receiver, Child::Symbol("[]".to_string()), args], + node, + )) } - fn block_child<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { - self.named_children(node).into_iter().find(|child| { - matches!( - child.kind(), - "body_statement" - | "block_body" - | "block" - | "class_body" - | "function_body" - | "statements" - | "control_structure_body" - ) - }) + fn normalize_rescue_modifier(&mut self, node: TreeSitterNode<'_>) -> Option { + let named = self.named_children(node); + let body = named.first().and_then(|body| self.normalize_node(*body)); + let handler = named + .get(1) + .and_then(|handler| self.normalize_node(*handler)); + let resbody = self.wrap( + "RESBODY", + vec![Child::Nil, optional_node(handler), Child::Nil], + node, + ); + Some(self.wrap( + "RESCUE", + vec![ + optional_node(body), + Child::Node(Box::new(resbody)), + Child::Nil, + ], + node, + )) } - fn call_block<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { - self.named_children(node) - .into_iter() - .find(|child| matches!(child.kind(), "block" | 
"do_block")) + fn normalize_ensure_clause(&mut self, node: TreeSitterNode<'_>) -> Option { + self.normalize_body_nodes(self.named_children(node), node) } - fn named_field<'tree>( - &self, - node: TreeSitterNode<'tree>, - name: &str, - ) -> Option> { - if self.language == Language::Python - && matches!(name, "body" | "consequence") - && matches!( - node.kind(), - "elif_clause" - | "else_clause" - | "for_statement" - | "function_definition" - | "if_statement" - | "try_statement" - | "while_statement" - | "with_statement" - ) - { - if let Some(block) = self - .raw_named_children(node) - .into_iter() - .find(|child| child.kind() == "block") - { - return Some(block); - } - } - node.child_by_field_name(name) + fn normalize_dotted_call_expression(&mut self, node: TreeSitterNode<'_>) -> Option { + self.normalize_dotted_call_expression_with_source(node, None) } - fn named_children<'tree>(&self, node: TreeSitterNode<'tree>) -> Vec> { - if node.kind() == "dotted_name" && !node_text(node, self.source).contains('.') { - return Vec::new(); - } - if self.language == Language::Python - && node.kind() == "with_clause" - && bare_identifier_text(node_text(node, self.source)) - { - return Vec::new(); + fn normalize_dotted_call_expression_with_source( + &mut self, + node: TreeSitterNode<'_>, + source: Option<&Node>, + ) -> Option { + let raw_named = self.raw_named_children(node); + let target = if raw_named.len() == 1 && self.dotted_call(raw_named[0]) { + raw_named[0] + } else { + node + }; + let (receiver_raw, method) = self.dotted_call_parts(target, None)?; + let args = self.call_arguments(target, None); + let args = if let Some(source) = source { + self.list_or_nil_from_source_node(args, source) + } else { + list_or_nil(args, node, self) + }; + let receiver = optional_node(self.normalize_node(receiver_raw)); + let node_type = if self.safe_navigation_call(target) { + "QCALL" + } else { + "CALL" + }; + let children = vec![receiver, Child::Symbol(method), args]; + if let Some(source) = 
source { + return Some(self.wrap_from_source_node(node_type, children, source)); } - if self.language == Language::Lua - && node.kind() == "variable_list" - && self.raw_named_children(node).len() == 1 - && self - .raw_named_children(node) - .first() - .map(|child| self.identifier_kind(child.kind())) - .unwrap_or(false) - && self.lua_single_assignment_block_child(node) - { - return Vec::new(); + Some(self.wrap(node_type, children, node)) + } + + fn normalize_begin(&mut self, node: TreeSitterNode<'_>) -> Option { + let named = self.named_children(node); + let rescue_nodes = named + .iter() + .copied() + .filter(|child| child.kind() == "rescue") + .collect::>(); + let ensure_node = named.iter().copied().find(|child| child.kind() == "ensure"); + if rescue_nodes.is_empty() { + let Some(ensure_node) = ensure_node else { + let children = self.normalize_children(node); + return Some(self.wrap("BEGIN", children, node)); + }; + let body_nodes = named + .iter() + .copied() + .take_while(|child| child.kind() != "ensure") + .collect::>(); + let body = + self.normalize_body_nodes(body_nodes.clone(), *body_nodes.first().unwrap_or(&node)); + let ensure_body = self.normalize_body(ensure_node); + let source_start = body_nodes.first().copied().unwrap_or(node); + let ensure_named = self.named_children(ensure_node); + let source_end = ensure_named.last().copied().unwrap_or(ensure_node); + let source = self.source_from_nodes(source_start, source_end); + return Some(self.wrap_from_source_node( + "ENSURE", + vec![optional_node(body), optional_node(ensure_body)], + &source, + )); } - if self.language == Language::Lua - && node.kind() == "variable_list" - && self.raw_named_children(node).len() == 1 - && node - .parent() - .map(|parent| parent.kind() == "for_generic_clause") - .unwrap_or(false) - { - return Vec::new(); + + let body_nodes = named + .iter() + .copied() + .take_while(|child| child.kind() != "rescue") + .collect::>(); + let body = + self.normalize_body_nodes(body_nodes.clone(), 
*body_nodes.first().unwrap_or(&node)); + let resbodies = rescue_nodes + .iter() + .filter_map(|child| self.normalize_rescue_clause(*child)) + .collect::>(); + let source_start = body_nodes.first().copied().unwrap_or(node); + let source_end = rescue_nodes + .last() + .and_then(|last| self.rescue_source_end(*last)) + .or_else(|| rescue_nodes.last().copied()) + .unwrap_or(node); + let source = self.source_from_nodes(source_start, source_end); + let rescued = self.wrap_from_source_node( + "RESCUE", + vec![ + optional_node(body), + optional_node(self.link_rescue_chain(resbodies)), + Child::Nil, + ], + &source, + ); + let Some(ensure_node) = ensure_node else { + return Some(rescued); + }; + let ensure_body = self.normalize_body(ensure_node); + let ensure_named = self.named_children(ensure_node); + let source_end = ensure_named.last().copied().unwrap_or(ensure_node); + let source = self.source_from_nodes(source_start, source_end); + Some(self.wrap_from_source_node( + "ENSURE", + vec![Child::Node(Box::new(rescued)), optional_node(ensure_body)], + &source, + )) + } + + fn normalize_rescue_clause(&mut self, node: TreeSitterNode<'_>) -> Option { + let exceptions = self + .normalization_adapter + .rescue_clause_exceptions(node, self.source); + let exception_nodes = exceptions + .iter() + .filter_map(|child| { + if child.kind() == "exceptions" + && ruby_exception_constant_text(node_text(*child, self.source)) + { + Some(self.normalize_const(*child)) + } else { + self.normalize_node(*child) + } + }) + .collect::>(); + let exception_source = self + .normalization_adapter + .rescue_clause_exceptions_source(node, self.source); + let exception_variable = self.rescue_exception_variable(node); + let handler = self.normalization_adapter.rescue_clause_handler(node); + let normalized_handler = handler.and_then(|handler| self.normalize_body(handler)); + let body = self.prepend_rescue_exception_assignment(normalized_handler, exception_variable); + Some(self.wrap( + "RESBODY", + vec![ + 
list_or_nil(exception_nodes, exception_source.unwrap_or(node), self), + optional_node(body), + Child::Nil, + ], + node, + )) + } + + fn rescue_source_end<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + if let Some(handler) = self.normalization_adapter.rescue_clause_handler(node) { + return self + .named_children(handler) + .last() + .copied() + .or(Some(handler)); } - if self.language == Language::Lua - && node.kind() == "variable_list" - && self.raw_named_children(node).len() == 1 - && node - .parent() - .map(|parent| { - parent.kind() == "variable_declaration" - && self.raw_named_children(parent).len() == 1 - }) - .unwrap_or(false) - { - return Vec::new(); + + self.named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() != "comment") + .or(Some(node)) + } + + fn link_rescue_chain(&self, mut resbodies: Vec) -> Option { + let mut next = None; + while let Some(mut current) = resbodies.pop() { + while current.children.len() <= 2 { + current.children.push(Child::Nil); + } + current.children[2] = optional_node(next); + next = Some(current); } + next + } - let children = self.raw_named_children(node); - if self.language == Language::Lua - && node.kind() == "expression_list" - && children.len() == 1 - && self.identifier_kind(children[0].kind()) - && node - .parent() - .map(|parent| matches!(parent.kind(), "assignment_statement" | "return_statement")) - .unwrap_or(false) - && node_text(node, self.source) == node_text(children[0], self.source) - { - return Vec::new(); + fn rescue_exception_variable(&self, node: TreeSitterNode<'_>) -> Option { + let name = self + .normalization_adapter + .rescue_clause_exception_variable_name(node)?; + let source = self + .normalization_adapter + .rescue_clause_exception_variable_source(node) + .unwrap_or(name); + let errinfo = self.wrap("ERRINFO", Vec::new(), source); + Some(self.wrap( + "LASGN", + vec![ + Child::String(node_text(name, self.source).to_string()), + Child::Node(Box::new(errinfo)), + ], + 
source, + )) + } + + fn prepend_rescue_exception_assignment( + &self, + body: Option, + assignment: Option, + ) -> Option { + let Some(assignment) = assignment else { + return body; + }; + let Some(mut body) = body else { + return Some(assignment); + }; + if body.r#type == "BLOCK" { + let mut children = vec![Child::Node(Box::new(assignment))]; + children.extend( + body.children + .into_iter() + .filter(|child| !matches!(child, Child::Nil)), + ); + body.children = children; + Some(body) + } else { + let source = self.source_from_normalized_nodes(&assignment, &body); + Some(self.wrap_from_source_node( + "BLOCK", + vec![ + Child::Node(Box::new(assignment)), + Child::Node(Box::new(body)), + ], + &source, + )) } - if self.language == Language::Lua - && node.kind() == "expression_list" - && children.len() == 1 - && matches!( - children[0].kind(), - "true" | "false" | "nil" | "number" | "integer" | "float" - ) - && node - .parent() - .map(|parent| matches!(parent.kind(), "assignment_statement" | "return_statement")) - .unwrap_or(false) - && node_text(node, self.source) == node_text(children[0], self.source) - { - return Vec::new(); + } + + fn normalize_modifier_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let keyword = self.modifier_keyword(node); + let (action, condition) = self.modifier_parts(node)?; + let node_type = match keyword.as_deref() { + Some("unless") => "UNLESS", + Some("while") => "WHILE", + Some("until") => "UNTIL", + _ => "IF", + }; + let condition = optional_node(self.normalize_node(condition)); + let action = optional_node(self.normalize_modifier_action(action)); + let trailing = if matches!(node_type, "WHILE" | "UNTIL") { + Child::Bool(true) + } else { + Child::Nil + }; + Some(self.wrap(node_type, vec![condition, action, trailing], node)) + } + + fn normalize_modifier_action(&mut self, node: TreeSitterNode<'_>) -> Option { + if self.modifier_return_action(node) { + self.normalize_return_node(node) + } else { + self.normalize_node(node) } 
- if self.language == Language::Lua - && node.kind() == "expression_list" - && children.len() == 1 - && matches!( - children[0].kind(), - "binary_expression" - | "function_call" - | "dot_index_expression" - | "function_definition" - | "string" - ) - && node_text(node, self.source) == node_text(children[0], self.source) + } + + fn normalize_command_call_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let raw_named = self.raw_named_children(node); + let target = if matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "statement" + ) && raw_named.len() == 1 + && raw_named[0].kind() == "call" + && node_text(node, self.source) == node_text(raw_named[0], self.source) { - return self.named_children(children[0]); + raw_named[0] + } else { + node + }; + let function = self.named_children(target).into_iter().next()?; + if self.visibility_inline_def_statement(node, function) { + let method = self.inline_def_from_statement(node); + return Some(self.wrap( + "FCALL", + vec![ + Child::Symbol(node_text(function, self.source).to_string()), + list_or_nil(method.into_iter().collect(), node, self), + ], + node, + )); } - if self.language == Language::Lua - && node.kind() == "expression_list" - && children.len() == 1 - && children[0].kind() == "table_constructor" - && node_text(node, self.source) == node_text(children[0], self.source) - { - return self.named_children(children[0]); - } - if self.language == Language::Lua - && node.kind() == "field" - && children.len() == 1 - && self.identifier_kind(children[0].kind()) - && node_text(node, self.source) == node_text(children[0], self.source) - { - return Vec::new(); + let args_node = self + .named_children(target) + .into_iter() + .find(|child| matches!(child.kind(), "argument_list" | "arguments")); + let args = args_node + .map(|args| self.command_arguments(args)) + .unwrap_or_default(); + let block = self.call_block(target); + let call_source = block.map(|block| self.source_before_child(node, block)); + if 
self.ruby() && node_text(function, self.source) == "yield" { + let children = vec![list_or_nil(args, args_node.unwrap_or(node), self)]; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node("YIELD", children, source)); + } + return Some(self.wrap("YIELD", children, node)); } - if self.language == Language::Lua - && node.kind() == "field" - && children.len() == 1 - && children[0].kind() == "string" - && node_text(node, self.source) == node_text(children[0], self.source) - { - return self.named_children(children[0]); + let call_type = if args.is_empty() { "VCALL" } else { "FCALL" }; + let call_children = vec![ + Child::Symbol(node_text(function, self.source).to_string()), + list_or_nil(args, args_node.unwrap_or(node), self), + ]; + let call = if let Some(source) = call_source.as_ref() { + self.wrap_from_source_node(call_type, call_children, source) + } else { + self.wrap(call_type, call_children, node) + }; + let Some(block) = block else { + return Some(call); + }; + let block_args = self.normalize_block_parameters(Some(block)); + let body = self.with_ruby_scope(block, false, |normalizer| { + let body_node = normalizer + .named_field(block, "body") + .or_else(|| normalizer.block_child(block)) + .unwrap_or(block); + normalizer.normalize_body(body_node).map(dynamic_scope) + }); + Some(self.wrap( + "ITER", + vec![ + Child::Node(Box::new(call)), + Child::Node(Box::new(self.scope(body, block_args, node))), + ], + node, + )) + } + + fn normalize_visibility_inline_def(&mut self, node: TreeSitterNode<'_>) -> Option { + let message = + node_text(self.named_children(node).into_iter().next()?, self.source).to_string(); + let args = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list"); + let method = self.inline_def_from_argument_list(args); + Some(self.wrap( + "FCALL", + vec![ + Child::Symbol(message), + list_or_nil(method.into_iter().collect(), args.unwrap_or(node), self), + ], + node, + )) + } + + fn 
normalize_const(&mut self, node: TreeSitterNode<'_>) -> Node { + if matches!(node.kind(), "scope_resolution" | "scoped_type_identifier") { + let parts = self.named_children(node); + let base = parts + .first() + .map(|part| self.normalize_const(*part)) + .map(|part| Child::Node(Box::new(part))) + .unwrap_or(Child::Nil); + let name = self + .named_field(node, "name") + .or_else(|| parts.last().copied()) + .map(|name| node_text(name, self.source).to_string()) + .unwrap_or_default(); + return self.wrap("COLON2", vec![base, Child::Symbol(name)], node); } - if self.language == Language::Lua - && node.kind() == "field" - && children.len() == 1 - && children[0].kind() == "function_call" - && node_text(node, self.source) == node_text(children[0], self.source) - { - return self.named_children(children[0]); + + self.wrap( + "CONST", + vec![Child::Symbol(node_text(node, self.source).to_string())], + node, + ) + } + + fn const_for(&mut self, node: Option>, source: TreeSitterNode<'_>) -> Node { + let Some(node) = node else { + return self.wrap( + "CONST", + vec![Child::Symbol("(anonymous)".to_string())], + source, + ); + }; + if self.const_kind(node.kind()) { + return self.normalize_const(node); } - if self.language == Language::Lua - && node.kind() == "block" - && children.len() == 1 - && matches!( - children[0].kind(), - "assignment_statement" - | "function_call" - | "return_statement" - | "variable_declaration" - ) - && node_text(node, self.source) == node_text(children[0], self.source) - { - return self.named_children(children[0]); + self.wrap( + "CONST", + vec![Child::Symbol(node_text(node, self.source).to_string())], + node, + ) + } + + fn normalize_global_variable(&self, node: TreeSitterNode<'_>) -> Node { + let text = node_text(node, self.source).to_string(); + if let Some(value) = text.strip_prefix('$') { + if value + .chars() + .next() + .map(|first| matches!(first, '1'..='9')) + .unwrap_or(false) + && value.chars().all(|ch| ch.is_ascii_digit()) + { + let number = 
value + .parse::() + .expect("validated global nth reference should parse"); + return self.wrap("NTH_REF", vec![Child::Integer(number)], node); + } } - if self.language == Language::Python - && node.kind() == "relative_import" - && children.len() == 1 - && children[0].kind() == "import_prefix" - { - return Vec::new(); + self.wrap("GVAR", vec![Child::String(text)], node) + } + + fn array_literal_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .array_literal_statement(node, self.source) + } + + fn normalize_array_literal_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self + .normalization_adapter + .array_literal_target(node, self.source) + .unwrap_or(node); + let values = self + .normalization_adapter + .array_literal_values(target, self.source) + .into_iter() + .filter_map(|child| self.normalize_array_literal_value(child)) + .collect::>(); + if values.is_empty() { + Some(self.wrap("ZLIST", Vec::new(), target)) + } else { + Some(self.list_node(values, target)) } - if self.language == Language::Python && node.kind() == "block" && children.len() == 1 { - if children[0].kind() == "function_definition" { - return self.named_children(children[0]); - } - if children[0].kind() == "decorated_definition" { - return self.named_children(children[0]); + } + + fn normalize_array_literal_value(&mut self, node: TreeSitterNode<'_>) -> Option { + if node.kind() == "field" { + let named = self.named_children(node); + if named.len() == 1 { + return self.normalize_node(named[0]); } - if children[0].kind() == "pass_statement" - && node_text(node, self.source).trim() == "pass" - { - return Vec::new(); + if named.is_empty() { + return Some(self.normalize_terminal_statement(node)); } - if matches!(children[0].kind(), "break_statement" | "continue_statement") - && bare_identifier_text(node_text(node, self.source).trim()) - { - return Vec::new(); + } + + self.normalize_node(node) + } + + fn element_reference_statement(&self, 
node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .element_reference_statement(node, self.source) + } + + fn normalize_element_reference_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self + .normalization_adapter + .element_reference_target(node, self.source) + .unwrap_or(node); + let receiver = self + .normalization_adapter + .element_reference_receiver(target, self.source)?; + let args = self + .normalization_adapter + .element_reference_arguments(target, self.source) + .into_iter() + .filter_map(|arg| self.normalize_node(arg)) + .collect::>(); + if self.ruby() && self.self_node(receiver) { + return Some(self.wrap( + "FCALL", + vec![ + Child::Symbol("[]".to_string()), + list_or_nil(args, target, self), + ], + target, + )); + } + + let receiver = optional_node(self.normalize_node(receiver)); + let args = list_or_nil(args, target, self); + Some(self.wrap( + "CALL", + vec![receiver, Child::Symbol("[]".to_string()), args], + target, + )) + } + + fn hash_literal_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .hash_literal_statement(node, self.source) + } + + fn normalize_hash_literal_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self + .normalization_adapter + .hash_literal_target(node, self.source) + .unwrap_or(node); + let children = self + .normalization_adapter + .hash_literal_values(target, self.source) + .into_iter() + .filter_map(|child| self.normalize_hash_literal_value(child)) + .map(|child| Child::Node(Box::new(child))) + .collect::>(); + Some(self.wrap("HASH", children, target)) + } + + fn normalize_hash_literal_value(&mut self, node: TreeSitterNode<'_>) -> Option { + if node.kind() == "field" { + let named = self.named_children(node); + if named.len() >= 2 { + let key = named[0]; + let value = named[1]; + let key_lit = self.wrap( + "LIT", + vec![Child::Symbol(node_text(key, self.source).to_string())], + key, + ); + let value = 
self.normalize_node(value); + return Some(self.wrap( + "HASH", + vec![Child::Node(Box::new(key_lit)), optional_node(value)], + node, + )); } - if children[0].kind() == "return_statement" - && node_text(node, self.source) == node_text(children[0], self.source) - { - if self.raw_named_children(children[0]).is_empty() { - return Vec::new(); - } - return self.named_children(children[0]); + } + + self.normalize_node(node) + } + + fn empty_body_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .empty_body_statement(node, self.source) + } + + fn heredoc_body_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter.heredoc_body_statement(node) + } + + fn heredoc_call_for_body(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .heredoc_call_for_body(node, self.source) + } + + fn terminal_statement(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) && self.named_children(node).is_empty() + && !node_text(node, self.source).trim().is_empty() + } + + fn normalize_terminal_statement(&self, node: TreeSitterNode<'_>) -> Node { + let text = node_text(node, self.source).trim(); + if self.ruby() && text == "yield" { + return self.wrap("YIELD", vec![Child::Nil], node); + } + if ruby_instance_variable_text(text) { + return self.wrap("IVAR", vec![Child::String(text.to_string())], node); + } + if text.starts_with('$') { + return self.normalize_global_variable(node); + } + if text == "nil" { + return self.wrap("NIL", Vec::new(), node); + } + if text == "true" { + return self.wrap("TRUE", Vec::new(), node); + } + if text == "false" { + return self.wrap("FALSE", Vec::new(), node); + } + if let Some(symbol) = text.strip_prefix(':') { + if exact_bare_identifier_text(symbol) { + return self.wrap("LIT", vec![Child::Symbol(symbol.to_string())], node); } - if children[0].kind() == "delete_statement" { - return 
self.named_children(children[0]); + } + if exact_integer_text(text) { + if let Ok(value) = text.parse::() { + return self.wrap("INTEGER", vec![Child::Integer(value)], node); } - if children[0].kind() == "if_statement" { - return self.named_children(children[0]); + } + if text == "[]" { + return self.wrap("ZLIST", Vec::new(), node); + } + if bare_identifier_text(text) { + if self.ruby() && !self.ruby_local_name(text) { + return self.wrap("VCALL", vec![Child::Symbol(text.to_string())], node); } - if matches!( - children[0].kind(), - "assert_statement" - | "for_statement" - | "import_from_statement" - | "import_statement" - | "raise_statement" - | "try_statement" - | "while_statement" - | "with_statement" - ) { - return self.named_children(children[0]); - } - if children[0].kind() != "expression_statement" { - return children; - } - let statement_children = self.raw_named_children(children[0]); - if statement_children.len() == 1 - && statement_children[0].kind() == "identifier" - && node_text(node, self.source) == node_text(children[0], self.source) - { - return Vec::new(); - } - if statement_children.len() == 1 && statement_children[0].kind() == "ellipsis" { - return Vec::new(); - } - if statement_children.len() == 1 - && matches!( - statement_children[0].kind(), - "assignment" - | "augmented_assignment" - | "binary_operator" - | "call" - | "string" - | "subscript" - ) - { - return self.named_children(statement_children[0]); - } - } - if self.language == Language::Python - && node.kind() == "expression_statement" - && children.len() == 1 - && children[0].kind() == "yield" - { - return self.named_children(children[0]); - } - if self.language == Language::Python - && node.kind() == "expression_statement" - && children.len() == 1 - && children[0].kind() == "identifier" - { - return Vec::new(); - } - if self.language == Language::Python - && node.kind() == "expression_statement" - && children.len() == 1 - && children[0].kind() == "binary_operator" - { - return 
self.named_children(children[0]); - } - if self.language == Language::Python - && node.kind() == "expression_statement" - && children.len() == 1 - && children[0].kind() == "comparison_operator" - { - return self.named_children(children[0]); + return self.wrap("LVAR", vec![Child::String(text.to_string())], node); } - if self.language == Language::Python - && node.kind() == "expression_statement" - && children.len() == 1 - && children[0].kind() == "call" - { - return self.named_children(children[0]); + + self.wrap(&kind_type(node.kind()), Vec::new(), node) + } + + fn normalize_array_literal(&mut self, node: TreeSitterNode<'_>) -> Node { + let values = self + .named_children(node) + .into_iter() + .filter_map(|child| self.normalize_array_literal_value(child)) + .collect::>(); + if values.is_empty() { + self.wrap("ZLIST", Vec::new(), node) + } else { + self.list_node(values, node) } - if self.language == Language::Python - && node.kind() == "expression_statement" - && children.len() == 1 - && children[0].kind() == "attribute" - { - return self.named_children(children[0]); + } + + fn normalize_pair(&mut self, node: TreeSitterNode<'_>) -> Option { + let named = self.named_children(node); + let key = *named.first()?; + let value_raw = named.get(1).copied(); + + let has_hash_rocket = node + .children(&mut node.walk()) + .any(|child| !child.is_named() && node_text(child, self.source) == "=>"); + if has_hash_rocket { + let children = [ + self.normalize_node(key), + value_raw.and_then(|value| self.normalize_node(value)), + ] + .into_iter() + .flatten() + .map(|child| Child::Node(Box::new(child))) + .collect(); + return Some(self.wrap("HASH", children, node)); } - if self.language == Language::Python - && node.kind() == "expression_statement" - && children.len() == 1 - && children[0].kind() == "string" - { - return self.named_children(children[0]); + + let key_text = node_text(key, self.source); + let key_lit = self.wrap("LIT", vec![Child::Symbol(key_text.to_string())], key); 
+ if self.ruby() && key.kind() == "hash_key_symbol" && value_raw.is_none() { + let value = self.local_or_call_for_name(key_text, key); + return Some(self.wrap( + "HASH", + vec![Child::Node(Box::new(key_lit)), Child::Node(Box::new(value))], + node, + )); } - if self.language == Language::Python && node.kind() == "as_pattern_target" { - return Vec::new(); + + let mut children = vec![Child::Node(Box::new(key_lit))]; + if let Some(value) = value_raw.and_then(|value| self.normalize_node(value)) { + children.push(Child::Node(Box::new(value))); } - if self.language == Language::Python - && matches!(node.kind(), "with_clause" | "with_item") - && children.len() == 1 - && matches!(children[0].kind(), "with_item" | "as_pattern") + Some(self.wrap("HASH", children, node)) + } + + fn normalize_block_argument(&mut self, node: TreeSitterNode<'_>) -> Option { + let value = self + .named_children(node) + .into_iter() + .next() + .and_then(|child| self.normalize_node(child)); + Some(self.wrap("BLOCK_PASS", vec![Child::Nil, optional_node(value)], node)) + } + + fn normalize_interpolated_string(&mut self, node: TreeSitterNode<'_>) -> Node { + let children = self.normalize_children(node); + self.wrap("DSTR", children, node) + } + + fn normalize_subshell(&mut self, node: TreeSitterNode<'_>) -> Node { + let children = self + .named_children(node) + .into_iter() + .filter_map(|child| match child.kind() { + "interpolation" => self + .normalize_interpolation(child) + .map(|node| Child::Node(Box::new(node))), + "string_content" => Some(Child::Node(Box::new(self.wrap( + "STR", + vec![Child::String(node_text(child, self.source).to_string())], + child, + )))), + _ => None, + }) + .collect::>(); + let node_type = if children + .iter() + .any(|child| matches!(child, Child::Node(node) if node.r#type == "EVSTR")) { - return self.named_children(children[0]); + "DXSTR" + } else { + "XSTR" + }; + self.wrap(node_type, children, node) + } + + fn normalize_chained_string(&mut self, node: 
TreeSitterNode<'_>) -> Node { + let mut normalized_children = Vec::new(); + for child in self.named_children(node) { + let normalized = self.normalize_node(child); + normalized_children.push((child, normalized)); } - if self.language == Language::Python - && node.kind() == "with_item" - && children.len() == 1 - && children[0].kind() == "call" - && node_text(node, self.source) == node_text(children[0], self.source) - { - return self.named_children(children[0]); + + let mut parts = Vec::new(); + for (_, normalized) in &normalized_children { + match normalized { + Some(normalized) if normalized.r#type == "DSTR" => { + parts.extend(normalized.children.clone()); + } + Some(normalized) => { + parts.push(Child::Node(Box::new(normalized.clone()))); + } + None => {} + } } - if self.language == Language::Python - && node.kind() == "with_item" - && children.len() == 1 - && children[0].kind() == "attribute" - && node_text(node, self.source) == node_text(children[0], self.source) - { - return self.named_children(children[0]); + + let source = self + .dynamic_string_source(&normalized_children) + .or_else(|| normalized_children.first().map(|(child, _)| *child)) + .unwrap_or(node); + self.wrap("DSTR", parts, source) + } + + fn normalize_concatenated_string_statement(&mut self, node: TreeSitterNode<'_>) -> Node { + let target = concatenated_string_target(node).unwrap_or(node); + let mut normalized_children = Vec::new(); + for child in self.named_children(target) { + let normalized = self.normalize_node(child); + normalized_children.push((child, normalized)); } - if node.kind() == "type" && children.len() == 1 { - if children[0].kind() == "union_type" { - return self.named_children(children[0]); - } - if self.language == Language::Python && children[0].kind() == "binary_operator" { - return self.named_children(children[0]); + + let mut parts = Vec::new(); + for (_, normalized) in &normalized_children { + match normalized { + Some(normalized) if normalized.r#type == "DSTR" => { + 
parts.extend(normalized.children.clone()); + } + Some(normalized) => { + parts.push(Child::Node(Box::new(normalized.clone()))); + } + None => {} } - if children[0].kind() == "generic_type" { - return self.named_children(children[0]); + } + + let source = self + .dynamic_string_source(&normalized_children) + .or_else(|| normalized_children.first().map(|(child, _)| *child)) + .unwrap_or(node); + self.wrap("DSTR", parts, source) + } + + fn dynamic_string_source<'tree>( + &self, + normalized_children: &[(TreeSitterNode<'tree>, Option)], + ) -> Option> { + normalized_children + .iter() + .find(|(_, child_node)| { + let Some(child_node) = child_node else { + return false; + }; + child_node.r#type == "DSTR" + && child_node + .children + .iter() + .filter_map(self::node) + .any(|part| part.r#type == "EVSTR") + }) + .map(|(child, _)| *child) + } + + fn normalize_interpolated_statement(&mut self, node: TreeSitterNode<'_>) -> Node { + let children = self.normalize_children(node); + self.wrap("DSTR", children, node) + } + + fn normalize_interpolation(&mut self, node: TreeSitterNode<'_>) -> Option { + let exprs = self + .named_children(node) + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>(); + let body = if exprs.len() == 1 { + exprs.into_iter().next() + } else if exprs.is_empty() { + None + } else { + Some(self.list_node(exprs, node)) + }; + Some( + self.wrap( + "EVSTR", + body.into_iter() + .map(|node| Child::Node(Box::new(node))) + .collect(), + node, + ), + ) + } + + fn normalize_heredoc_body_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let mut heredoc_bodies = self + .named_children(node) + .into_iter() + .filter(|child| child.kind() == "heredoc_body"); + let mut children = Vec::new(); + + for child in self.named_children(node) { + if child.kind() == "heredoc_body" { + continue; } - if children[0].kind() == "attribute" { - return self.named_children(children[0]); + + let normalized = if self.heredoc_call_for_body(child) { + 
let body = heredoc_bodies.next(); + self.with_current_heredoc_body(body, |normalizer| normalizer.normalize_node(child)) + } else { + self.normalize_body(child) + }; + + if let Some(normalized) = normalized { + children.push(normalized); } - if children[0].kind() == "string" { - return self.named_children(children[0]); + } + + if children.is_empty() { + None + } else if children.len() == 1 { + children.into_iter().next() + } else { + Some( + self.wrap( + "BLOCK", + children + .into_iter() + .map(|child| Child::Node(Box::new(child))) + .collect(), + node, + ), + ) + } + } + + fn normalize_heredoc_beginning(&mut self, node: TreeSitterNode<'_>) -> Node { + let mut heredoc_body = None; + let mut ancestor = node.parent(); + while let Some(candidate) = ancestor { + let bodies = self + .named_children(candidate) + .into_iter() + .filter(|child| child.kind() == "heredoc_body") + .collect::>(); + if !bodies.is_empty() { + heredoc_body = if let Some(current) = self.current_heredoc_body_span { + bodies + .iter() + .copied() + .find(|body| span(*body) == current) + .or_else(|| bodies.first().copied()) + } else { + bodies.first().copied() + }; + break; } - if children[0].kind() == "list" { - if self.raw_named_children(children[0]).is_empty() { - return Vec::new(); + ancestor = candidate.parent(); + } + let children = heredoc_body + .map(|body| self.normalize_heredoc_children(body)) + .unwrap_or_default(); + self.wrap("DSTR", children, node) + } + + fn with_current_heredoc_body( + &mut self, + body: Option>, + block: impl FnOnce(&mut Self) -> T, + ) -> T { + let previous = self.current_heredoc_body_span; + self.current_heredoc_body_span = body.map(span); + let result = block(self); + self.current_heredoc_body_span = previous; + result + } + + fn normalize_heredoc_children(&mut self, node: TreeSitterNode<'_>) -> Vec { + self.named_children(node) + .into_iter() + .filter_map(|child| match child.kind() { + "interpolation" => self.normalize_interpolation(child), + "heredoc_content" 
=> { + let text = node_text(child, self.source).to_string(); + if text.is_empty() { + None + } else { + Some(self.wrap("STR", vec![Child::String(text)], child)) + } } - return self.named_children(children[0]); + _ => None, + }) + .map(|child| Child::Node(Box::new(child))) + .collect() + } + + fn normalize_identifier(&mut self, node: TreeSitterNode<'_>) -> Node { + let name = node_text(node, self.source).to_string(); + if self.ruby_vcall_identifier(node, &name) || self.vcall_identifier(node, &name) { + self.wrap("VCALL", vec![Child::Symbol(name)], node) + } else { + self.wrap("LVAR", vec![Child::String(name)], node) + } + } + + fn normalize_parameters(&mut self, node: Option>) -> Option { + if self.language != Language::Ruby { + return None; + } + let node = node?; + let defaults = self + .named_children(node) + .into_iter() + .filter_map(|param| { + let name = self.named_field(param, "name")?; + let value = self.named_field(param, "value")?; + let value = optional_node(self.normalize_node(value)); + Some(self.wrap( + "LASGN", + vec![ + Child::Symbol(node_text(name, self.source).to_string()), + value, + ], + param, + )) + }) + .map(|node| Child::Node(Box::new(node))) + .collect::>(); + if defaults.is_empty() { + None + } else { + Some(self.wrap("ARGS", defaults, node)) + } + } + + fn normalize_block_parameters(&mut self, block: Option>) -> Option { + if self.language != Language::Ruby { + return None; + } + let block = block?; + let params = self + .named_children(block) + .into_iter() + .find(|child| child.kind() == "block_parameters")?; + let pre_init = self + .named_children(params) + .into_iter() + .filter(|param| param.kind() == "destructured_parameter") + .filter_map(|param| self.normalize_destructured_block_parameter(param)) + .map(|node| Child::Node(Box::new(node))) + .collect::>(); + if pre_init.is_empty() { + None + } else { + Some(self.wrap("ARGS", pre_init, params)) + } + } + + fn normalize_destructured_block_parameter( + &mut self, + param: 
TreeSitterNode<'_>, + ) -> Option { + let mut targets = Vec::new(); + for child in self.named_children(param) { + self.collect_destructured_parameter_targets(child, &mut targets); + } + if targets.is_empty() { + return None; + } + let dvar = self.wrap("DVAR", vec![Child::Nil], param); + Some(self.wrap( + "MASGN", + vec![ + Child::Node(Box::new(dvar)), + list_or_nil(targets, param, self), + Child::Nil, + ], + param, + )) + } + + fn collect_destructured_parameter_targets( + &mut self, + node: TreeSitterNode<'_>, + targets: &mut Vec, + ) { + if self.identifier_kind(node.kind()) { + targets.push(self.wrap( + "DASGN", + vec![ + Child::String(node_text(node, self.source).to_string()), + Child::Nil, + ], + node, + )); + return; + } + + for child in self.named_children(node) { + self.collect_destructured_parameter_targets(child, targets); + } + } + + fn normalize_children(&mut self, node: TreeSitterNode<'_>) -> Vec { + let mut children = Vec::new(); + for child in self.named_children(node) { + if child.kind() == "heredoc_body" { + continue; } - if matches!( - children[0].kind(), - "ellipsis" | "identifier" | "nil" | "none" | "null" - ) { - return Vec::new(); + if self.assignment_rhs(child) { + continue; } + if let Some(normalized) = self.normalize_node(child) { + children.push(Child::Node(Box::new(normalized))); + } + } + children + } + + fn scope(&self, body: Option, args: Option, source: TreeSitterNode<'_>) -> Node { + let source_node = body.as_ref().or(args.as_ref()).cloned(); + let children = vec![Child::Nil, optional_node(args), optional_node(body)]; + if let Some(source_node) = source_node { + self.wrap_from_source_node("SCOPE", children, &source_node) + } else { + self.wrap("SCOPE", children, source) + } + } + + fn list(&self, children: Option>, source: TreeSitterNode<'_>) -> Option { + let children = children?; + if children.is_empty() { + return None; + } + + Some(self.list_node(children, source)) + } + + fn list_node(&self, children: Vec, source: 
TreeSitterNode<'_>) -> Node { + self.wrap( + "LIST", + children + .into_iter() + .map(|child| Child::Node(Box::new(child))) + .collect(), + source, + ) + } + + fn list_or_nil_from_source_node(&self, children: Vec, source: &Node) -> Child { + if children.is_empty() { + Child::Nil + } else { + Child::Node(Box::new( + self.wrap_from_source_node( + "LIST", + children + .into_iter() + .map(|child| Child::Node(Box::new(child))) + .collect(), + source, + ), + )) + } + } + + fn wrap(&self, node_type: &str, children: Vec, source: TreeSitterNode<'_>) -> Node { + let node_span = span(source); + Node { + r#type: node_type.to_string(), + children, + first_lineno: node_span[0], + first_column: node_span[1], + last_lineno: node_span[2], + last_column: node_span[3], + text: node_text(source, self.source).to_string(), + } + } + + fn wrap_from_nodes( + &self, + node_type: &str, + children: Vec, + first: TreeSitterNode<'_>, + last: TreeSitterNode<'_>, + ) -> Node { + let first_span = span(first); + let last_span = span(last); + let text = self + .source + .get(first.start_byte()..last.end_byte()) + .unwrap_or("") + .to_string(); + Node { + r#type: node_type.to_string(), + children, + first_lineno: first_span[0], + first_column: first_span[1], + last_lineno: last_span[2], + last_column: last_span[3], + text, + } + } + + fn wrap_from_source_node(&self, node_type: &str, children: Vec, source: &Node) -> Node { + Node { + r#type: node_type.to_string(), + children, + first_lineno: source.first_lineno, + first_column: source.first_column, + last_lineno: source.last_lineno, + last_column: source.last_column, + text: source.text.clone(), + } + } + + fn wrap_from_span_text( + &self, + node_type: &str, + children: Vec, + node_span: Span, + text: &str, + ) -> Node { + Node { + r#type: node_type.to_string(), + children, + first_lineno: node_span[0], + first_column: node_span[1], + last_lineno: node_span[2], + last_column: node_span[3], + text: text.to_string(), + } + } + + fn with_ruby_scope( + 
&mut self, + node: TreeSitterNode<'_>, + reset: bool, + f: impl FnOnce(&mut Self) -> T, + ) -> T { + if !self.ruby() { + return f(self); + } + let previous = self.local_stack.clone(); + if reset { + self.local_stack.clear(); + } + self.local_stack.push(self.ruby_scope_locals(node)); + let result = f(self); + self.local_stack = previous; + result + } + + fn ruby_scope_locals(&self, node: TreeSitterNode<'_>) -> BTreeSet { + let mut locals = BTreeSet::new(); + self.collect_ruby_scope_locals(node, &mut locals, true); + locals + } + + fn collect_ruby_scope_locals( + &self, + node: TreeSitterNode<'_>, + locals: &mut BTreeSet, + root: bool, + ) { + if !root && self.ruby_scope_boundary(node) { + return; + } + self.collect_ruby_parameter_locals(node, locals); + self.collect_ruby_assignment_locals(node, locals); + for child in self.named_children(node) { + if !self.ruby_scope_child_boundary(child) { + self.collect_ruby_scope_locals(child, locals, false); + } + } + } + + fn collect_ruby_parameter_locals( + &self, + node: TreeSitterNode<'_>, + locals: &mut BTreeSet, + ) { + if !matches!( + node.kind(), + "method_parameters" | "block_parameters" | "lambda_parameters" + ) { + return; + } + + for child in self.named_children(node) { + self.collect_identifier_names(child, locals); + } + } + + fn collect_ruby_assignment_locals( + &self, + node: TreeSitterNode<'_>, + locals: &mut BTreeSet, + ) { + if node.kind() == "exception_variable" { + self.collect_identifier_names(node, locals); + return; + } + + if !self.ruby_assignment_node(node) { + return; + } + + if let Some(left) = self.assignment_left(node) { + self.collect_assignment_target_names(left, locals); + } + } + + fn collect_assignment_target_names( + &self, + node: TreeSitterNode<'_>, + locals: &mut BTreeSet, + ) { + if self.identifier_kind(node.kind()) { + locals.insert( + node_text(node, self.source) + .trim_start_matches('*') + .to_string(), + ); + return; + } + if matches!( + node.kind(), + "left_assignment_list" + | 
"expression_list" + | "splat" + | "splat_parameter" + | "rest_assignment" + ) { + for child in self.named_children(node) { + self.collect_assignment_target_names(child, locals); + } + } + } + + fn collect_identifier_names(&self, node: TreeSitterNode<'_>, locals: &mut BTreeSet) { + if self.identifier_kind(node.kind()) { + locals.insert( + node_text(node, self.source) + .trim_start_matches('*') + .to_string(), + ); + } + if self + .normalization_adapter + .identifier_text_node(node, self.source) + { + locals.insert(node_text(node, self.source).to_string()); + } + for child in self.raw_named_children(node) { + self.collect_identifier_names(child, locals); + } + } + + fn collect_parameter_names(&self, node: TreeSitterNode<'_>, locals: &mut BTreeSet) { + if let Some(name) = self.named_field(node, "name") { + self.collect_identifier_names(name, locals); + return; + } + if let Some(name) = self + .named_children(node) + .into_iter() + .find(|child| self.identifier_kind(child.kind())) + { + locals.insert( + node_text(name, self.source) + .trim_start_matches('*') + .to_string(), + ); + } + } + + fn ruby_scope_boundary(&self, node: TreeSitterNode<'_>) -> bool { + if matches!(node.kind(), "block" | "do_block") + && node + .parent() + .map(|parent| parent.kind() == "lambda") + .unwrap_or(false) + { + return false; + } + matches!( + node.kind(), + "singleton_class" | "lambda" | "block" | "do_block" + ) || function_kind(node.kind()) + || self.class_node(node) + || self.module_node(node) + } + + fn ruby_scope_child_boundary(&self, node: TreeSitterNode<'_>) -> bool { + self.ruby_scope_boundary(node) + } + + fn ruby_vcall_identifier(&self, node: TreeSitterNode<'_>, name: &str) -> bool { + self.ruby() + && self.identifier_kind(node.kind()) + && !self.assignment_lhs(node) + && !self.ruby_definition_identifier(node) + && !self.ruby_local_name(name) + } + + fn ruby_local_name(&self, name: &str) -> bool { + self.local_stack + .iter() + .rev() + .any(|scope| scope.contains(name)) + } + + 
fn ruby(&self) -> bool { + self.normalization_adapter.ruby() + } + + fn instance_variable(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .instance_variable(node, self.source) + } + + fn global_variable(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .global_variable(node, self.source) + } + + fn assignment_operator(&self, text: &str) -> bool { + self.normalization_adapter.assignment_operator(text) + } + + fn vcall_identifier(&self, node: TreeSitterNode<'_>, name: &str) -> bool { + if !self.identifier_kind(node.kind()) { + return false; + } + if self.ruby() && self.ruby_local_name(name) { + return false; + } + let Some(parent) = node.parent() else { + return false; + }; + if matches!( + parent.kind(), + "method" | "method_parameters" | "parameter_list" | "argument_list" | "arguments" + ) { + return false; + } + if self.member_read_node(parent) { + return false; + } + if self.dotted_expression(parent) { + return false; + } + if self.assignment_lhs(node) || self.assignment_rhs(node) { + return false; + } + + if matches!(parent.kind(), "body_statement" | "block_body" | "then") + && self.parent_named_child(parent, node) + { + return true; + } + if matches!(parent.kind(), "if_modifier" | "unless_modifier") + && self + .named_children(parent) + .into_iter() + .next() + .map(|child| child == node) + .unwrap_or(false) + { + return true; + } + + false + } + + fn ruby_definition_identifier(&self, node: TreeSitterNode<'_>) -> bool { + let Some(parent) = self.parent_node(node) else { + return false; + }; + if matches!(parent.kind(), "method" | "singleton_method") { + let name = self.named_field(parent, "name").or_else(|| { + self.named_children(parent) + .into_iter() + .find(|child| self.identifier_kind(child.kind())) + }); + return name + .map(|name| self.same_ts_node(name, node)) + .unwrap_or(false); + } + matches!( + parent.kind(), + "method_parameters" + | "block_parameters" + | "lambda_parameters" + | 
"optional_parameter" + | "keyword_parameter" + | "block_parameter" + ) + } + + fn ruby_assignment_node(&self, node: TreeSitterNode<'_>) -> bool { + if matches!(node.kind(), "assignment" | "operator_assignment") { + return true; + } + if node.kind() == "pattern" + && node + .children(&mut node.walk()) + .any(|child| !child.is_named() && node_text(child, self.source) == "=") + { + return true; + } + let raw_named = self.raw_named_children(node); + if node.kind() == "block_body" + && raw_named.len() == 1 + && raw_named[0].kind() == "assignment" + { + return true; + } + + matches!(node.kind(), "body_statement" | "block_body" | "statement") + && self.has_assignment_operator_child(node) + } + + fn self_node(&self, node: TreeSitterNode<'_>) -> bool { + matches!(node.kind(), "self" | "this") + || matches!(node_text(node, self.source), "self" | "this") + } + + fn assignment_lhs(&self, node: TreeSitterNode<'_>) -> bool { + if self.lua_single_assignment_block_child(node) { + return false; + } + if node + .prev_sibling() + .map(|sibling| node_text(sibling, self.source) == ":") + .unwrap_or(false) + { + return false; + } + if self.literal_fragment_assignment_context(node) { + return false; + } + node.next_sibling() + .map(|sibling| self.assignment_operator(node_text(sibling, self.source))) + .unwrap_or(false) + } + + fn literal_fragment_assignment_context(&self, node: TreeSitterNode<'_>) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if matches!( + parent.kind(), + "string" | "delimited_symbol" | "regex" | "regex_literal" + ) { + return true; + } + if self.language == Language::Lua + && matches!( + node.kind(), + "string_content" | "escape_sequence" | "interpolation" | "string_fragment" + ) + && parent.kind() == "expression_list" + { + return true; + } + + matches!( + node.kind(), + "string_content" | "escape_sequence" | "interpolation" | "string_fragment" + ) && parent + .parent() + .map(|grandparent| { + matches!( + grandparent.kind(), + "string" | 
"delimited_symbol" | "regex" | "regex_literal" + ) + }) + .unwrap_or(false) + } + + fn literal_fragment_expression_list(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() != "expression_list" { + return false; + } + + let named = self.named_children(node); + named.len() == 1 && self.literal_fragment_assignment_context(named[0]) + } + + fn assignment_rhs(&self, node: TreeSitterNode<'_>) -> bool { + if self.lua_single_assignment_block_child(node) { + return false; + } + if self.literal_fragment_assignment_context(node) { + return false; + } + node.prev_sibling() + .map(|sibling| self.assignment_operator(node_text(sibling, self.source))) + .unwrap_or(false) + } + + fn lua_single_assignment_block_child(&self, node: TreeSitterNode<'_>) -> bool { + if self.language != Language::Lua { + return false; + } + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "assignment_statement" { + return false; + } + let Some(grandparent) = parent.parent() else { + return false; + }; + grandparent.kind() == "block" + && node_text(grandparent, self.source) == node_text(parent, self.source) + && self.raw_named_children(grandparent).len() == 1 + } + + fn lua_single_assignment_statement(&self, node: TreeSitterNode<'_>) -> bool { + if self.language != Language::Lua || node.kind() != "assignment_statement" { + return false; + } + let Some(parent) = node.parent() else { + return false; + }; + parent.kind() == "block" + && node_text(parent, self.source) == node_text(node, self.source) + && self.raw_named_children(parent).len() == 1 + } + + fn has_assignment_operator_child(&self, node: TreeSitterNode<'_>) -> bool { + node.children(&mut node.walk()).any(|child| { + !child.is_named() && self.assignment_operator(node_text(child, self.source)) + }) + } + + fn single_short_var_lhs(&self, node: TreeSitterNode<'_>) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "short_var_declaration" { + return false; + } + if 
self.named_children(node).len() != 1 { + return false; + } + self.named_children(parent) + .into_iter() + .next() + .map(|child| child == node) + .unwrap_or(false) + } + + fn modifier_statement(&self, node: TreeSitterNode<'_>) -> bool { + let named = self.named_children(node); + matches!(node.kind(), "body_statement" | "block_body" | "statement") + && self.modifier_keyword(node).is_some() + && named.len() >= 2 + } + + fn modifier_return_action(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "return" + | "return_statement" + | "return_expression" + | "break" + | "break_statement" + | "break_expression" + | "next" + | "continue_statement" + ) + } + + fn leading_if_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .leading_if_statement(node, self.source) + } + + fn leading_if_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .leading_if_target(node, self.source) + } + + fn normalize_leading_if_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.leading_if_target(node).unwrap_or(node); + if target != node { + return self.normalize_if(target); + } + let keyword = target + .children(&mut target.walk()) + .next() + .map(|child| child.kind().to_string())?; + let condition = self + .named_children(target) + .into_iter() + .find(|child| !matches!(child.kind(), "comment" | "then" | "elsif" | "else"))?; + let consequence = self + .named_children(target) + .into_iter() + .find(|child| child.kind() == "then") + .or_else(|| self.branch_child(target, condition, 0)); + let alternative = self.explicit_alternative(target); + let node_type = if keyword == "unless" { "UNLESS" } else { "IF" }; + let condition = optional_node(self.normalize_node(condition)); + let consequence = optional_node(consequence.and_then(|child| self.normalize_body(child))); + let alternative = + optional_node(alternative.and_then(|child| self.normalize_else_or_branch(child))); 
+ Some(self.wrap(node_type, vec![condition, consequence, alternative], target)) + } + + fn leading_case_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .leading_case_statement(node, self.source) + } + + fn leading_case_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .leading_case_target(node, self.source) + } + + fn normalize_leading_case_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.leading_case_target(node).unwrap_or(node); + self.normalize_case(target) + } + + fn leading_loop_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .leading_loop_statement(node, self.source) + } + + fn leading_loop_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .leading_loop_target(node, self.source) + } + + fn normalize_leading_loop_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.leading_loop_target(node).unwrap_or(node); + if target != node { + let keyword = target.children(&mut target.walk()).next()?.kind(); + let node_type = if keyword == "until" { "UNTIL" } else { "WHILE" }; + return self.normalize_loop(target, node_type); + } + let keyword = target.children(&mut target.walk()).next()?.kind(); + let node_type = if keyword == "until" { "UNTIL" } else { "WHILE" }; + let named = self.named_children(target); + let condition = optional_node( + named + .first() + .and_then(|condition| self.normalize_node(*condition)), + ); + let body = optional_node(named.get(1).and_then(|body| self.normalize_body(*body))); + Some(self.wrap(node_type, vec![condition, body], target)) + } + + fn normalize_leading_owner_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.leading_owner_target(node).unwrap_or(node); + let keyword = target.children(&mut target.walk()).next()?.kind(); + let name = 
self.const_for(self.named_children(target).first().copied(), target); + let body_node = self.named_field(target, "body").or_else(|| { + self.named_children(target) + .into_iter() + .rev() + .find(|child| self.block_kind(child.kind())) + }); + let body = body_node.and_then(|body| self.normalize_body(body)); + if keyword == "module" { + Some(self.wrap( + "MODULE", + vec![ + Child::Node(Box::new(name)), + Child::Node(Box::new(self.scope(body, None, target))), + ], + target, + )) + } else { + Some(self.wrap( + "CLASS", + vec![ + Child::Node(Box::new(name)), + Child::Nil, + Child::Node(Box::new(self.scope(body, None, target))), + ], + target, + )) + } + } + + fn rescue_body_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .rescue_body_statement(node, self.source) + } + + fn rescue_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .rescue_body_target(node, self.source) + } + + fn normalize_rescue_body_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.rescue_body_target(node)?; + let body_nodes = self + .normalization_adapter + .rescue_body_nodes(target, self.source); + let body = self.normalize_body_nodes(body_nodes.clone(), target); + let rescue_nodes = self + .normalization_adapter + .rescue_clauses(target, self.source); + let resbodies = rescue_nodes + .iter() + .filter_map(|child| self.normalize_rescue_clause(*child)) + .collect::>(); + let source_start = body_nodes.first().copied().unwrap_or(target); + let source_end = rescue_nodes + .last() + .and_then(|last| self.rescue_source_end(*last)) + .or_else(|| rescue_nodes.last().copied()) + .unwrap_or(target); + let source = self.source_from_nodes(source_start, source_end); + Some(self.wrap_from_source_node( + "RESCUE", + vec![ + optional_node(body), + optional_node(self.link_rescue_chain(resbodies)), + Child::Nil, + ], + &source, + )) + } + + fn ensure_body_statement(&self, node: TreeSitterNode<'_>) 
-> bool { + self.normalization_adapter + .ensure_body_statement(node, self.source) + } + + fn ensure_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .ensure_body_target(node, self.source) + } + + fn normalize_ensure_body_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.ensure_body_target(node)?; + let body = if self.rescue_body_statement(target) { + self.normalize_rescue_body_statement(target) + } else { + let body_nodes = self + .normalization_adapter + .ensure_body_nodes(target, self.source); + self.normalize_body_nodes(body_nodes, target) + }; + let ensure_node = self + .normalization_adapter + .ensure_clause(target, self.source)?; + let ensure_body_node = self + .normalization_adapter + .ensure_clause_body(ensure_node) + .unwrap_or(ensure_node); + let ensure_body = self.normalize_body(ensure_body_node); + let source = body.clone(); + let children = vec![optional_node(body), optional_node(ensure_body)]; + if let Some(source) = source.as_ref() { + Some(self.wrap_from_source_node("ENSURE", children, source)) + } else { + Some(self.wrap("ENSURE", children, target)) + } + } + + fn command_call_statement(&self, node: TreeSitterNode<'_>) -> bool { + if !matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "statement" + ) || self.dotted_call(node) + { + return false; + } + + let raw_named = self.raw_named_children(node); + let target = if raw_named.len() == 1 + && raw_named[0].kind() == "call" + && node_text(node, self.source) == node_text(raw_named[0], self.source) + { + raw_named[0] + } else { + node + }; + let children = self.named_children(target); + children + .first() + .map(|child| self.identifier_kind(child.kind())) + .unwrap_or(false) + && (children + .iter() + .any(|child| matches!(child.kind(), "argument_list" | "arguments")) + || self.call_block(target).is_some()) + } + + fn visibility_inline_def_call(&self, node: TreeSitterNode<'_>) -> bool { + if 
node.kind() != "call" { + return false; + } + let Some(message) = self.named_children(node).into_iter().next() else { + return false; + }; + if !inline_def_wrapper_mid(node_text(message, self.source)) { + return false; + } + self.named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list") + .map(|args| { + node_text(args, self.source) + .trim_start() + .starts_with("def ") + }) + .unwrap_or(false) + } + + fn visibility_inline_def_statement( + &self, + node: TreeSitterNode<'_>, + function: TreeSitterNode<'_>, + ) -> bool { + let function_text = if self.language == Language::Ruby && function.kind() == "call" { + self.named_children(function) + .into_iter() + .next() + .map(|child| node_text(child, self.source)) + .unwrap_or_else(|| node_text(function, self.source)) + } else { + node_text(function, self.source) + }; + inline_def_wrapper_mid(function_text) && node_text(node, self.source).contains("def ") + } + + fn inline_def_from_argument_list(&mut self, args: Option>) -> Option { + if !self.ruby() { + return None; + } + self.inline_def_from_source(args?) 
+ } + + fn inline_def_from_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = if self.language == Language::Ruby + && matches!(node.kind(), "body_statement" | "block_body" | "statement") + { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "call" + && node_text(raw_named[0], self.source) == node_text(node, self.source) + { + raw_named[0] + } else { + node + } + } else { + node + }; + let source = self + .named_children(target) + .into_iter() + .find(|child| child.kind() == "argument_list") + .unwrap_or(target); + self.inline_def_from_source(source) + } + + fn inline_def_from_source(&mut self, source: TreeSitterNode<'_>) -> Option { + if !self.ruby() { + return None; + } + if let Some(method) = self + .named_children(source) + .into_iter() + .find(|child| matches!(child.kind(), "method" | "singleton_method")) + { + return if method.kind() == "singleton_method" { + self.normalize_singleton_function(method) + } else { + self.normalize_function(method) + }; + } + let body = self.inline_def_body(source); + let receiver = self.inline_def_receiver(source); + let normalized_body = self.with_ruby_scope(source, true, |normalizer| { + let body = body.and_then(|body| normalizer.normalize_body(body)); + normalizer.elide_tail_returns(body) + }); + if let Some(receiver) = receiver { + let name = self.inline_def_name_after_receiver(source, receiver)?; + if name.is_empty() { + return None; + } + let receiver = self.normalize_node(receiver)?; + return Some(self.wrap( + "DEFS", + vec![ + Child::Node(Box::new(receiver)), + Child::Symbol(name), + Child::Node(Box::new(self.scope(normalized_body, None, source))), + ], + source, + )); + } + + let name = self + .named_children(source) + .into_iter() + .find(|child| self.identifier_kind(child.kind())) + .map(|child| node_text(child, self.source).to_string())?; + if name.is_empty() { + return None; + } + Some(self.wrap( + "DEFN", + vec![ + Child::Symbol(name), + 
Child::Node(Box::new(self.scope(normalized_body, None, source))), + ], + source, + )) + } + + fn inline_def_receiver<'tree>( + &self, + source: TreeSitterNode<'tree>, + ) -> Option> { + let text = node_text(source, self.source); + if !inline_def_receiver_text(text) { + return None; + } + let children = self.named_children(source); + if children.len() == 1 + && matches!(children[0].kind(), "method" | "singleton_method") + && node_text(children[0], self.source) == text + { + return self.inline_def_receiver(children[0]); + } + + children.into_iter().find(|child| { + matches!( + child.kind(), + "self" | "this" | "constant" | "scope_resolution" + ) + }) + } + + fn inline_def_name_after_receiver( + &self, + source: TreeSitterNode<'_>, + receiver: TreeSitterNode<'_>, + ) -> Option { + let children = self.named_children(source); + if let Some(index) = children + .iter() + .position(|child| self.same_ts_node(*child, receiver)) + { + return children + .into_iter() + .skip(index + 1) + .find(|child| self.identifier_kind(child.kind())) + .map(|child| node_text(child, self.source).to_string()); + } + + if children.len() == 1 + && matches!(children[0].kind(), "method" | "singleton_method") + && node_text(children[0], self.source) == node_text(source, self.source) + { + return self.inline_def_name_after_receiver(children[0], receiver); + } + + None + } + + fn inline_def_body<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + let mut stack = self + .named_children(node) + .into_iter() + .rev() + .collect::>(); + while let Some(child) = stack.pop() { + if child.kind() == "body_statement" { + return Some(child); + } + stack.extend(self.named_children(child).into_iter().rev()); + } + None + } + + fn modifier_keyword(&self, node: TreeSitterNode<'_>) -> Option { + let mut seen_named = false; + for child in node.children(&mut node.walk()) { + seen_named = seen_named || child.is_named(); + if seen_named + && !child.is_named() + && matches!(child.kind(), "if" | "unless" | "while" | 
"until") + { + return Some(child.kind().to_string()); + } + } + + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && node_text(raw_named[0], self.source) == node_text(node, self.source) + { + return self.modifier_keyword(raw_named[0]); + } + + None + } + + fn modifier_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option<(TreeSitterNode<'tree>, TreeSitterNode<'tree>)> { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && node_text(raw_named[0], self.source) == node_text(node, self.source) + { + if let Some(parts) = self.modifier_parts(raw_named[0]) { + return Some(parts); + } + } + + let named = self.named_children(node); + Some((*named.first()?, *named.last()?)) + } + + fn ternary_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .ternary_statement(node, self.source) + } + + fn ternary_parts<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.normalization_adapter.ternary_parts(node, self.source) + } + + fn case_argument_list(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .case_argument_list(node, self.source) + } + + fn leading_function_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .leading_function_statement(node, self.source) + } + + fn leading_owner_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .leading_owner_statement(node, self.source) + } + + fn leading_owner_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .leading_owner_target(node, self.source) + } + + fn leading_function_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .leading_function_target(node, self.source) + } + + fn leading_function_name<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.named_children(node) + .into_iter() + .find(|child| 
self.identifier_kind(child.kind())) + } + + fn leading_function_body<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + let body_kind = match self.normalization_adapter { + TreeSitterNormalizationAdapter::Python | TreeSitterNormalizationAdapter::Lua => "block", + _ => "body_statement", + }; + self.named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() == body_kind) + } + + fn zero_child_identifier_call(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .zero_child_identifier_call(node, self.source) + } + + fn boolean_expression(&self, node: TreeSitterNode<'_>) -> bool { + (self.normalization_adapter.boolean_expression_kind(node) || self.boolean_statement(node)) + && matches!(self.boolean_operator(node).as_deref(), Some("and" | "or")) + } + + fn boolean_statement(&self, node: TreeSitterNode<'_>) -> bool { + if !matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) { + return false; + } + let named = self.named_children(node); + let target = if self.language == Language::Ruby + && named.len() == 1 + && matches!( + named[0].kind(), + "binary" | "binary_expression" | "binary_operator" | "boolean_operator" + ) + && node_text(node, self.source) == node_text(named[0], self.source) + { + named[0] + } else { + node + }; + if !matches!( + self.binary_operator(target).as_deref(), + Some("&&" | "||" | "and" | "or") + ) { + return false; + } + if self.named_children(target).len() < 2 { + return false; + } + target.children(&mut target.walk()).all(|child| { + child.is_named() + || matches!( + node_text(child, self.source), + "&&" | "||" | "and" | "or" | "(" | ")" + ) + }) + } + + fn operator_call_expression(&self, node: TreeSitterNode<'_>) -> bool { + let operator_call_kind = match self.language { + Language::Python => matches!( + node.kind(), + "binary" | "binary_expression" | "binary_operator" + ), + Language::Lua => matches!( + node.kind(), + "binary" | "binary_expression" | 
"expression_list" + ), + _ => matches!(node.kind(), "binary" | "binary_expression"), + }; + + operator_call_kind + && self.named_children(node).len() >= 2 + && self + .binary_operator(node) + .map(|operator| OPERATOR_CALL_OPERATORS.contains(&operator.as_str())) + .unwrap_or(false) + } + + fn comparison_expression(&self, node: TreeSitterNode<'_>) -> bool { + if self.literal_fragment_expression_list(node) { + return false; + } + + self.normalization_adapter.comparison_expression_kind(node) + && self + .comparison_operator(node) + .map(|operator| COMPARISON_OPERATORS.contains(&operator.as_str())) + .unwrap_or(false) + } + + fn infix_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.infix_statement_parts(node).is_some() + } + + fn regex_literal(&self, node: Option>) -> bool { + node.map(|node| matches!(node.kind(), "regex" | "regex_literal")) + .unwrap_or(false) + } + + fn argument_list_unary_not(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() != "argument_list" { + return false; + } + let named = self.named_children(node); + if node + .children(&mut node.walk()) + .next() + .map(|child| node_text(child, self.source) == "!") + .unwrap_or(false) + && named.len() == 1 + { + return true; + } + + let raw_named = self.raw_named_children(node); + if raw_named.len() != 1 || raw_named[0].kind() != "unary" { + return false; + } + node_text(node, self.source) == node_text(raw_named[0], self.source) + && self.unary_not_expression(raw_named[0]) + && self.raw_named_children(raw_named[0]).len() == 1 + } + + fn unary_not_statement(&self, node: TreeSitterNode<'_>) -> bool { + if !matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) { + return false; + } + let named = self.named_children(node); + if node + .children(&mut node.walk()) + .next() + .map(|child| node_text(child, self.source) == "!") + .unwrap_or(false) + && named.len() == 1 + { + return true; + } + + let raw_named = self.raw_named_children(node); + 
raw_named.len() == 1 + && raw_named[0].kind() == "unary" + && node_text(node, self.source) == node_text(raw_named[0], self.source) + && self.unary_not_expression(raw_named[0]) + && self.raw_named_children(raw_named[0]).len() == 1 + } + + fn unary_not_expression(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .unary_not_expression(node, self.source) + } + + fn unary_minus_expression(&self, node: TreeSitterNode<'_>) -> bool { + if self + .normalization_adapter + .unary_minus_expression(node, self.source) + { + return true; + } + + let raw_named = self.raw_named_children(node); + raw_named.len() == 1 + && node_text(node, self.source) == node_text(raw_named[0], self.source) + && self.unary_minus_expression(raw_named[0]) + } + + fn infix_statement_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option<(TreeSitterNode<'tree>, String, TreeSitterNode<'tree>)> { + if !matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) { + return None; + } + let raw_named = self.raw_named_children(node); + let target = if raw_named.len() == 1 + && matches!( + raw_named[0].kind(), + "binary" | "binary_expression" | "comparison_operator" + ) + && node_text(node, self.source) == node_text(raw_named[0], self.source) + { + raw_named[0] + } else { + node + }; + let mut named_index = 0usize; + let mut left = None; + let mut right = None; + let mut operator = None; + for child in target.children(&mut target.walk()) { + if child.is_named() { + left.get_or_insert(child); + if operator.is_some() { + right = Some(child); + } + named_index += 1; + } else { + let text = node_text(child, self.source); + if COMPARISON_OPERATORS.contains(&text) || OPERATOR_CALL_OPERATORS.contains(&text) { + operator = Some(text.to_string()); + } + } + } + if named_index == 2 { + Some((left?, operator?, right?)) + } else { + None + } + } + + fn boolean_operator(&self, node: TreeSitterNode<'_>) -> Option { + let direct = 
self.binary_operator(node)?; + if matches!(direct.as_str(), "&&" | "and") { + Some("and".to_string()) + } else if matches!(direct.as_str(), "||" | "or") { + Some("or".to_string()) + } else { + None + } + } + + fn comparison_operator(&self, node: TreeSitterNode<'_>) -> Option { + if let Some(operator) = self.binary_operator(node) { + if COMPARISON_OPERATORS.contains(&operator.as_str()) { + return Some(operator); + } + } + + comparison_operator_from_text(&self.spaced_text(node)) + } + + fn binary_operator(&self, node: TreeSitterNode<'_>) -> Option { + self.normalization_adapter + .binary_operator(node, self.source) + } + + fn spaced_text(&self, node: TreeSitterNode<'_>) -> String { + format!(" {} ", node_text(node, self.source)) + } + + fn class_node(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter.class_node(node) + } + + fn module_node(&self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "module" && self.named_field(node, "name").is_some() + } + + fn interpolated_statement(&self, node: TreeSitterNode<'_>) -> bool { + let children = self.named_children(node); + self.normalization_adapter + .interpolated_statement(node, &children) + } + + fn concatenated_string_statement(&self, node: TreeSitterNode<'_>) -> bool { + let children = self.named_children(node); + self.normalization_adapter + .concatenated_string_statement(node, &children) + } + + fn interpolated_string(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .interpolated_string(node, &self.named_children(node)) + } + + fn lambda_expression(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .lambda_expression(node, self.source) + } + + fn lambda_target<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.normalization_adapter.lambda_target(node, self.source) + } + + fn interpolation_node(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter.interpolation_node(node) + } + + fn statement_call_with_block(&self, 
node: TreeSitterNode<'_>) -> bool { + matches!(node.kind(), "body_statement" | "block_body" | "statement") + && self.call_block(node).is_some() + && self.statement_block_call(node).is_some() + } + + fn statement_block_call<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + if self.dotted_call(node) { + return Some(node); + } + + let block = self.call_block(node); + let children = if self.language == Language::Ruby + && matches!(node.kind(), "body_statement" | "block_body" | "statement") + { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "call" + && node_text(node, self.source) == node_text(raw_named[0], self.source) + { + self.named_children(raw_named[0]) + } else { + self.named_children(node) + } + } else { + self.named_children(node) + }; + + children.into_iter().find(|child| { + Some(*child) != block && (self.call_kind(child.kind()) || self.member_read_node(*child)) + }) + } + + fn yield_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .yield_statement(node, self.source) + } + + fn yield_argument_list(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() != "argument_list" { + return false; + } + let Some(parent) = self.parent_node(node) else { + return false; + }; + let mut cursor = parent.walk(); + let first_child_is_yield = parent + .children(&mut cursor) + .next() + .map(|child| node_text(child, self.source) == "yield") + .unwrap_or(false); + first_child_is_yield + } + + fn super_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .super_statement(node, self.source) + } + + fn argument_list_element_reference(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() != "argument_list" { + return false; + } + let named = self.named_children(node); + if named + .iter() + .any(|child| matches!(child.kind(), "block" | "do_block")) + { + return false; + } + + let children = node.children(&mut node.walk()).collect::>(); 
+ let direct_bracket_shape = children + .first() + .map(|child| node_text(*child, self.source) != "[") + .unwrap_or(false) + && children + .iter() + .any(|child| !child.is_named() && node_text(*child, self.source) == "[") + && children + .iter() + .any(|child| !child.is_named() && node_text(*child, self.source) == "]") + && named.len() >= 2; + if direct_bracket_shape { + return true; + } + + if named.len() != 1 || named[0].kind() != "element_reference" { + return false; + } + let reference = named[0]; + let reference_named = self.raw_named_children(reference); + if reference_named.len() < 2 + || reference_named + .iter() + .any(|child| matches!(child.kind(), "block" | "do_block")) + { + return false; + } + let reference_children = reference + .children(&mut reference.walk()) + .collect::>(); + reference_children + .first() + .map(|child| node_text(*child, self.source) != "[") + .unwrap_or(false) + && reference_children + .iter() + .any(|child| !child.is_named() && node_text(*child, self.source) == "[") + && reference_children + .iter() + .any(|child| !child.is_named() && node_text(*child, self.source) == "]") + } + + fn dotted_expression(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter.dotted_expression_wrapper(node) && self.dotted_call(node) + } + + fn argument_list_call_with_block(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() != "argument_list" || self.dotted_call(node) { + return false; + } + + let target = if self.language == Language::Ruby { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "call" + && node_text(node, self.source) == node_text(raw_named[0], self.source) + { + raw_named[0] + } else { + node + } + } else { + node + }; + + self.call_block(target).is_some() + && self + .named_children(target) + .into_iter() + .next() + .map(|child| self.identifier_kind(child.kind())) + .unwrap_or(false) + } + + fn dotted_call(&self, node: TreeSitterNode<'_>) -> bool { + let 
raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && node_text(node, self.source) == node_text(raw_named[0], self.source) + && self.dotted_call(raw_named[0]) + { + return true; + } + + if !node + .children(&mut node.walk()) + .any(|child| matches!(node_text(child, self.source), "." | "&.")) + { + return false; + } + let callable = self + .named_children(node) + .into_iter() + .filter(|child| { + !matches!( + child.kind(), + "block" | "do_block" | "argument_list" | "arguments" + ) + }) + .collect::>(); + if callable + .iter() + .any(|child| matches!(child.kind(), "string_content" | "interpolation")) + { + return false; + } + callable.len() >= 2 + } + + fn safe_navigation_call(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .safe_navigation_call(node, self.source) + } + + fn dotted_call_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + block: Option>, + ) -> Option<(TreeSitterNode<'tree>, String)> { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && node_text(node, self.source) == node_text(raw_named[0], self.source) + && self.dotted_call(raw_named[0]) + { + return self.dotted_call_parts(raw_named[0], block); + } + + let callable = self + .named_children(node) + .into_iter() + .filter(|child| Some(*child) != block) + .filter(|child| { + !matches!( + child.kind(), + "block" | "do_block" | "argument_list" | "arguments" + ) + }) + .collect::>(); + let receiver = *callable.first()?; + let method = node_text(*callable.get(1)?, self.source) + .trim_end_matches('=') + .to_string(); + Some((receiver, method)) + } + + fn member_read_node(&self, node: TreeSitterNode<'_>) -> bool { + if self.language == Language::Lua && node.kind() == "field" { + return false; + } + matches!( + node.kind(), + "call" + | "attribute" + | "member_expression" + | "member_access_expression" + | "field" + | "field_access" + | "selector_expression" + | "field_expression" + | "navigation_expression" + | 
"directly_assignable_expression" + | "expression_list" + ) && self.member_parts(node).is_some() + } + + fn member_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option<(TreeSitterNode<'tree>, String)> { + if node.kind() == "expression_list" + && !(self.named_field(node, "operand").is_some() + && self.named_field(node, "field").is_some()) + { + return None; + } + if self.dotted_call(node) { + return self.dotted_call_parts(node, None); + } + let named_children = self.named_children(node); + let receiver = self + .named_field(node, "receiver") + .or_else(|| self.named_field(node, "object")) + .or_else(|| self.named_field(node, "operand")) + .or_else(|| self.named_field(node, "value")) + .or_else(|| self.named_field(node, "expression")) + .or_else(|| { + named_children + .iter() + .copied() + .find(|child| child.kind() != "navigation_suffix") + })?; + let method = self + .named_field(node, "method") + .or_else(|| self.named_field(node, "field")) + .or_else(|| self.named_field(node, "property")) + .or_else(|| self.named_field(node, "suffix")) + .or_else(|| { + named_children + .iter() + .copied() + .find(|child| child.kind() == "navigation_suffix") + }) + .or_else(|| { + named_children.iter().copied().rev().find(|child| { + !matches!( + child.kind(), + "block" | "do_block" | "argument_list" | "arguments" + ) + }) + })?; + (receiver != method).then(|| { + ( + receiver, + self.member_name(method).trim_end_matches('=').to_string(), + ) + }) + } + + fn member_name(&self, node: TreeSitterNode<'_>) -> String { + if node.kind() == "navigation_suffix" { + let named_children = self.named_children(node); + let suffix = self + .named_field(node, "suffix") + .or_else(|| { + named_children + .iter() + .copied() + .find(|child| self.identifier_kind(child.kind())) + }) + .or_else(|| named_children.last().copied()); + return suffix + .map(|suffix| { + node_text(suffix, self.source) + .trim_start_matches(['.', '?']) + .to_string() + }) + .unwrap_or_default(); + } + + 
node_text(node, self.source) + .trim_start_matches(['.', '?']) + .to_string() + } + + fn call_arguments( + &mut self, + node: TreeSitterNode<'_>, + function: Option>, + ) -> Vec { + let Some(args) = self + .named_field(node, "arguments") + .or_else(|| self.named_field(node, "argument")) + .or_else(|| { + self.named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "argument_list" | "arguments")) + }) + else { + return Vec::new(); + }; + let children = self + .named_children(args) + .into_iter() + .filter(|child| Some(*child) != function) + .collect::>(); + if self.dotted_expression(args) { + return self.normalize_dotted_expression(args).into_iter().collect(); + } + let raw_args = self.raw_named_children(args); + if raw_args.len() == 1 && self.dotted_call(raw_args[0]) { + let source = self.wrap("SOURCE", Vec::new(), args); + return self + .normalize_dotted_call_expression_with_source(raw_args[0], Some(&source)) + .into_iter() + .collect(); + } + if children.len() == 1 + && children[0].kind() == "heredoc_beginning" + && heredoc_marker_text(node_text(args, self.source).trim_start()) + { + return self.literal_arguments_from_text(args); + } + if children.is_empty() { + return self + .scalar_argument_list_value(args) + .into_iter() + .chain(self.literal_arguments_from_text(args)) + .collect(); + } + if self.infix_statement(args) { + return self.normalize_infix_statement(args).into_iter().collect(); + } + + children + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect() + } + + fn literal_arguments_from_text(&mut self, args: TreeSitterNode<'_>) -> Vec { + let text = node_text(args, self.source); + if text.trim_start().starts_with("<<") && heredoc_marker_text(text.trim_start()) { + return vec![self.normalize_heredoc_beginning(args)]; + } + + literal_symbol_arguments(text) + .into_iter() + .map(|name| self.wrap("LIT", vec![Child::Symbol(name)], args)) + .collect() + } + + fn command_arguments(&mut self, args: TreeSitterNode<'_>) -> 
Vec { + let children = self.named_children(args); + if children.is_empty() { + return self.scalar_argument_list_value(args).into_iter().collect(); + } + if self.infix_statement(args) { + return self.normalize_infix_statement(args).into_iter().collect(); + } + if self.dotted_expression(args) { + return self.normalize_dotted_expression(args).into_iter().collect(); + } + if children.len() == 1 + && self.call_kind(children[0].kind()) + && self.call_block(children[0]).is_some() + { + return self + .normalize_call_with_block(children[0]) + .into_iter() + .collect(); + } + children + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect() + } + + fn yield_argument_nodes(&mut self, node: TreeSitterNode<'_>) -> Vec { + let children = self.named_children(node); + if children.is_empty() { + return self.scalar_argument_list_value(node).into_iter().collect(); + } + children + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect() + } + + fn yield_inline_arguments(&mut self, node: TreeSitterNode<'_>) -> Vec { + self.named_children(node) + .into_iter() + .filter(|child| child.kind() != "yield") + .filter_map(|child| self.normalize_node(child)) + .collect() + } + + fn scalar_argument_list_value(&mut self, node: TreeSitterNode<'_>) -> Option { + let text = node_text(node, self.source).trim(); + if self.ruby() && text == "yield" { + return Some(self.wrap("YIELD", vec![Child::Nil], node)); + } + if text == "nil" { + return Some(self.wrap("NIL", Vec::new(), node)); + } + if text == "true" { + return Some(self.wrap("TRUE", Vec::new(), node)); + } + if text == "false" { + return Some(self.wrap("FALSE", Vec::new(), node)); + } + if let Some(symbol) = text.strip_prefix(':') { + if bare_identifier_text(symbol) { + return Some(self.wrap("LIT", vec![Child::Symbol(symbol.to_string())], node)); + } + } + if let Ok(value) = text.parse::() { + return Some(self.wrap("INTEGER", vec![Child::Integer(value)], node)); + } + if bare_identifier_text(text) { + if 
self.ruby() && !self.ruby_local_name(text) { + Some(self.wrap("VCALL", vec![Child::Symbol(text.to_string())], node)) + } else { + Some(self.wrap("LVAR", vec![Child::String(text.to_string())], node)) + } + } else { + None + } + } + + fn local_or_call_for_name(&self, name: &str, source: TreeSitterNode<'_>) -> Node { + if self.ruby() && !self.ruby_local_name(name) { + self.wrap("VCALL", vec![Child::Symbol(name.to_string())], source) + } else { + self.wrap("LVAR", vec![Child::String(name.to_string())], source) + } + } + + fn symbol_literal_node(&self, node: Option<&Node>) -> bool { + matches!( + node, + Some(node) + if node.r#type == "LIT" && matches!(node.children.first(), Some(Child::Symbol(_))) + ) + } + + fn same_ts_node(&self, left: TreeSitterNode<'_>, right: TreeSitterNode<'_>) -> bool { + left.kind() == right.kind() + && left.start_byte() == right.start_byte() + && left.end_byte() == right.end_byte() + } + + fn parent_named_child(&self, parent: TreeSitterNode<'_>, node: TreeSitterNode<'_>) -> bool { + self.named_children(parent) + .into_iter() + .any(|child| self.same_ts_node(child, node)) + } + + fn node_key(&self, node: TreeSitterNode<'_>) -> (String, usize, usize) { + (node.kind().to_string(), node.start_byte(), node.end_byte()) + } + + fn hidden_match(&self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "expression_statement" + && node_text(node, self.source) + .trim_start() + .starts_with("match ") + && self + .named_children(node) + .into_iter() + .any(|child| child.kind() == "match_block") + } + + fn assignment_left<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_field(node, "left") + .or_else(|| self.named_children(node).into_iter().next()) + } + + fn assignment_right<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.named_field(node, "right") + .or_else(|| self.named_children(node).into_iter().nth(1)) + } + + fn operator_assignment_operator(&self, node: TreeSitterNode<'_>) -> String { + let mut cursor = 
node.walk(); + let raw = node.children(&mut cursor).find_map(|child| { + let text = node_text(child, self.source); + (!child.is_named() && self.assignment_operator(text)).then_some(text) + }); + if let Some(raw) = raw { + return match raw { + "||=" => "||".to_string(), + "&&=" => "&&".to_string(), + _ => raw.trim_end_matches('=').to_string(), + }; + } + + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && node_text(node, self.source) + .trim_end_matches(';') + .trim_end() + == node_text(raw_named[0], self.source) + { + return self.operator_assignment_operator(raw_named[0]); + } + + String::new() + } + + fn parameters_child<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.named_field(node, "parameters").or_else(|| { + self.named_children(node).into_iter().find(|child| { + matches!( + child.kind(), + "parameters" + | "parameter_list" + | "formal_parameters" + | "function_value_parameters" + | "method_parameters" + ) + }) + }) + } + + fn inline_parameter_begin_marker(&self, function_node: TreeSitterNode<'_>) -> Option { + if !self.ruby() { + return None; + } + + let params = self.named_field(function_node, "parameters").or_else(|| { + self.named_children(function_node) + .into_iter() + .find(|child| child.kind() == "method_parameters") + })?; + let semicolon = params.next_sibling()?; + if semicolon.is_named() || node_text(semicolon, self.source) != ";" { + return None; + } + + let point = semicolon.start_position(); + Some(Node { + r#type: "BEGIN".to_string(), + children: vec![Child::Nil], + first_lineno: point.row + 1, + first_column: point.column, + last_lineno: point.row + 1, + last_column: point.column, + text: String::new(), + }) + } + + fn prepend_inline_parameter_begin( + &self, + function_node: TreeSitterNode<'_>, + body: Option, + ) -> Option { + let Some(marker) = self.inline_parameter_begin_marker(function_node) else { + return body; + }; + + let mut body = body?; + if body.r#type == "BLOCK" { + let mut 
children = body + .children + .into_iter() + .filter(|child| !matches!(child, Child::Nil)) + .collect::>(); + if children.is_empty() { + return None; + } + + body.children = vec![Child::Node(Box::new(marker))]; + body.children.append(&mut children); + return Some(body); + } + + Some(self.wrap( + "BLOCK", + vec![Child::Node(Box::new(marker)), Child::Node(Box::new(body))], + function_node, + )) + } + + fn declaration_entries<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Vec> { + if matches!(node.kind(), "local_variable_declaration") { + let entries = self + .named_children(node) + .into_iter() + .filter(|child| child.kind() == "variable_declarator") + .collect::>(); + if !entries.is_empty() { + return entries; + } + } + if matches!( + node.kind(), + "local_variable_declaration" + | "variable_declarator" + | "variable_declaration" + | "property_declaration" + ) { + vec![node] + } else { + Vec::new() + } + } + + fn declaration_name<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + if let Some(name) = self.named_field(node, "name") { + return Some(name); + } + + for child in self.named_children(node) { + if child.kind() == "variable_declaration" { + if let Some(name) = self.declaration_name(child) { + return Some(name); + } + } + if matches!(child.kind(), "identifier" | "simple_identifier" | "pattern") { + return Some(child); + } + } + None + } + + fn declaration_value<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + if node.kind() == "property_declaration" { + let mut after_target = false; + for child in self.named_children(node) { + if !after_target && matches!(child.kind(), "variable_declaration" | "pattern") { + after_target = true; + continue; + } + if after_target && !declaration_metadata_kind(child.kind()) { + return Some(child); + } + } + } + + self.named_field(node, "value").or_else(|| { + self.named_children(node).into_iter().find(|child| { + !declaration_metadata_kind(child.kind()) + && !matches!( + child.kind(), + 
"identifier" | "simple_identifier" | "pattern" | "variable_declaration" + ) + }) + }) + } + + fn assignment_target( + &mut self, + left: TreeSitterNode<'_>, + right: Option, + source: TreeSitterNode<'_>, + ) -> Option { + if self.instance_variable(left) { + return Some(self.wrap( + "IASGN", + vec![ + Child::String(node_text(left, self.source).to_string()), + optional_node(right), + ], + source, + )); + } + if self.global_variable(left) { + return Some(self.wrap( + "GASGN", + vec![ + Child::String(node_text(left, self.source).to_string()), + optional_node(right), + ], + source, + )); + } + if left.kind() == "element_reference" { + let named = self.named_children(left); + let receiver = *named.first()?; + let mut args = named + .iter() + .skip(1) + .filter_map(|arg| self.normalize_node(*arg)) + .collect::>(); + if let Some(right) = right { + args.push(right); + } + let receiver = optional_node(self.normalize_node(receiver)); + let args = list_or_nil(args, left, self); + return Some(self.wrap( + "ATTRASGN", + vec![receiver, Child::Symbol("[]=".to_string()), args], + source, + )); + } + if self.member_read_node(left) + || self + .normalization_adapter + .member_assignment_target(left, self.source) + { + let (receiver, method) = self.member_parts(left)?; + let writer = if node_text(left, self.source).contains("&.") { + method + } else { + format!("{method}=") + }; + let receiver = optional_node(self.normalize_node(receiver)); + let args = list_or_nil(right.into_iter().collect(), left, self); + return Some(self.wrap( + "ATTRASGN", + vec![receiver, Child::Symbol(writer), args], + source, + )); + } + if left.kind() == "expression_list" { + return self + .named_children(left) + .into_iter() + .next() + .and_then(|child| self.assignment_target(child, right, source)); + } + None + } + + fn normalize_assignment_lhs(&mut self, node: TreeSitterNode<'_>) -> Option { + let right = node + .next_named_sibling() + .and_then(|sibling| self.normalize_node(sibling)); + let source = 
node.parent().unwrap_or(node); + self.assignment_target(node, right.clone(), source) + .or_else(|| { + Some(self.wrap( + "LASGN", + vec![Child::String(self.target_name(node)), optional_node(right)], + source, + )) + }) + } + + fn target_name(&self, node: TreeSitterNode<'_>) -> String { + let text = node_text(node, self.source); + if self.identifier_kind(node.kind()) + || matches!(node.kind(), "splat" | "splat_parameter" | "rest_assignment") + { + text.trim_start_matches('*').to_string() + } else { + text.to_string() + } + } + + fn function_name(&self, node: TreeSitterNode<'_>) -> Option { + if node.kind() == "singleton_method" { + return Some(self.singleton_name(node)); + } + + Some( + self.named_field(node, "name") + .or_else(|| { + self.named_children(node).into_iter().find(|child| { + self.identifier_kind(child.kind()) || child.kind() == "constant" + }) + }) + .map(|name| node_text(name, self.source).to_string()) + .unwrap_or_default(), + ) + } + + fn singleton_receiver<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + if let Some(receiver) = self.named_field(node, "receiver") { + return Some(receiver); + } + + let children = self.named_children(node); + let name = self.named_field(node, "name").or_else(|| { + children + .iter() + .rev() + .copied() + .find(|child| self.identifier_kind(child.kind())) + }); + let parameters = self.named_field(node, "parameters"); + let body = self + .named_field(node, "body") + .or_else(|| self.block_child(node)); + + children.into_iter().find(|child| { + !name + .map(|name| self.same_ts_node(*child, name)) + .unwrap_or(false) + && !parameters + .map(|parameters| self.same_ts_node(*child, parameters)) + .unwrap_or(false) + && !body + .map(|body| self.same_ts_node(*child, body)) + .unwrap_or(false) + }) + } + + fn singleton_name(&self, node: TreeSitterNode<'_>) -> String { + self.named_field(node, "name") + .or_else(|| { + self.named_children(node) + .into_iter() + .rev() + .find(|child| 
self.identifier_kind(child.kind())) + }) + .map(|name| node_text(name, self.source).to_string()) + .unwrap_or_default() + } + + fn block_child<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_children(node).into_iter().find(|child| { + matches!( + child.kind(), + "body_statement" + | "block_body" + | "block" + | "do_block" + | "class_body" + | "function_body" + | "match_block" + | "statement_block" + | "statement_list" + | "statements" + | "switch_body" + | "then" + | "control_structure_body" + ) + }) + } + + fn call_block<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + if self.language == Language::Ruby + && matches!(node.kind(), "body_statement" | "block_body" | "statement") + { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "call" + && node_text(node, self.source) == node_text(raw_named[0], self.source) + { + return self.call_block(raw_named[0]); + } + } + + self.named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "block" | "do_block")) + } + + fn named_field<'tree>( + &self, + node: TreeSitterNode<'tree>, + name: &str, + ) -> Option> { + if self.language == Language::Python + && matches!(name, "body" | "consequence") + && matches!( + node.kind(), + "elif_clause" + | "else_clause" + | "for_statement" + | "function_definition" + | "if_statement" + | "try_statement" + | "while_statement" + | "with_statement" + ) + { + if let Some(block) = self + .raw_named_children(node) + .into_iter() + .find(|child| child.kind() == "block") + { + return Some(block); + } + } + node.child_by_field_name(name) + } + + fn parent_node<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + node.parent() + } + + fn next_sibling<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + node.next_sibling() + } + + fn prev_sibling<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + node.prev_sibling() + } + + fn next_named_sibling<'tree>( + &self, + node: 
TreeSitterNode<'tree>, + ) -> Option> { + node.next_named_sibling() + } + + fn named_children<'tree>(&self, node: TreeSitterNode<'tree>) -> Vec> { + if node.kind() == "dotted_name" && !node_text(node, self.source).contains('.') { + return Vec::new(); + } + if self.language == Language::Python + && node.kind() == "with_clause" + && bare_identifier_text(node_text(node, self.source)) + { + return Vec::new(); + } + if self.language == Language::Lua + && node.kind() == "variable_list" + && self.raw_named_children(node).len() == 1 + && self + .raw_named_children(node) + .first() + .map(|child| self.identifier_kind(child.kind())) + .unwrap_or(false) + && self.lua_single_assignment_block_child(node) + { + return Vec::new(); + } + if self.language == Language::Lua + && node.kind() == "variable_list" + && self.raw_named_children(node).len() == 1 + && node + .parent() + .map(|parent| parent.kind() == "for_generic_clause") + .unwrap_or(false) + { + return Vec::new(); + } + if self.language == Language::Lua + && node.kind() == "variable_list" + && self.raw_named_children(node).len() == 1 + && node + .parent() + .map(|parent| { + parent.kind() == "variable_declaration" + && self.raw_named_children(parent).len() == 1 + }) + .unwrap_or(false) + { + return Vec::new(); + } + + let children = self.raw_named_children(node); + if self.language == Language::Lua + && node.kind() == "variable_list" + && children.len() == 1 + && children[0].kind() == "dot_index_expression" + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if self.language == Language::Ruby + && INTERPOLATED_STATEMENT_WRAPPER_KINDS.contains(&node.kind()) + && children.len() == 1 + && children[0].kind() == "string" + && node_text(node, self.source) == node_text(children[0], self.source) + { + let string_children = self.raw_named_children(children[0]); + if string_children + .iter() + .any(|child| child.kind() == "interpolation") + { + return 
string_children; + } + } + if self.language == Language::Ruby + && matches!(node.kind(), "body_statement" | "block_body" | "statement") + && children.len() == 1 + && matches!( + children[0].kind(), + "if_modifier" | "unless_modifier" | "while_modifier" | "until_modifier" + ) + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if self.language == Language::Ruby + && matches!(node.kind(), "body_statement" | "block_body" | "statement") + && children.len() == 1 + && children[0].kind() == "yield" + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if self.language == Language::Lua + && node.kind() == "expression_list" + && children.len() == 1 + && self.identifier_kind(children[0].kind()) + && node + .parent() + .map(|parent| matches!(parent.kind(), "assignment_statement" | "return_statement")) + .unwrap_or(false) + && node_text(node, self.source) == node_text(children[0], self.source) + { + return Vec::new(); + } + if self.language == Language::Lua + && node.kind() == "expression_list" + && children.len() == 1 + && matches!( + children[0].kind(), + "true" | "false" | "nil" | "number" | "integer" | "float" + ) + && node + .parent() + .map(|parent| matches!(parent.kind(), "assignment_statement" | "return_statement")) + .unwrap_or(false) + && node_text(node, self.source) == node_text(children[0], self.source) + { + return Vec::new(); + } + if self.language == Language::Lua + && node.kind() == "expression_list" + && children.len() == 1 + && matches!( + children[0].kind(), + "binary_expression" + | "function_call" + | "dot_index_expression" + | "function_definition" + | "string" + ) + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if self.language == Language::Lua + && node.kind() == "expression_list" + && children.len() == 1 + && children[0].kind() == 
"table_constructor" + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if self.language == Language::Lua + && node.kind() == "field" + && children.len() == 1 + && self.identifier_kind(children[0].kind()) + && node_text(node, self.source) == node_text(children[0], self.source) + { + return Vec::new(); + } + if self.language == Language::Lua + && node.kind() == "field" + && children.len() == 1 + && children[0].kind() == "string" + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if self.language == Language::Lua + && node.kind() == "field" + && children.len() == 1 + && children[0].kind() == "function_call" + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if self.language == Language::Lua + && node.kind() == "block" + && children.len() == 1 + && matches!( + children[0].kind(), + "function_call" | "return_statement" | "variable_declaration" + ) + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "relative_import" + && children.len() == 1 + && children[0].kind() == "import_prefix" + { + return Vec::new(); + } + if self.language == Language::Python && node.kind() == "block" && children.len() == 1 { + if children[0].kind() == "function_definition" { + return self.named_children(children[0]); + } + if children[0].kind() == "decorated_definition" { + return self.named_children(children[0]); + } + if children[0].kind() == "pass_statement" + && node_text(node, self.source).trim() == "pass" + { + return Vec::new(); + } + if matches!(children[0].kind(), "break_statement" | "continue_statement") + && bare_identifier_text(node_text(node, self.source).trim()) + { + return Vec::new(); + } + if children[0].kind() == 
"return_statement" + && node_text(node, self.source) == node_text(children[0], self.source) + { + if self.raw_named_children(children[0]).is_empty() { + return Vec::new(); + } + return self.named_children(children[0]); + } + if children[0].kind() == "delete_statement" { + return self.named_children(children[0]); + } + if children[0].kind() == "if_statement" { + return self.named_children(children[0]); + } + if matches!( + children[0].kind(), + "assert_statement" + | "for_statement" + | "import_from_statement" + | "import_statement" + | "raise_statement" + | "try_statement" + | "while_statement" + | "with_statement" + ) { + return self.named_children(children[0]); + } + if children[0].kind() != "expression_statement" { + return children; + } + let statement_children = self.raw_named_children(children[0]); + if statement_children.len() == 1 + && statement_children[0].kind() == "identifier" + && node_text(node, self.source) == node_text(children[0], self.source) + { + return Vec::new(); + } + if statement_children.len() == 1 && statement_children[0].kind() == "ellipsis" { + return Vec::new(); + } + if statement_children.len() == 1 + && matches!( + statement_children[0].kind(), + "assignment" + | "augmented_assignment" + | "binary_operator" + | "call" + | "string" + | "subscript" + ) + { + return self.named_children(statement_children[0]); + } + } + if self.language == Language::Python + && node.kind() == "expression_statement" + && children.len() == 1 + && children[0].kind() == "yield" + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "expression_statement" + && children.len() == 1 + && children[0].kind() == "identifier" + { + return Vec::new(); + } + if self.language == Language::Python + && node.kind() == "expression_statement" + && children.len() == 1 + && children[0].kind() == "binary_operator" + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == 
"expression_statement" + && children.len() == 1 + && children[0].kind() == "comparison_operator" + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "expression_statement" + && children.len() == 1 + && children[0].kind() == "call" + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "expression_statement" + && children.len() == 1 + && children[0].kind() == "attribute" + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "expression_statement" + && children.len() == 1 + && children[0].kind() == "string" + { + return self.named_children(children[0]); + } + if self.language == Language::Python && node.kind() == "as_pattern_target" { + return Vec::new(); + } + if self.language == Language::Python + && matches!(node.kind(), "with_clause" | "with_item") + && children.len() == 1 + && matches!(children[0].kind(), "with_item" | "as_pattern") + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "with_item" + && children.len() == 1 + && children[0].kind() == "call" + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if self.language == Language::Python + && node.kind() == "with_item" + && children.len() == 1 + && children[0].kind() == "attribute" + && node_text(node, self.source) == node_text(children[0], self.source) + { + return self.named_children(children[0]); + } + if node.kind() == "type" && children.len() == 1 { + if children[0].kind() == "union_type" { + return self.named_children(children[0]); + } + if self.language == Language::Python && children[0].kind() == "binary_operator" { + return self.named_children(children[0]); + } + if children[0].kind() == "generic_type" { + return self.named_children(children[0]); + } + if children[0].kind() == "attribute" { + return 
self.named_children(children[0]); + } + if children[0].kind() == "string" { + return self.named_children(children[0]); + } + if children[0].kind() == "list" { + if self.raw_named_children(children[0]).is_empty() { + return Vec::new(); + } + return self.named_children(children[0]); + } + if matches!( + children[0].kind(), + "ellipsis" | "identifier" | "nil" | "none" | "null" + ) { + return Vec::new(); + } + } + if node.kind() == "expression_statement" + && children.len() == 1 + && matches!(children[0].kind(), "assignment" | "augmented_assignment") + { + return self.named_children(children[0]); + } + + children + } + + fn raw_named_children<'tree>(&self, node: TreeSitterNode<'tree>) -> Vec> { + node.children(&mut node.walk()) + .filter(|child| child.is_named()) + .collect() + } + + fn lua_no_paren_string_argument_content<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + if self.language != Language::Lua || node.kind() != "string" { + return None; + } + let parent = node.parent()?; + if parent.kind() != "arguments" + || node_text(parent, self.source) != node_text(node, self.source) + { + return None; + } + self.raw_named_children(node) + .into_iter() + .find(|child| child.kind() == "string_content") + } + + fn source_before_child(&self, node: TreeSitterNode<'_>, child: TreeSitterNode<'_>) -> Node { + let text = self + .source + .get(node.start_byte()..child.start_byte()) + .unwrap_or("") + .trim_end() + .to_string(); + if text.is_empty() { + return self.wrap("SOURCE", Vec::new(), node); + } + + let lines = text.lines().collect::>(); + let first_span = span(node); + let last_lineno = first_span[0] + lines.len() - 1; + let last_column = if lines.len() <= 1 { + first_span[1] + text.len() + } else { + lines.last().map(|line| line.len()).unwrap_or(0) + }; + Node { + r#type: "SOURCE".to_string(), + children: Vec::new(), + first_lineno: first_span[0], + first_column: first_span[1], + last_lineno, + last_column, + text, + } + } + + fn source_from_nodes( + 
&self, + first_node: TreeSitterNode<'_>, + last_node: TreeSitterNode<'_>, + ) -> Node { + self.wrap_from_nodes("SOURCE", Vec::new(), first_node, last_node) + } + + fn parenthesized_source(&self, node: TreeSitterNode<'_>) -> Option { + let mut open = None; + let mut close = None; + for child in node.children(&mut node.walk()) { + if child.is_named() { + continue; + } + match node_text(child, self.source) { + "(" if open.is_none() => open = Some(child), + ")" => close = Some(child), + _ => {} + } + } + Some(self.source_from_nodes(open?, close?)) + } + + fn source_from_normalized_nodes(&self, first_node: &Node, last_node: &Node) -> Node { + let lines = self.source.split_inclusive('\n').collect::>(); + let text = if first_node.first_lineno == last_node.last_lineno { + lines + .get(first_node.first_lineno.saturating_sub(1)) + .and_then(|line| line.get(first_node.first_column..last_node.last_column)) + .unwrap_or("") + .to_string() + } else { + let mut text = String::new(); + if let Some(line) = lines.get(first_node.first_lineno.saturating_sub(1)) { + text.push_str(line.get(first_node.first_column..).unwrap_or("")); + } + for index in first_node.first_lineno..last_node.last_lineno.saturating_sub(1) { + if let Some(line) = lines.get(index) { + text.push_str(line); + } + } + if let Some(line) = lines.get(last_node.last_lineno.saturating_sub(1)) { + text.push_str(line.get(..last_node.last_column).unwrap_or("")); + } + text + }; + + Node { + r#type: "SOURCE".to_string(), + children: Vec::new(), + first_lineno: first_node.first_lineno, + first_column: first_node.first_column, + last_lineno: last_node.last_lineno, + last_column: last_node.last_column, + text, + } + } + + fn first_named<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_children(node).into_iter().next() + } + + fn branch_child<'tree>( + &self, + node: TreeSitterNode<'tree>, + condition: TreeSitterNode<'tree>, + offset: usize, + ) -> Option> { + self.named_children(node) + .into_iter() + 
.filter(|child| { + *child != condition && !matches!(child.kind(), "comment" | "else" | "elsif") + }) + .nth(offset) + } + + fn explicit_alternative<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter.explicit_alternative(node) + } + + fn case_value<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_field(node, "value") + .or_else(|| self.named_field(node, "subject")) + .or_else(|| self.named_field(node, "condition")) + .or_else(|| { + self.named_children(node).into_iter().find(|child| { + !self.when_kind(child.kind()) + && !self.block_kind(child.kind()) + && child.kind() != "else" + }) + }) + } + + fn case_arms<'tree>(&self, node: TreeSitterNode<'tree>) -> Vec> { + let mut arms = Vec::new(); + let mut stack = self.named_children(node); + while !stack.is_empty() { + let child = stack.remove(0); + if self.normalization_adapter.case_arm(child, self.source) { + arms.push(child); + } else if self + .normalization_adapter + .case_else_node_kind(child, self.source) + { + continue; + } else if !function_kind(child.kind()) { + stack.extend(self.named_children(child)); + } + } + arms + } + + fn when_body<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_field(node, "body") + .or_else(|| self.named_field(node, "consequence")) + .or_else(|| self.named_field(node, "value")) + .or_else(|| { + self.named_children(node).into_iter().rev().find(|child| { + self.block_kind(child.kind()) || self.statement_node(child.kind()) + }) + }) + } + + fn identifier_kind(&self, kind: &str) -> bool { + identifier_kind_name(kind) + } + + fn const_kind(&self, kind: &str) -> bool { + matches!( + kind, + "constant" | "scope_resolution" | "type_identifier" | "scoped_type_identifier" + ) + } + + fn call_kind(&self, kind: &str) -> bool { + matches!( + kind, + "call" | "call_expression" | "method_call" | "method_call_expression" + ) + } + + fn block_kind(&self, kind: &str) -> bool { + matches!( + kind, + "block" + | 
"body_statement" + | "statement_block" + | "statement_list" + | "class_body" + | "switch_body" + | "match_block" + | "then" + | "block_body" + | "control_structure_body" + | "function_body" + | "statements" + ) + } + + fn case_kind(&self, kind: &str) -> bool { + matches!( + kind, + "case" + | "switch_statement" + | "expression_switch_statement" + | "switch_expression" + | "match_statement" + | "match_expression" + | "when_expression" + ) + } + + fn when_kind(&self, kind: &str) -> bool { + matches!( + kind, + "when" + | "switch_case" + | "case_clause" + | "expression_case" + | "case_statement" + | "switch_section" + | "switch_block_statement_group" + | "switch_entry" + | "when_entry" + | "match_arm" + ) + } + + fn statement_node(&self, kind: &str) -> bool { + kind.ends_with("_statement") + || kind.ends_with("_expression") + || matches!(kind, "return" | "break" | "next") + } + + fn unwrap_node(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .unwrap_node(node, self.source, self.named_children(node).len()) + } + + fn first_dotted_call_descendant<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + for child in self.named_children(node) { + if self.call_kind(child.kind()) && self.dotted_call(child) { + return Some(child); + } + if let Some(found) = self.first_dotted_call_descendant(child) { + return Some(found); + } + } + None + } + + fn elide_tail_returns(&self, node: Option) -> Option { + if self.language != Language::Ruby { + return node; + } + let mut node = node?; + if matches!( + node.r#type.as_str(), + "DEFN" | "DEFS" | "CLASS" | "MODULE" | "SCLASS" | "LAMBDA" | "ITER" + ) { + return Some(node); + } + if node.r#type == "RETURN" { + return node.children.into_iter().next().and_then(child_node); + } + + match node.r#type.as_str() { + "BLOCK" => { + if let Some(last) = node.children.pop() { + match child_node(last) { + Some(last_node) => { + if let Some(elided) = self.elide_tail_returns(Some(last_node)) { + 
node.children.push(Child::Node(Box::new(elided))); + } else { + node.children.push(Child::Nil); + } + } + None => node.children.push(Child::Nil), + } + } + } + "SCOPE" => { + if node.children.len() > 2 { + let child = std::mem::replace(&mut node.children[2], Child::Nil); + if let Some(elided) = + child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { + node.children[2] = Child::Node(Box::new(elided)); + } + } + } + "IF" | "UNLESS" => { + for index in [1usize, 2usize] { + if node.children.len() > index { + let child = std::mem::replace(&mut node.children[index], Child::Nil); + if let Some(elided) = + child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { + node.children[index] = Child::Node(Box::new(elided)); + } + } + } + } + "CASE" | "CASE2" => { + let index = if node.r#type == "CASE" { 1 } else { 0 }; + if node.children.len() > index { + let child = std::mem::replace(&mut node.children[index], Child::Nil); + if let Some(elided) = + child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { + node.children[index] = Child::Node(Box::new(elided)); + } + } + } + "WHEN" | "RESBODY" => { + for index in [1usize, 2usize] { + if node.children.len() > index { + let child = std::mem::replace(&mut node.children[index], Child::Nil); + if let Some(elided) = + child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { + node.children[index] = Child::Node(Box::new(elided)); + } + } + } + } + "RESCUE" => { + for index in [0usize, 1usize] { + if node.children.len() > index { + let child = std::mem::replace(&mut node.children[index], Child::Nil); + if let Some(elided) = + child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { + node.children[index] = Child::Node(Box::new(elided)); + } + } + } + } + _ => {} + } + + Some(node) + } + + fn elide_implicit_nil_body(&self, node: Option) -> Option { + if self.language != Language::Ruby { + return node; + } + let node = 
self.drop_trailing_nil_statement(node); + match node { + Some(node) if node.r#type == "NIL" => None, + other => other, + } + } + + fn drop_trailing_nil_statement(&self, node: Option) -> Option { + let mut node = node?; + if node.r#type != "BLOCK" { + return Some(node); + } + node.children.retain(|child| !matches!(child, Child::Nil)); + while node + .children + .last() + .and_then(self::node) + .map(|child| child.r#type == "NIL") + .unwrap_or(false) + { + node.children.pop(); + } + if node.children.is_empty() { + None + } else if node.children.len() == 1 { + child_node(node.children.into_iter().next().unwrap()) + } else { + Some(node) + } + } +} + +fn optional_node(node: Option) -> Child { + node.map(|node| Child::Node(Box::new(node))) + .unwrap_or(Child::Nil) +} + +fn child_node(child: Child) -> Option { + match child { + Child::Node(node) => Some(*node), + _ => None, + } +} + +fn list_or_nil( + children: Vec, + source: TreeSitterNode<'_>, + normalizer: &TreeSitterNormalizer<'_>, +) -> Child { + if children.is_empty() { + Child::Nil + } else { + Child::Node(Box::new(normalizer.list_node(children, source))) + } +} + +fn integer_text(text: &str) -> bool { + let digits = text.strip_prefix('-').unwrap_or(text); + !digits.is_empty() && digits.chars().all(|ch| ch.is_ascii_digit()) +} + +fn ruby_constant_text(text: &str) -> bool { + let mut chars = text.chars(); + let Some(first) = chars.next() else { + return false; + }; + first.is_ascii_uppercase() && chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) +} + +fn dynamic_scope(mut node: Node) -> Node { + if matches!( + node.r#type.as_str(), + "DEFN" | "DEFS" | "CLASS" | "MODULE" | "SCLASS" | "LAMBDA" + ) { + return node; + } + if node.r#type == "LASGN" { + node.r#type = "DASGN".to_string(); + } else if node.r#type == "LVAR" { + node.r#type = "DVAR".to_string(); + } + node.children = node + .children + .into_iter() + .map(|child| match child { + Child::Node(node) => Child::Node(Box::new(dynamic_scope(*node))), + other 
=> other, + }) + .collect(); + node +} + +fn declaration_metadata_kind(kind: &str) -> bool { + matches!( + kind, + "modifiers" + | "type" + | "nullable_type" + | "parenthesized_type" + | "user_type" + | "type_identifier" + | "integral_type" + | "floating_point_type" + | "void_type" + ) +} + +fn kind_type(kind: &str) -> String { + let mut result = String::new(); + let mut in_separator = false; + for ch in kind.chars() { + if ch.is_ascii_alphanumeric() { + result.push(ch.to_ascii_uppercase()); + in_separator = false; + } else if !in_separator { + result.push('_'); + in_separator = true; + } + } + result +} + +fn ts_node(node: Option>) -> bool { + node.is_some() +} + +fn if_kind(kind: &str) -> bool { + matches!( + kind, + "if" | "if_statement" + | "if_modifier" + | "unless" + | "unless_modifier" + | "if_expression" + | "conditional" + ) +} + +fn loop_kind(kind: &str) -> Option<&'static str> { + match kind { + "while" | "while_statement" | "while_modifier" => Some("WHILE"), + "until_modifier" => Some("UNTIL"), + "for" | "for_statement" | "for_in_clause" => Some("FOR"), + _ => None, + } +} + +fn function_kind(kind: &str) -> bool { + matches!( + kind, + "method" + | "function_definition" + | "function_declaration" + | "method_definition" + | "method_declaration" + | "function_item" + | "singleton_method" + ) +} + +fn return_kind(kind: &str) -> &str { + match kind { + "return" | "return_statement" | "return_expression" => "RETURN", + "break" | "break_statement" | "break_expression" => "BREAK", + "next" | "continue_statement" => "NEXT", + other => other, + } +} + +fn return_statement_kind(kind: &str) -> bool { + matches!( + kind, + "return" + | "return_statement" + | "return_expression" + | "break" + | "break_statement" + | "break_expression" + | "next" + | "continue_statement" + ) +} + +fn inline_def_wrapper_mid(text: &str) -> bool { + matches!( + text, + "public" | "protected" | "private" | "private_class_method" | "module_function" + ) +} + +fn 
inline_def_receiver_text(text: &str) -> bool { + let mut tokens = text.split_whitespace(); + while let Some(token) = tokens.next() { + if token != "def" { + continue; + } + let Some(name) = tokens.next() else { + return false; + }; + let Some((receiver, _method)) = name.split_once('.') else { + return false; + }; + return !receiver.is_empty(); + } + false +} + +fn literal_symbol_arguments(text: &str) -> Vec { + let chars = text.char_indices().collect::>(); + let mut symbols = Vec::new(); + let mut index = 0; + while index < chars.len() { + if chars[index].1 != ':' { + index += 1; + continue; + } + let Some((_, first)) = chars.get(index + 1).copied() else { + index += 1; + continue; + }; + if !(first == '_' || first.is_ascii_alphabetic()) { + index += 1; + continue; + } + + let start = chars[index + 1].0; + let mut end = start + first.len_utf8(); + let mut cursor = index + 2; + while let Some((byte, ch)) = chars.get(cursor).copied() { + if ch == '_' || ch.is_ascii_alphanumeric() { + end = byte + ch.len_utf8(); + cursor += 1; + } else { + break; + } + } + if let Some((byte, ch)) = chars.get(cursor).copied() { + if matches!(ch, '!' | '?' | '=') { + end = byte + ch.len_utf8(); + cursor += 1; + } + } + symbols.push(text[start..end].to_string()); + index = cursor; + } + symbols +} + +fn bare_identifier_text(text: &str) -> bool { + let text = text.trim(); + exact_bare_identifier_text(text) +} + +fn exact_bare_identifier_text(text: &str) -> bool { + let mut chars = text.chars(); + let Some(first) = chars.next() else { + return false; + }; + if !(first == '_' || first.is_ascii_alphabetic()) { + return false; + } + let mut chars = chars.peekable(); + while let Some(ch) = chars.next() { + if ch == '_' || ch.is_ascii_alphanumeric() { + continue; + } + if matches!(ch, '!' | '?' 
| '=') { + return chars.peek().is_none(); + } + return false; + } + true +} + +fn ruby_instance_variable_text(text: &str) -> bool { + text.strip_prefix('@') + .map(exact_bare_identifier_text) + .unwrap_or(false) +} + +fn exact_integer_text(text: &str) -> bool { + let digits = text.strip_prefix('-').unwrap_or(text); + !digits.is_empty() && digits.chars().all(|ch| ch.is_ascii_digit()) +} + +fn heredoc_marker_text(text: &str) -> bool { + text.split(|ch: char| ch.is_whitespace() || matches!(ch, '(' | ',')) + .any(|token| { + let Some(marker) = token.strip_prefix("<<") else { + return false; + }; + let marker = marker + .strip_prefix('-') + .or_else(|| marker.strip_prefix('~')) + .unwrap_or(marker); + let mut chars = marker.chars(); + let Some(first) = chars.next() else { + return false; + }; + first == '_' || first.is_ascii_alphabetic() + }) +} + +fn ruby_variable_name_text(text: &str) -> bool { + let mut chars = text.chars().peekable(); + let Some(first) = chars.next() else { + return false; + }; + if !(first == '_' || first.is_ascii_alphabetic()) { + return false; + } + while let Some(ch) = chars.next() { + if matches!(ch, '!' | '?' 
| '=') { + return chars.peek().is_none(); + } + if !(ch == '_' || ch.is_ascii_alphanumeric()) { + return false; + } + } + true +} + +fn comparison_operator_from_text(text: &str) -> Option { + for operator in ["===", "!==", "==", "!=", "<=", ">=", "<", ">"] { + if text.contains(operator) { + return Some(operator.to_string()); + } + } + None +} + +fn operator_assignment_statement_operator(text: &str) -> Option { + match text { + "+=" => Some("+".to_string()), + "-=" => Some("-".to_string()), + "*=" => Some("*".to_string()), + "/=" => Some("/".to_string()), + "%=" => Some("%".to_string()), + "&=" => Some("&".to_string()), + "|=" => Some("|".to_string()), + "^=" => Some("^".to_string()), + "||=" => Some("||".to_string()), + "&&=" => Some("&&".to_string()), + _ => None, + } +} + +pub fn child_to_string(child: Option<&Child>) -> Option { + match child { + Some(Child::String(value)) | Some(Child::Symbol(value)) => Some(value.clone()), + Some(Child::Integer(value)) => Some(value.to_string()), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::{parse, parse_with_language, Child, Node}; + use crate::decomplex::syntax::Language; + use serde_json::{json, Value}; + use std::collections::BTreeSet; + use std::io::Write; + use std::path::Path; + use std::process::Command; + use tree_sitter::{Node as TreeSitterNode, Parser as TreeSitterParser}; + + fn parse_source(source: &str) -> Node { + let mut file = tempfile::Builder::new() + .suffix(".rb") + .tempfile() + .expect("create temp ruby file"); + file.write_all(source.as_bytes()) + .expect("write temp ruby file"); + parse(file.path()).expect("parse temp ruby file").0 + } + + fn parse_language_source(source: &str, language: Language, suffix: &str) -> Node { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create temp source file"); + file.write_all(source.as_bytes()) + .expect("write temp source file"); + parse_with_language(file.path(), language) + .expect("parse temp source file") + 
.0 + } + + fn nodes_of_type<'a>(node: &'a Node, node_type: &str, out: &mut Vec<&'a Node>) { + if node.r#type == node_type { + out.push(node); + } + for child in node.children.iter().filter_map(super::node) { + nodes_of_type(child, node_type, out); + } + } + + fn first_node<'a>(root: &'a Node, node_type: &str, text: &str) -> &'a Node { + let mut nodes = Vec::new(); + nodes_of_type(root, node_type, &mut nodes); + nodes + .into_iter() + .find(|node| node.text == text) + .unwrap_or_else(|| panic!("expected {node_type} with text {text:?} in {root:#?}")) + } + + fn child_node(node: &Node, index: usize) -> &Node { + node.children + .get(index) + .and_then(super::node) + .unwrap_or_else(|| panic!("expected child node {index} in {node:#?}")) + } + + fn child_types(node: &Node) -> Vec<&str> { + node.children + .iter() + .filter_map(super::node) + .map(|child| child.r#type.as_str()) + .collect() + } + + fn test_node(node_type: &str, children: Vec) -> Node { + Node { + r#type: node_type.to_string(), + children, + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: node_type.to_string(), + } + } + + fn infix_parts_text( + normalizer: &super::TreeSitterNormalizer<'_>, + node: TreeSitterNode<'_>, + source: &str, + ) -> Option<(String, String, String)> { + let (left, operator, right) = normalizer.infix_statement_parts(node)?; + Some(( + super::node_text(left, source).to_string(), + operator, + super::node_text(right, source).to_string(), + )) + } + + fn node_value(node: &Node) -> Value { + json!({ + "type": node.r#type, + "children": node.children.iter().map(child_value).collect::>(), + "first_lineno": node.first_lineno, + "first_column": node.first_column, + "last_lineno": node.last_lineno, + "last_column": node.last_column, + "text": node.text, + }) + } + + fn child_value(child: &Child) -> Value { + match child { + Child::Node(node) => node_value(node), + Child::Symbol(value) | Child::String(value) => Value::String(value.clone()), + 
Child::Integer(value) => Value::Number((*value).into()), + Child::Bool(value) => Value::Bool(*value), + Child::Nil => Value::Null, + } + } + + fn children_value(children: &[Child]) -> Value { + Value::Array(children.iter().map(child_value).collect()) + } + + fn ruby_language_name(language: Language) -> &'static str { + match language { + Language::Ruby => "ruby", + Language::Python => "python", + Language::JavaScript => "javascript", + Language::Java => "java", + Language::TypeScript => "typescript", + Language::Swift => "swift", + Language::Kotlin => "kotlin", + Language::Go => "go", + Language::Rust => "rust", + Language::Zig => "zig", + Language::Lua => "lua", + Language::C => "c", + Language::Cpp => "cpp", + Language::CSharp => "csharp", + } + } + + fn ruby_normalized_value(path: &Path, language: Language) -> Value { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + root, = Decomplex::Ast.parse(ARGV.fetch(0)) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + elsif node.is_a?(Array) + node.map { |child| value(child) } + else + node + end + end + + puts JSON.generate(value(root)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "json", + "-e", + script, + ]) + .arg(path) + .output() + .expect("run ruby normalizer"); + assert!( + output.status.success(), + "ruby normalizer failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + 
serde_json::from_slice(&output.stdout).expect("ruby normalizer should emit JSON") + } + + fn assert_ruby_parity(source: &str, language: Language, suffix: &str) { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create parity temp source file"); + file.write_all(source.as_bytes()) + .expect("write parity temp source file"); + + let rust = node_value( + &parse_with_language(file.path(), language) + .expect("parse parity temp source file") + .0, + ); + let ruby = ruby_normalized_value(file.path(), language); + assert_eq!(rust, ruby); + } + + fn raw_tree(source: &str, language: Language) -> tree_sitter::Tree { + let mut parser = TreeSitterParser::new(); + parser + .set_language(&super::language_grammar(language)) + .expect("set raw parser language"); + parser.parse(source, None).expect("parse raw source") + } + + fn first_raw_node<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kind: &str, + text: &str, + ) -> TreeSitterNode<'tree> { + if node.kind() == kind && super::node_text(node, source) == text { + return node; + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if let Some(found) = first_raw_node_opt(child, source, kind, text) { + return found; + } + } + panic!("expected raw node kind={kind:?} text={text:?}"); + } + + fn first_raw_node_opt<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kind: &str, + text: &str, + ) -> Option> { + if node.kind() == kind && super::node_text(node, source) == text { + return Some(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if let Some(found) = first_raw_node_opt(child, source, kind, text) { + return Some(found); + } + } + None + } + + fn nth_raw_node<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kind: &str, + text: &str, + index: usize, + ) -> TreeSitterNode<'tree> { + let mut found = Vec::new(); + collect_raw_nodes(node, source, kind, text, &mut found); + *found.get(index).unwrap_or_else(|| { + 
panic!("expected raw node kind={kind:?} text={text:?} index={index}") + }) + } + + fn collect_raw_nodes<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kind: &str, + text: &str, + found: &mut Vec>, + ) { + if node.kind() == kind && super::node_text(node, source) == text { + found.push(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + collect_raw_nodes(child, source, kind, text, found); + } + } + + fn ruby_private_predicate( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, + ) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby predicate temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby predicate temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(method, target) ? 
"true" : "false" + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby private predicate"); + assert!( + output.status.success(), + "ruby predicate failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby predicate output should be utf8") + .trim() + == "true" + } + + fn ruby_private_collected_names( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, + ) -> BTreeSet { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby collected names temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby collected names temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + locals = Set.new + normalizer.send(method, target, locals) + puts JSON.generate(locals.to_a.sort) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", 
+ "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-r", + "set", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby collected names helper"); + assert!( + output.status.success(), + "ruby collected names helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice::>(&output.stdout) + .expect("ruby collected names output should be json") + .into_iter() + .collect() + } + + fn ruby_private_scope_collected_names( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + root: bool, + ) -> BTreeSet { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby scope collected names temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby scope collected names temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + root = ARGV.fetch(3) == "true" + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + locals = Set.new + normalizer.send(:collect_ruby_scope_locals, target, locals, root: root) + puts JSON.generate(locals.to_a.sort) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + 
"decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(if root { "true" } else { "false" }) + .output() + .expect("run ruby scope collected names helper"); + assert!( + output.status.success(), + "ruby scope collected names helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice::>(&output.stdout) + .expect("ruby scope collected names output should be json") + .into_iter() + .collect() + } + + fn ruby_private_ruby_scope_locals( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> BTreeSet { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby scope locals temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby scope locals temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts JSON.generate(normalizer.send(:ruby_scope_locals, target).to_a.sort) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-r", + "set", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) 
+ .arg(text) + .output() + .expect("run ruby scope locals helper"); + assert!( + output.status.success(), + "ruby scope locals helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice::>(&output.stdout) + .expect("ruby scope locals output should be json") + .into_iter() + .collect() + } + + fn ruby_private_with_ruby_scope_trace( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + reset: bool, + initial_stack: &[Vec<&str>], + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby with_ruby_scope temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby with_ruby_scope temp source file"); + let initial_stack_json = + serde_json::to_string(initial_stack).expect("serialize initial local stack"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + reset = ARGV.fetch(3) == "true" + initial = JSON.parse(ARGV.fetch(4)).map { |names| Set.new(names) } + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, initial) + snapshot = lambda do + Array(normalizer.instance_variable_get(:@local_stack)).map { |locals| locals.to_a.sort } + end + before = snapshot.call + inside = nil + result = normalizer.send(:with_ruby_scope, target, reset: reset) do + inside = snapshot.call + 
"block-result" + end + after = snapshot.call + puts JSON.generate("before" => before, "inside" => inside, "after" => after, "result" => result) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-r", + "set", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(if reset { "true" } else { "false" }) + .arg(initial_stack_json) + .output() + .expect("run ruby with_ruby_scope helper"); + assert!( + output.status.success(), + "ruby with_ruby_scope helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby with_ruby_scope output should be json") + } + + fn local_stack_from(names: &[Vec<&str>]) -> Vec> { + names + .iter() + .map(|scope| scope.iter().map(|name| name.to_string()).collect()) + .collect() + } + + fn local_stack_value(stack: &[BTreeSet]) -> Value { + json!(stack + .iter() + .map(|scope| scope.iter().cloned().collect::>()) + .collect::>()) + } + + fn ruby_private_destructured_parameter_targets_value( + source: &str, + kind: &str, + text: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(".rb") + .tempfile() + .expect("create ruby destructured parameter temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby destructured parameter temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + 
node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + targets = [] + normalizer.send(:collect_destructured_parameter_targets, target, targets) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + elsif node.is_a?(Array) + node.map { |child| value(child) } + else + node + end + end + + puts JSON.generate(targets.map { |node| value(node) }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env( + "DECOMPLEX_FORCE_LANGUAGE", + ruby_language_name(Language::Ruby), + ) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby destructured parameter helper"); + assert!( + output.status.success(), + "ruby destructured parameter helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby destructured parameter output should be json") + } + + fn ruby_private_scope_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + mode: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby scope temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby scope temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir 
should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + mode = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + + body = mode == "body" ? normalizer.send(:wrap, :BODY, children: [], source: target) : nil + args = mode == "args" ? normalizer.send(:wrap, :ARGS, children: [], source: target) : nil + result = normalizer.send(:scope, body, args: args, source: target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(mode) + .output() + .expect("run ruby scope helper"); + assert!( + output.status.success(), + "ruby scope helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby scope output should be json") + } + + fn ruby_private_list_value( + source: &str, + language: Language, + 
suffix: &str, + kind: &str, + text: &str, + mode: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby list temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby list temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + mode = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + + item = normalizer.send(:wrap, :ITEM, children: [], source: target) + children = + case mode + when "nil" then nil + when "empty" then [] + when "one" then [item] + else abort "unknown list mode: #{mode}" + end + result = normalizer.send(:list, children, source: target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + 
.arg(mode) + .output() + .expect("run ruby list helper"); + assert!( + output.status.success(), + "ruby list helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby list output should be json") + } + + fn ruby_private_string( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, + ) -> String { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby string temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby string temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(method, target).to_s + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby private string helper"); + assert!( + output.status.success(), + "ruby string helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + 
.expect("ruby string helper output should be utf8") + .trim_end_matches(['\r', '\n']) + .to_string() + } + + fn ruby_private_text_predicate(language: Language, method: &str, text: &str) -> bool { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + language = ARGV.fetch(0).to_sym + text = ARGV.fetch(1) + method = ARGV.fetch(2) + document = Object.new + document.define_singleton_method(:language) { language } + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, document) + puts normalizer.send(method, text) ? "true" : "false" + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(ruby_language_name(language)) + .arg(text) + .arg(method) + .output() + .expect("run ruby private text predicate"); + assert!( + output.status.success(), + "ruby text predicate failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby text predicate output should be utf8") + .trim() + == "true" + } + + fn ruby_private_text_string(language: Language, method: &str, text: &str) -> String { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + language = ARGV.fetch(0).to_sym + text = ARGV.fetch(1) + method = ARGV.fetch(2) + document = Object.new + document.define_singleton_method(:language) { language } + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, document) + puts normalizer.send(method, text).to_s + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(ruby_language_name(language)) + .arg(text) + .arg(method) + 
.output() + .expect("run ruby private text string helper"); + assert!( + output.status.success(), + "ruby text string helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby text string output should be utf8") + .trim_end_matches(['\r', '\n']) + .to_string() + } + + fn ruby_private_ts_node_value(value: &str) -> bool { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Object.new + document.define_singleton_method(:language) { :ruby } + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, document) + target = + case ARGV.fetch(0) + when "nil" + nil + when "string" + "value" + when "normalized_node" + Decomplex::Ast::Node.new(type: :LIT, children: [], first_lineno: 1, first_column: 0, last_lineno: 1, last_column: 1, text: "1") + else + abort "unknown ts_node? probe" + end + puts normalizer.send(:ts_node?, target) ? "true" : "false" + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(value) + .output() + .expect("run ruby private ts_node? value helper"); + assert!( + output.status.success(), + "ruby ts_node? value helper failed for {value}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby ts_node? 
value output should be utf8") + .trim() + == "true" + } + + fn ruby_private_regex_literal_value(value: &str) -> bool { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Object.new + document.define_singleton_method(:language) { :ruby } + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, document) + target = + case ARGV.fetch(0) + when "nil" + nil + when "string" + "value" + when "normalized_node" + Decomplex::Ast::Node.new(type: :LIT, children: [], first_lineno: 1, first_column: 0, last_lineno: 1, last_column: 1, text: "1") + else + abort "unknown regex_literal? probe" + end + puts normalizer.send(:regex_literal?, target) ? "true" : "false" + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(value) + .output() + .expect("run ruby private regex_literal? value helper"); + assert!( + output.status.success(), + "ruby regex_literal? value helper failed for {value}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby regex_literal? 
value output should be utf8") + .trim() + == "true" + } + + fn ruby_private_node_signature( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, + ) -> Option<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby node signature temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby node signature temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(method, target) + if result + puts JSON.generate([result.kind, result.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby private node signature helper"); + assert!( + output.status.success(), + "ruby node signature helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby node signature output should be json"); + if value.is_null() { + return None; + } + let pair = 
value + .as_array() + .expect("ruby node signature should be an array"); + Some(( + pair[0] + .as_str() + .expect("node kind should be string") + .to_string(), + pair[1] + .as_str() + .expect("node text should be string") + .to_string(), + )) + } + + fn ruby_private_inline_def_name_after_receiver( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> String { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby inline def name temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby inline def name temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + receiver = normalizer.send(:inline_def_receiver, target) + puts normalizer.send(:inline_def_name_after_receiver, target, receiver).to_s + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby inline def name helper"); + assert!( + output.status.success(), + "ruby inline def name helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby inline def 
name output should be utf8") + .trim() + .to_string() + } + + fn ruby_private_inline_parameter_begin_marker_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby inline_parameter_begin_marker temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby inline_parameter_begin_marker temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:inline_parameter_begin_marker, target) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private inline_parameter_begin_marker helper"); + 
assert!( + output.status.success(), + "ruby inline_parameter_begin_marker helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby inline_parameter_begin_marker output should be json") + } + + fn ruby_private_prepend_inline_parameter_begin_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + body: &Value, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby prepend_inline_parameter_begin temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby prepend_inline_parameter_begin temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def node(value) + return nil if value.nil? + return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |ts_node| + if ts_node.respond_to?(:kind) + target ||= ts_node if ts_node.kind == target_kind && 
ts_node.text.to_s == target_text + ts_node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + body = node(JSON.parse(ARGV.fetch(3))) + result = normalizer.send(:prepend_inline_parameter_begin, target, body) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(body.to_string()) + .output() + .expect("run ruby private prepend_inline_parameter_begin helper"); + assert!( + output.status.success(), + "ruby prepend_inline_parameter_begin helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby prepend_inline_parameter_begin output should be json") + } + + fn ruby_private_local_or_call_for_name_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + name: &str, + local: bool, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby local_or_call_for_name temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby local_or_call_for_name temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + name = ARGV.fetch(3) + local = ARGV.fetch(4) == "true" + target = nil + walk = lambda do |node| + if 
node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, local ? [Set[name]] : []) + result = normalizer.send(:local_or_call_for_name, name, target) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(name) + .arg(if local { "true" } else { "false" }) + .output() + .expect("run ruby private local_or_call_for_name helper"); + assert!( + output.status.success(), + "ruby local_or_call_for_name helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby local_or_call_for_name output should be json") + } + + fn ruby_private_ruby_vcall_identifier_predicate( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + local_names: &[&str], + ) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby ruby_vcall_identifier temp source file"); + file.write_all(source.as_bytes()) + .expect("write 
ruby ruby_vcall_identifier temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + local_names = ARGV.fetch(3).split(",").reject(&:empty?) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, local_names.empty? ? [] : [Set.new(local_names)]) + puts normalizer.send(:ruby_vcall_identifier?, target) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(local_names.join(",")) + .output() + .expect("run ruby private ruby_vcall_identifier? helper"); + assert!( + output.status.success(), + "ruby ruby_vcall_identifier? helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby ruby_vcall_identifier? 
output should be utf8") + .trim() + == "true" + } + + fn ruby_private_vcall_identifier_predicate( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + local_names: &[&str], + ) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby vcall_identifier temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby vcall_identifier temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + local_names = ARGV.fetch(3).split(",").reject(&:empty?) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, local_names.empty? ? [] : [Set.new(local_names)]) + puts normalizer.send(:vcall_identifier?, target) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(local_names.join(",")) + .output() + .expect("run ruby private vcall_identifier? helper"); + assert!( + output.status.success(), + "ruby vcall_identifier? helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby vcall_identifier? 
output should be utf8") + .trim() + == "true" + } + + fn ruby_private_normalize_terminal_statement_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + local_names: &[&str], + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize_terminal_statement temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize_terminal_statement temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + local_names = ARGV.fetch(3).split(",").reject(&:empty?) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, local_names.empty? ? 
[] : [Set.new(local_names)]) + result = normalizer.send(:normalize_terminal_statement, target) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(local_names.join(",")) + .output() + .expect("run ruby private normalize_terminal_statement helper"); + assert!( + output.status.success(), + "ruby normalize_terminal_statement helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby normalize_terminal_statement output should be json") + } + + fn ruby_private_node_list_signature( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, + ) -> Vec<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby node list signature temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby node list signature temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = Array(normalizer.send(method, target)) + puts 
JSON.generate(result.map { |node| [node.kind, node.text.to_s] }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby node list signature helper"); + assert!( + output.status.success(), + "ruby node list signature helper failed for {method}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby node list signature output should be json"); + value + .as_array() + .expect("ruby node list signature should be an array") + .iter() + .map(|item| { + let item = item + .as_array() + .expect("ruby node list item should be an array"); + ( + item[0] + .as_str() + .expect("ruby node list kind should be a string") + .to_string(), + item[1] + .as_str() + .expect("ruby node list text should be a string") + .to_string(), + ) + }) + .collect() + } + + fn ruby_private_dotted_call_parts( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> Option<(String, String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby dotted_call_parts temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby dotted_call_parts temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == 
target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + receiver, method = normalizer.send(:dotted_call_parts, target) + if receiver + puts JSON.generate([receiver.kind, receiver.text.to_s, method.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private dotted_call_parts helper"); + assert!( + output.status.success(), + "ruby dotted_call_parts helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby dotted_call_parts output should be json"); + if value.is_null() { + return None; + } + let parts = value + .as_array() + .expect("ruby dotted_call_parts should be an array"); + Some(( + parts[0] + .as_str() + .expect("receiver kind should be string") + .to_string(), + parts[1] + .as_str() + .expect("receiver text should be string") + .to_string(), + parts[2] + .as_str() + .expect("method should be string") + .to_string(), + )) + } + + fn ruby_private_member_parts( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> Option<(String, String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby member_parts temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby member_parts temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" 
+ document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + receiver, method = normalizer.send(:member_parts, target) + if receiver + puts JSON.generate([receiver.kind, receiver.text.to_s, method.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private member_parts helper"); + assert!( + output.status.success(), + "ruby member_parts helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby member_parts output should be json"); + if value.is_null() { + return None; + } + let parts = value + .as_array() + .expect("ruby member_parts should be an array"); + Some(( + parts[0] + .as_str() + .expect("receiver kind should be string") + .to_string(), + parts[1] + .as_str() + .expect("receiver text should be string") + .to_string(), + parts[2] + .as_str() + .expect("method should be string") + .to_string(), + )) + } + + fn ruby_private_named_field_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + field: &str, + ) -> Option<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + 
.tempfile() + .expect("create ruby named_field temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby named_field temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + field = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:named_field, target, field) + if result + puts JSON.generate([result.kind, result.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(field) + .output() + .expect("run ruby private named_field helper"); + assert!( + output.status.success(), + "ruby named_field helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = + serde_json::from_slice(&output.stdout).expect("ruby named_field output should be json"); + if value.is_null() { + return None; + } + let pair = value + .as_array() + .expect("ruby named_field output should be an array"); + Some(( + pair[0] + .as_str() + .expect("named_field kind should be string") + .to_string(), + pair[1] + .as_str() + .expect("named_field text should be string") + .to_string(), + )) + } + + fn 
ruby_private_branch_child_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + condition_kind: &str, + condition_text: &str, + index: usize, + ) -> Option<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby branch_child temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby branch_child temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + condition_kind = ARGV.fetch(3) + condition_text = ARGV.fetch(4) + index = Integer(ARGV.fetch(5)) + target = nil + condition = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + condition ||= node if node.kind == condition_kind && node.text.to_s == condition_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + abort "condition node not found" unless condition + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:branch_child, target, condition, index) + if result + puts JSON.generate([result.kind, result.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(condition_kind) + .arg(condition_text) + .arg(index.to_string()) + .output() + .expect("run ruby private branch_child helper"); + assert!( + output.status.success(), + "ruby branch_child 
helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby branch_child output should be json"); + if value.is_null() { + return None; + } + let pair = value + .as_array() + .expect("ruby branch_child output should be an array"); + Some(( + pair[0] + .as_str() + .expect("branch_child kind should be string") + .to_string(), + pair[1] + .as_str() + .expect("branch_child text should be string") + .to_string(), + )) + } + + fn ruby_private_wrap_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + normalized_source: bool, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby wrap temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby wrap temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + normalized_source = ARGV.fetch(3) == "true" + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + source = if normalized_source + normalizer.send(:wrap, :INNER, children: [], source: target) + else + target + end + result = normalizer.send(:wrap, :OUTER, children: [:child], source: source) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + 
"first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(if normalized_source { "true" } else { "false" }) + .output() + .expect("run ruby private wrap helper"); + assert!( + output.status.success(), + "ruby wrap helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby wrap output should be json") + } + + fn ruby_private_normalize_method_value( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize method temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize method temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = 
Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(method, target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + elsif node.is_a?(Array) + node.map { |child| value(child) } + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby private normalize method helper"); + assert!( + output.status.success(), + "ruby normalize method helper failed for {method}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby normalize method output should be json") + } + + fn ruby_private_normalize_return_node_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + elide_symbol: bool, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize return node temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize return node temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + elide_symbol = 
ARGV.fetch(3) == "true" + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:normalize_return_node, target, elide_symbol: elide_symbol) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + elsif node.is_a?(Array) + node.map { |child| value(child) } + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(if elide_symbol { "true" } else { "false" }) + .output() + .expect("run ruby private normalize_return_node helper"); + assert!( + output.status.success(), + "ruby normalize_return_node helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby normalize_return_node output should be json") + } + + fn ruby_private_normalize_body_nodes_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize body 
nodes temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize body nodes temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + if target_kind == "__root__" + target = document.root + else + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + end + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:normalize_body_nodes, target.named_children, source: target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private normalize_body_nodes helper"); + assert!( + output.status.success(), + "ruby normalize_body_nodes helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + 
serde_json::from_slice(&output.stdout) + .expect("ruby normalize_body_nodes output should be json") + } + + fn ruby_private_inline_def_from_argument_list_nil_value( + source: &str, + language: Language, + suffix: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby inline def argument nil temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby inline def argument nil temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:inline_def_from_argument_list, nil) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .output() + .expect("run ruby private inline def argument nil helper"); + assert!( + output.status.success(), + "ruby inline def argument nil helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby inline def argument nil output should be json") + } + + fn ruby_private_assignment_target_value( + source: &str, + 
language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby assignment target temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby assignment target temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + source = normalizer.send(:parent_node, target) || target + right_raw = normalizer.send(:assignment_right, source) + right = right_raw ? 
normalizer.send(:normalize_node, right_raw) : nil + result = normalizer.send(:assignment_target, target, right, source: source) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private assignment target helper"); + assert!( + output.status.success(), + "ruby assignment target helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby assignment target output should be json") + } + + fn ruby_private_normalize_multiple_assignment_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby multiple assignment temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby multiple assignment temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if 
node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + left = normalizer.send(:assignment_left, target) + right_raw = normalizer.send(:assignment_right, target) + right = right_raw ? normalizer.send(:normalize_node, right_raw) : nil + result = normalizer.send(:normalize_multiple_assignment, left, right, target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private multiple assignment helper"); + assert!( + output.status.success(), + "ruby multiple assignment helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby multiple assignment output should be json") + } + + fn ruby_private_augmented_assignment_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + operator: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby augmented assignment value temp source file"); + 
file.write_all(source.as_bytes()) + .expect("write ruby augmented assignment value temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + operator = ARGV.fetch(3).to_sym + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + source = normalizer.send(:parent_node, target) || target + right_raw = normalizer.send(:assignment_right, source) + result = normalizer.send(:augmented_assignment_value, target, operator, right_raw, source) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(operator) + .output() + .expect("run ruby private augmented assignment value helper"); + assert!( + output.status.success(), + "ruby augmented assignment value helper failed: stdout={} stderr={}", + 
String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby augmented assignment value output should be json") + } + + fn ruby_private_logical_operator_assignment_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby logical operator assignment temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby logical operator assignment temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + left = normalizer.send(:assignment_left, target) + right_raw = normalizer.send(:assignment_right, target) + right = normalizer.send(:normalize_node, right_raw) + operator = normalizer.send(:operator_assignment_operator, target) + result = normalizer.send(:normalize_logical_operator_assignment, left, operator, right, source: target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + 
node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private logical operator assignment helper"); + assert!( + output.status.success(), + "ruby logical operator assignment helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby logical operator assignment output should be json") + } + + fn ruby_private_call_arguments_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + function_mode: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby call arguments temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby call arguments temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + function_mode = ARGV.fetch(3) + target = nil + fallback_target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + fallback_target ||= node if node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + target ||= fallback_target + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + function = + case function_mode + 
when "auto" + normalizer.send(:named_field, target, "function") || + normalizer.send(:named_field, target, "call") || + target.named_children.first + when "none" + nil + else + abort "unknown function mode: #{function_mode.inspect}" + end + result = normalizer.send(:call_arguments, target, function) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(result.map { |node| value(node) }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(function_mode) + .output() + .expect("run ruby private call arguments helper"); + assert!( + output.status.success(), + "ruby call arguments helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby call arguments output should be json") + } + + fn ruby_private_normalize_call_without_block_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + block_mode: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize_call_without_block temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize_call_without_block temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let 
script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + block_mode = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + block = + case block_mode + when "auto" + normalizer.send(:call_block, target) + when "none" + nil + else + abort "unknown block mode: #{block_mode.inspect}" + end + result = normalizer.send(:normalize_call_without_block, target, block) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(block_mode) + .output() + .expect("run ruby private normalize_call_without_block helper"); + assert!( + output.status.success(), + "ruby normalize_call_without_block helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby normalize_call_without_block output should be json") + } + + fn 
ruby_private_normalize_patterns_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize_patterns temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize_patterns temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:normalize_patterns, target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(result.map { |node| value(node) }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private normalize_patterns helper"); + assert!( + output.status.success(), + "ruby 
normalize_patterns helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby normalize_patterns output should be json") + } + + fn ruby_private_command_arguments_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby command arguments temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby command arguments temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + fallback_target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + fallback_target ||= node if node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + target ||= fallback_target + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:command_arguments, target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(result.map { |node| value(node) }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + 
.env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private command arguments helper"); + assert!( + output.status.success(), + "ruby command arguments helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby command arguments output should be json") + } + + fn ruby_private_const_for_nil_value(source: &str, language: Language, suffix: &str) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby const_for nil temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby const_for nil temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:const_for, nil) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .output() + .expect("run 
ruby private const_for nil helper"); + assert!( + output.status.success(), + "ruby const_for nil helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby const_for nil output should be json") + } + + fn ruby_private_source_before_child_wrap_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + child_kind: &str, + child_text: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby source_before_child temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby source_before_child temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + child_kind = ARGV.fetch(3) + child_text = ARGV.fetch(4) + target = nil + child = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + child ||= node if node.kind == child_kind && node.text.to_s == child_text + node.named_children.each { |next_child| walk.call(next_child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + abort "child node not found" unless child + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + source = normalizer.send(:source_before_child, target, child) + result = normalizer.send(:wrap, :OUTER, children: [], source: source) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + 
"last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(child_kind) + .arg(child_text) + .output() + .expect("run ruby private source_before_child helper"); + assert!( + output.status.success(), + "ruby source_before_child helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby source_before_child output should be json") + } + + fn ruby_private_source_from_nodes_value( + source: &str, + language: Language, + suffix: &str, + first_kind: &str, + first_text: &str, + last_kind: &str, + last_text: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby source_from_nodes temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby source_from_nodes temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + first_kind = ARGV.fetch(1) + first_text = ARGV.fetch(2) + last_kind = ARGV.fetch(3) + last_text = ARGV.fetch(4) + first_node = nil + last_node = nil + walk = lambda do |node| + if node.respond_to?(:kind) + first_node ||= node if node.kind == first_kind && node.text.to_s == first_text + last_node = node if node.kind == last_kind && node.text.to_s == last_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort 
"first node not found: #{first_kind} #{first_text.inspect}" unless first_node + abort "last node not found: #{last_kind} #{last_text.inspect}" unless last_node + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:source_from_nodes, first_node, last_node) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(first_kind) + .arg(first_text) + .arg(last_kind) + .arg(last_text) + .output() + .expect("run ruby private source_from_nodes helper"); + assert!( + output.status.success(), + "ruby source_from_nodes helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby source_from_nodes output should be json") + } + + fn ruby_private_source_from_normalized_nodes_value( + source: &str, + language: Language, + suffix: &str, + first_kind: &str, + first_text: &str, + last_kind: &str, + last_text: &str, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby source_from_normalized_nodes temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby source_from_normalized_nodes temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex 
rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + first_kind = ARGV.fetch(1) + first_text = ARGV.fetch(2) + last_kind = ARGV.fetch(3) + last_text = ARGV.fetch(4) + first_raw = nil + last_raw = nil + walk = lambda do |node| + if node.respond_to?(:kind) + first_raw ||= node if node.kind == first_kind && node.text.to_s == first_text + last_raw ||= node if node.kind == last_kind && node.text.to_s == last_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "first node not found" unless first_raw + abort "last node not found" unless last_raw + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + first_node = normalizer.send(:wrap, :FIRST, children: [], source: first_raw) + last_node = normalizer.send(:wrap, :LAST, children: [], source: last_raw) + result = normalizer.send(:source_from_normalized_nodes, first_node, last_node) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(first_kind) + .arg(first_text) + .arg(last_kind) + .arg(last_text) + .output() + .expect("run ruby private source_from_normalized_nodes helper"); + assert!( + output.status.success(), + "ruby source_from_normalized_nodes helper failed: stdout={} stderr={}", + 
String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby source_from_normalized_nodes output should be json") + } + + fn ruby_private_dynamic_string_source_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> Option<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby dynamic_string_source temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby dynamic_string_source temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalized = target.named_children.map { |child| [child, normalizer.send(:normalize_node, child)] } + result = normalizer.send(:dynamic_string_source, normalized) + if result + puts JSON.generate([result.kind, result.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private dynamic_string_source helper"); + assert!( + output.status.success(), + "ruby dynamic_string_source helper failed: stdout={} stderr={}", + 
String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby dynamic_string_source output should be json"); + if value.is_null() { + return None; + } + let pair = value + .as_array() + .expect("ruby dynamic_string_source output should be an array"); + Some(( + pair[0] + .as_str() + .expect("dynamic_string_source kind should be string") + .to_string(), + pair[1] + .as_str() + .expect("dynamic_string_source text should be string") + .to_string(), + )) + } + + fn ruby_private_operator_assignment_statement_parts_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> Option<(String, String, String, String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby operator_assignment_statement_parts temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby operator_assignment_statement_parts temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + left, operator, right = normalizer.send(:operator_assignment_statement_parts, target) + if left && operator && right + puts JSON.generate([left.kind, left.text.to_s, operator.to_s, right.kind, right.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + 
.env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private operator_assignment_statement_parts helper"); + assert!( + output.status.success(), + "ruby operator_assignment_statement_parts helper failed for {language:?} {kind:?} {text:?}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby operator_assignment_statement_parts output should be json"); + if value.is_null() { + return None; + } + let parts = value + .as_array() + .expect("ruby operator_assignment_statement_parts output should be an array"); + Some(( + parts[0] + .as_str() + .expect("operator_assignment left kind should be string") + .to_string(), + parts[1] + .as_str() + .expect("operator_assignment left text should be string") + .to_string(), + parts[2] + .as_str() + .expect("operator_assignment operator should be string") + .to_string(), + parts[3] + .as_str() + .expect("operator_assignment right kind should be string") + .to_string(), + parts[4] + .as_str() + .expect("operator_assignment right text should be string") + .to_string(), + )) + } + + fn ruby_private_modifier_parts_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> Option<((String, String), (String, String))> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby modifier_parts temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby modifier_parts temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: 
"tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + action, condition = normalizer.send(:modifier_parts, target) + if action && condition + puts JSON.generate([[action.kind, action.text.to_s], [condition.kind, condition.text.to_s]]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private modifier_parts helper"); + assert!( + output.status.success(), + "ruby modifier_parts helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby modifier_parts output should be json"); + if value.is_null() { + return None; + } + let pairs = value + .as_array() + .expect("ruby modifier_parts output should be an array"); + let action = pairs[0] + .as_array() + .expect("modifier_parts action should be an array"); + let condition = pairs[1] + .as_array() + .expect("modifier_parts condition should be an array"); + Some(( + ( + action[0] + .as_str() + .expect("modifier_parts action kind should be string") + .to_string(), + action[1] + .as_str() + .expect("modifier_parts action text should be string") + .to_string(), + ), + ( + condition[0] + .as_str() + .expect("modifier_parts condition kind should be string") + .to_string(), + condition[1] + .as_str() + 
.expect("modifier_parts condition text should be string") + .to_string(), + ), + )) + } + + fn ruby_private_visibility_inline_def_statement_predicate( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + ) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby visibility_inline_def_statement temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby visibility_inline_def_statement temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(:visibility_inline_def_statement?, target, target.named_children.first) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private visibility_inline_def_statement helper"); + assert!( + output.status.success(), + "ruby visibility_inline_def_statement helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby visibility_inline_def_statement output should be utf8") + .trim() + == "true" + } + + fn 
ruby_private_drop_trailing_nil_statement_value(input: &Value) -> Value { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def node(value) + return nil if value.nil? + return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + result = normalizer.send(:drop_trailing_nil_statement, node(JSON.parse(ARGV.fetch(0)))) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "json", + "-e", + script, + ]) + .arg(input.to_string()) + .output() + .expect("run ruby private drop_trailing_nil_statement helper"); + assert!( + output.status.success(), + "ruby drop_trailing_nil_statement helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby drop_trailing_nil_statement output should be json") + } + + fn ruby_private_elide_tail_returns_value(input: &Value, ruby: bool) -> Value { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex 
rust dir should have gem parent"); + let script = r#" + def node(value) + return nil if value.nil? + return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + adapter = if ARGV.fetch(1) == "ruby" + Decomplex::Ast::RubyTreeSitterNormalizationAdapter.new(nil) + else + Decomplex::Ast::TreeSitterNormalizationAdapter.new(nil) + end + normalizer.instance_variable_set(:@normalization_adapter, adapter) + result = normalizer.send(:elide_tail_returns, node(JSON.parse(ARGV.fetch(0)))) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "json", + "-e", + script, + ]) + .arg(input.to_string()) + .arg(if ruby { "ruby" } else { "other" }) + .output() + .expect("run ruby private elide_tail_returns helper"); + assert!( + output.status.success(), + "ruby elide_tail_returns helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby elide_tail_returns output should be json") + } + + fn ruby_private_elide_implicit_nil_body_value(input: &Value, ruby: bool) -> Value { + let 
decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def node(value) + return nil if value.nil? + return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + adapter = if ARGV.fetch(1) == "ruby" + Decomplex::Ast::RubyTreeSitterNormalizationAdapter.new(nil) + else + Decomplex::Ast::TreeSitterNormalizationAdapter.new(nil) + end + normalizer.instance_variable_set(:@normalization_adapter, adapter) + result = normalizer.send(:elide_implicit_nil_body, node(JSON.parse(ARGV.fetch(0)))) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "json", + "-e", + script, + ]) + .arg(input.to_string()) + .arg(if ruby { "ruby" } else { "other" }) + .output() + .expect("run ruby private elide_implicit_nil_body helper"); + assert!( + output.status.success(), + "ruby elide_implicit_nil_body helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby elide_implicit_nil_body output should 
be json") + } + + fn ruby_private_prepend_rescue_exception_assignment_value( + source: &str, + body: &Value, + assignment: &Value, + ) -> Value { + let mut file = tempfile::Builder::new() + .suffix(".rb") + .tempfile() + .expect("create ruby prepend rescue temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby prepend rescue temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def node(value) + return nil if value.nil? + return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + body = node(JSON.parse(ARGV.fetch(1))) + assignment = node(JSON.parse(ARGV.fetch(2))) + result = normalizer.send(:prepend_rescue_exception_assignment, body, assignment) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", "ruby") + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(body.to_string()) + 
.arg(assignment.to_string()) + .output() + .expect("run ruby private prepend_rescue_exception_assignment helper"); + assert!( + output.status.success(), + "ruby prepend_rescue_exception_assignment helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby prepend_rescue_exception_assignment output should be json") + } + + fn ruby_private_symbol_literal_node_predicate( + node_type: Option<&str>, + child_kind: Option<&str>, + ) -> bool { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def child(kind) + case kind + when "symbol" + :value + when "string" + "value" + when "node" + Decomplex::Ast::Node.new( + type: :NIL, + children: [], + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: "NIL" + ) + when "nil" + nil + else + nil + end + end + + node_type = ARGV.fetch(0) + child_kind = ARGV.fetch(1) + target = if node_type == "none" + nil + else + children = child_kind == "none" ? [] : [child(child_kind)] + Decomplex::Ast::Node.new( + type: node_type.to_sym, + children: children, + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: node_type + ) + end + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + puts normalizer.send(:symbol_literal_node?, target) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(node_type.unwrap_or("none")) + .arg(child_kind.unwrap_or("none")) + .output() + .expect("run ruby private symbol_literal_node? helper"); + assert!( + output.status.success(), + "ruby symbol_literal_node? 
helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby symbol_literal_node? output should be utf8") + .trim() + == "true" + } + + fn ruby_private_same_ts_node_predicate( + source: &str, + language: Language, + suffix: &str, + left_kind: &str, + left_text: &str, + left_index: usize, + right_kind: &str, + right_text: &str, + right_index: usize, + ) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby same_ts_node temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby same_ts_node temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + left_kind = ARGV.fetch(1) + left_text = ARGV.fetch(2) + left_index = ARGV.fetch(3).to_i + right_kind = ARGV.fetch(4) + right_text = ARGV.fetch(5) + right_index = ARGV.fetch(6).to_i + + def matches(root, kind, text) + found = [] + walk = lambda do |node| + if node.respond_to?(:kind) + found << node if node.kind == kind && node.text.to_s == text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(root) + found + end + + left = matches(document.root, left_kind, left_text).fetch(left_index) + right = matches(document.root, right_kind, right_text).fetch(right_index) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(:same_ts_node?, left, right) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(left_kind) + .arg(left_text) + .arg(left_index.to_string()) + .arg(right_kind) + 
.arg(right_text) + .arg(right_index.to_string()) + .output() + .expect("run ruby private same_ts_node? helper"); + assert!( + output.status.success(), + "ruby same_ts_node? helper failed for {language:?}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby same_ts_node? output should be utf8") + .trim() + == "true" + } + + fn ruby_private_parent_named_child_predicate( + source: &str, + language: Language, + suffix: &str, + parent_kind: &str, + parent_text: &str, + parent_index: usize, + child_kind: &str, + child_text: &str, + child_index: usize, + ) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby parent_named_child temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby parent_named_child temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + parent_kind = ARGV.fetch(1) + parent_text = ARGV.fetch(2) + parent_index = ARGV.fetch(3).to_i + child_kind = ARGV.fetch(4) + child_text = ARGV.fetch(5) + child_index = ARGV.fetch(6).to_i + + def matches(root, kind, text) + found = [] + walk = lambda do |node| + if node.respond_to?(:kind) + found << node if node.kind == kind && node.text.to_s == text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(root) + found + end + + parent = matches(document.root, parent_kind, parent_text).fetch(parent_index) + child = matches(document.root, child_kind, child_text).fetch(child_index) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(:parent_named_child?, parent, child) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", 
ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(parent_kind) + .arg(parent_text) + .arg(parent_index.to_string()) + .arg(child_kind) + .arg(child_text) + .arg(child_index.to_string()) + .output() + .expect("run ruby private parent_named_child? helper"); + assert!( + output.status.success(), + "ruby parent_named_child? helper failed for {language:?}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby parent_named_child? output should be utf8") + .trim() + == "true" + } + + fn ruby_private_node_key_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + index: usize, + ) -> (String, usize, usize) { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby node_key temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby node_key temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target_index = ARGV.fetch(3).to_i + found = [] + walk = lambda do |node| + if node.respond_to?(:kind) + found << node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + target = found.fetch(target_index) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts JSON.generate(normalizer.send(:node_key, target)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + 
"-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(index.to_string()) + .output() + .expect("run ruby private node_key helper"); + assert!( + output.status.success(), + "ruby node_key helper failed for {language:?}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = + serde_json::from_slice(&output.stdout).expect("ruby node_key output should be json"); + let key = value + .as_array() + .expect("ruby node_key output should be an array"); + ( + key[0] + .as_str() + .expect("node_key kind should be string") + .to_string(), + key[1] + .as_u64() + .expect("node_key start byte should be integer") as usize, + key[2] + .as_u64() + .expect("node_key end byte should be integer") as usize, + ) + } + + #[test] + fn tree_normalizer_new_initializes_empty_state() { + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + assert_eq!(normalizer.source, ""); + assert_eq!(normalizer.language, Language::Ruby); + assert!(normalizer.local_stack.is_empty()); + assert_eq!(normalizer.root_span, None); + } + + #[test] + fn normalize_root_matches_ruby_across_tree_normalizer_languages() { + for (source, language, suffix) in [ + ( + "class C\n def each(value)\n yield value\n case value\n when 1 then :one\n else :other\n end\n end\nend\n", + Language::Ruby, + ".rb", + ), + ( + "def gen(value):\n yield value\n other()\n", + Language::Python, + ".py", + ), + ( + "function f(value: number) { switch (value) { case 1: one(); break; default: other(); } return value ? 
one() : other(); }\n", + Language::TypeScript, + ".ts", + ), + ( + "function f(value)\n if value then\n one()\n else\n other()\n end\n return value\nend\n", + Language::Lua, + ".lua", + ), + ] { + assert_ruby_parity(source, language, suffix); + } + } + + #[test] + fn tree_normalizer_yield_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield :item\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield :item", + ), + ( + "def each\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "def gen():\n yield from items\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield from items", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "block", + "yield item\n other()", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.yield_statement(node), + ruby_private_predicate(source, language, suffix, "yield_statement?", kind, text), + "yield_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn yield_argument_list_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield(:item)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "(:item)", + ), + ( + "def each\n yield :item\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":item", + ), + ( + "def call\n foo(:item)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "(:item)", + ), + ( + "yield_value(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ( + "yield(value);\n", + Language::TypeScript, + ".ts", + "parenthesized_expression", + "(value)", + ), + ( + "coroutine.yield(value)\n", + Language::Lua, + ".lua", + "arguments", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.yield_argument_list(node), + ruby_private_predicate( + source, + language, + suffix, + "yield_argument_list?", + kind, + text + ), + "yield_argument_list? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn yield_argument_nodes_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield(:item)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "(:item)", + ), + ( + "def each\n yield nil\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "nil", + ), + ( + "def each\n yield item, other\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "item, other", + ), + ( + "yield_value(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ( + "yield(value);\n", + Language::TypeScript, + ".ts", + "parenthesized_expression", + "(value)", + ), + ( + "coroutine.yield(value)\n", + Language::Lua, + ".lua", + "arguments", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = Value::Array( + normalizer + .yield_argument_nodes(node) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "yield_argument_nodes", + kind, + text + ), + "yield_argument_nodes mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn yield_inline_arguments_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "function* gen() { yield item; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "yield item;", + ), + ( + "coroutine.yield(item)\n", + Language::Lua, + ".lua", + "function_call", + "coroutine.yield(item)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, 
text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = Value::Array( + normalizer + .yield_inline_arguments(node) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "yield_inline_arguments", + kind, + text + ), + "yield_inline_arguments mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_yield_argument_list_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield(:item)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "(:item)", + ), + ( + "def each\n yield :item\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":item", + ), + ( + "def each\n yield nil\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "nil", + ), + ( + "yield_value(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_yield_argument_list(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_yield_argument_list", + kind, + text + ), + "normalize_yield_argument_list mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_yield_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield", + ), + ( + "def each\n yield item\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield item", + ), + ( + "def each\n yield nil\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield nil", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + 
( + "function* gen() { yield item; }\n", + Language::TypeScript, + ".ts", + "yield_expression", + "yield item", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_yield(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_yield", + kind, + text + ), + "normalize_yield mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_yield_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield", + ), + ( + "def each\n yield item\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield item", + ), + ( + "def each\n yield nil\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield nil", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "def gen():\n yield from items\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield from items", + ), + ( + "function* gen() { yield item; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "yield item;", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_yield_statement(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_yield_statement", + kind, + text + ), + "normalize_yield_statement mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_node_dispatch_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ 
+ ( + "def each\n yield item\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield item", + ), + ( + "def check\n !flag\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "!flag", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); default: other(); }", + ), + ( + "if value then one() else other() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value then one() else other() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_node(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_node", + kind, + text + ), + "normalize_node mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn python_yield_statement_in_multi_statement_block_matches_ruby_ast() { + let source = "def gen():\n yield item\n other()\n"; + assert_ruby_parity(source, Language::Python, ".py"); + + let root = parse_language_source(source, Language::Python, ".py"); + let defn = first_node(&root, "DEFN", "def gen():\n yield item\n other()"); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + + assert_eq!(body.r#type, "BLOCK"); + assert_eq!(child_types(body), vec!["YIELD", "EXPRESSION_STATEMENT"]); + } + + #[test] + fn tree_normalizer_super_statement_matches_ruby_private_predicate() { + for (source, kind, text) in [ + ( + "class Child < Parent\n def call\n super\n end\nend\n", + "body_statement", + "super", + ), + ( + "class Child < Parent\n def call\n super :item\n end\nend\n", + "body_statement", + "super :item", + 
), + ( + "class Child < Parent\n def call\n value\n end\nend\n", + "body_statement", + "value", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + + assert_eq!( + normalizer.super_statement(node), + ruby_private_predicate( + source, + Language::Ruby, + ".rb", + "super_statement?", + kind, + text + ), + "super_statement? mismatch for {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_super_statement_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "class Child < Parent\n def call\n super\n end\nend\n", + "body_statement", + "super", + ), + ( + "class Child < Parent\n def call\n super :item\n end\nend\n", + "body_statement", + "super :item", + ), + ( + "class Child < Parent\n def call\n super value\n end\nend\n", + "body_statement", + "super value", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = node_value(&normalizer.normalize_super_statement(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_super_statement", + kind, + text + ), + "normalize_super_statement mismatch for {kind} {text:?}" + ); + } + } + + #[test] + fn ruby_super_statement_normalization_matches_ruby_ast() { + let source = "class Child < Parent\n def bare\n super\n end\n def with_arg\n super :item\n end\nend\n"; + assert_ruby_parity(source, Language::Ruby, ".rb"); + + let root = parse_language_source(source, Language::Ruby, ".rb"); + let bare = first_node(&root, "SUPER", "super"); + let with_arg = first_node(&root, "SUPER", "super :item"); + + assert_eq!(bare.children, vec![Child::Nil]); + assert_eq!(child_types(with_arg), vec!["LIST"]); + assert_eq!(child_types(child_node(with_arg, 0)), 
vec!["LIT"]); + } + + #[test] + fn tree_normalizer_argument_list_element_reference_matches_ruby_private_predicate() { + for (source, text) in [ + ("def indexed\n return items[0]\nend\n", "items[0]"), + ("def indexed\n return obj.foo[0]\nend\n", "obj.foo[0]"), + ("def indexed\n return [0]\nend\n", "[0]"), + ( + "def indexed\n return items[0], other\nend\n", + "items[0], other", + ), + ("def indexed\n return items[]\nend\n", "items[]"), + ( + "def indexed\n return items[0] { nope }\nend\n", + "items[0] { nope }", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "argument_list", text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + + assert_eq!( + normalizer.argument_list_element_reference(node), + ruby_private_predicate( + source, + Language::Ruby, + ".rb", + "argument_list_element_reference?", + "argument_list", + text + ), + "argument_list_element_reference? mismatch for {text:?}" + ); + } + } + + #[test] + fn normalize_argument_list_element_reference_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def indexed\n return items[0]\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "items[0]", + ), + ( + "def indexed\n return obj.foo[0]\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "obj.foo[0]", + ), + ( + "def indexed\n return [0]\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "[0]", + ), + ( + "def indexed\n return items[0], other\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "items[0], other", + ), + ( + "def indexed\n return items[0] { nope }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "items[0] { nope }", + ), + ( + "def indexed():\n return foo(items[0])\n", + Language::Python, + ".py", + "argument_list", + "(items[0])", + ), + ( + "function indexed(){ return foo(items[0]); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(items[0])", + ), + ( + "function indexed() return 
foo(items[0]) end\n", + Language::Lua, + ".lua", + "arguments", + "(items[0])", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_argument_list_element_reference(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_argument_list_element_reference", + kind, + text + ), + "normalize_argument_list_element_reference mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn dynamic_scope_rewrites_locals_without_crossing_scope_boundaries() { + let inner_assignment = test_node("LASGN", vec![Child::Symbol("inner".to_string())]); + let node = test_node( + "BLOCK", + vec![ + Child::Node(Box::new(test_node( + "LASGN", + vec![Child::Symbol("value".to_string())], + ))), + Child::Node(Box::new(test_node( + "LVAR", + vec![Child::Symbol("value".to_string())], + ))), + Child::Node(Box::new(test_node( + "DEFN", + vec![ + Child::Symbol("nested".to_string()), + Child::Node(Box::new(test_node( + "SCOPE", + vec![ + Child::Nil, + Child::Nil, + Child::Node(Box::new(inner_assignment)), + ], + ))), + ], + ))), + ], + ); + + let result = super::dynamic_scope(node); + + assert_eq!(child_node(&result, 0).r#type, "DASGN"); + assert_eq!(child_node(&result, 1).r#type, "DVAR"); + let nested = child_node(&result, 2); + assert_eq!(nested.r#type, "DEFN"); + let nested_scope = child_node(nested, 1); + assert_eq!(nested_scope.r#type, "SCOPE"); + assert_eq!(child_node(nested_scope, 2).r#type, "LASGN"); + } + + #[test] + fn link_when_chain_sets_next_arm_and_pads_short_when_nodes() { + let fallback = test_node("ELSE", Vec::new()); + let first = test_node( + "WHEN", + vec![ + Child::Symbol("patterns".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let second = test_node( + "WHEN", + vec![ 
+ Child::Symbol("patterns".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + let result = normalizer + .link_when_chain(vec![first, second], Some(fallback)) + .expect("expected linked when chain"); + + assert_eq!(result.r#type, "WHEN"); + let next = child_node(&result, 2); + assert_eq!(next.r#type, "WHEN"); + assert_eq!(child_node(next, 2).r#type, "ELSE"); + + let short = test_node("WHEN", vec![Child::Symbol("patterns".to_string())]); + let fallback = test_node("ELSE", Vec::new()); + let result = normalizer + .link_when_chain(vec![short], Some(fallback)) + .expect("expected padded when chain"); + + assert_eq!(result.children.len(), 3); + assert_eq!(result.children[1], Child::Nil); + assert_eq!(child_node(&result, 2).r#type, "ELSE"); + } + + #[test] + fn link_rescue_chain_sets_next_rescue_and_pads_short_resbody_nodes() { + let first = test_node( + "RESBODY", + vec![ + Child::Symbol("exceptions".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let second = test_node( + "RESBODY", + vec![ + Child::Symbol("exceptions".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + let result = normalizer + .link_rescue_chain(vec![first, second]) + .expect("expected linked rescue chain"); + + assert_eq!(result.r#type, "RESBODY"); + let next = child_node(&result, 2); + assert_eq!(next.r#type, "RESBODY"); + assert_eq!(next.children[2], Child::Nil); + + let short = test_node("RESBODY", vec![Child::Symbol("exceptions".to_string())]); + let result = normalizer + .link_rescue_chain(vec![short]) + .expect("expected padded rescue chain"); + + assert_eq!(result.children.len(), 3); + assert_eq!(result.children[1], Child::Nil); + assert_eq!(result.children[2], Child::Nil); + } + + #[test] + fn infix_statement_parts_extracts_allowed_wrapper_parts() { + let source = "def calc\n left + right\nend\n"; + let tree = raw_tree(source, 
Language::Ruby); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let body = first_raw_node(tree.root_node(), source, "body_statement", "left + right"); + let binary = first_raw_node(tree.root_node(), source, "binary", "left + right"); + + assert_eq!( + infix_parts_text(&normalizer, body, source), + Some(("left".to_string(), "+".to_string(), "right".to_string())) + ); + assert_eq!(infix_parts_text(&normalizer, binary, source), None); + + let source = "def calc\n return left + right\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let args = first_raw_node(tree.root_node(), source, "argument_list", "left + right"); + assert_eq!( + infix_parts_text(&normalizer, args, source), + Some(("left".to_string(), "+".to_string(), "right".to_string())) + ); + + let source = "def calc\n left && right\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let boolean = first_raw_node(tree.root_node(), source, "body_statement", "left && right"); + assert_eq!(infix_parts_text(&normalizer, boolean, source), None); + } + + #[test] + fn infix_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "def calc\n return left + right\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "left + right", + ), + ( + "def calc\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), 
+ ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.infix_statement(node), + ruby_private_predicate(source, language, suffix, "infix_statement?", kind, text), + "infix_statement? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_infix_statement_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def calc\n left + right\nend\n", + "body_statement", + "left + right", + ), + ( + "def calc\n return left + right\nend\n", + "argument_list", + "left + right", + ), + ( + "def match\n value =~ /left/\nend\n", + "body_statement", + "value =~ /left/", + ), + ( + "def match\n value =~ pattern\nend\n", + "body_statement", + "value =~ pattern", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_infix_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_infix_statement", + kind, + text + ), + "normalize_infix_statement mismatch for {kind} {text:?}" + ); + } + } + + #[test] + fn regex_literal_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value =~ /left/\n", + Language::Ruby, + ".rb", + "regex", + "/left/", + ), + ( + "value = \"left\"\n", + Language::Ruby, + ".rb", + "string", + "\"left\"", + ), + ( + "const pattern = /left/;\n", + Language::TypeScript, + ".ts", + "regex", + "/left/", + ), + ( + "pattern = r\"left\"\n", + Language::Python, + ".py", + "string", + "r\"left\"", + ), + ( + "local pattern = \"left\"\n", + Language::Lua, + ".lua", + "string_content", + "left", + ), + ] { + let tree = 
raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.regex_literal(Some(node)), + ruby_private_predicate(source, language, suffix, "regex_literal?", kind, text), + "regex_literal? mismatch for {language:?} {kind} {text:?}" + ); + } + + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + assert_eq!( + normalizer.regex_literal(None), + ruby_private_regex_literal_value("nil") + ); + assert!(!ruby_private_regex_literal_value("string")); + assert!(!ruby_private_regex_literal_value("normalized_node")); + } + + #[test] + fn argument_list_unary_not_matches_ruby_private_predicate() { + for (line, text) in [ + ("return !flag", "!flag"), + ("return !!flag", "!!flag"), + ("return flag", "flag"), + ("return !flag, other", "!flag, other"), + ("return (!flag)", "(!flag)"), + ("return not flag", "not flag"), + ] { + let source = format!("def check\n {line}\nend\n"); + let tree = raw_tree(&source, Language::Ruby); + let node = first_raw_node(tree.root_node(), &source, "argument_list", text); + let normalizer = super::TreeSitterNormalizer::new(&source, Language::Ruby); + + assert_eq!( + normalizer.argument_list_unary_not(node), + ruby_private_predicate( + &source, + Language::Ruby, + ".rb", + "argument_list_unary_not?", + "argument_list", + text + ), + "argument_list_unary_not? 
mismatch for {line:?}" + ); + } + } + + #[test] + fn normalize_argument_list_unary_not_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n return !flag\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "!flag", + ), + ( + "def check\n return !!flag\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "!!flag", + ), + ( + "def check\n return flag\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "flag", + ), + ( + "def check\n return !flag, other\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "!flag, other", + ), + ( + "def check():\n return foo(not flag)\n", + Language::Python, + ".py", + "argument_list", + "(not flag)", + ), + ( + "function check(){ return foo(!flag); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(!flag)", + ), + ( + "function check() return foo(not flag) end\n", + Language::Lua, + ".lua", + "arguments", + "(not flag)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_argument_list_unary_not(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_argument_list_unary_not", + kind, + text + ), + "normalize_argument_list_unary_not mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn unary_not_statement_matches_ruby_private_predicate() { + for (line, text) in [ + ("!flag", "!flag"), + ("!!flag", "!!flag"), + ("flag", "flag"), + ("!flag; other", "!flag; other"), + ("(!flag)", "(!flag)"), + ("not flag", "not flag"), + ] { + let source = format!("def check\n {line}\nend\n"); + let tree = raw_tree(&source, Language::Ruby); + let node = first_raw_node(tree.root_node(), &source, "body_statement", text); + let normalizer = 
super::TreeSitterNormalizer::new(&source, Language::Ruby); + + assert_eq!( + normalizer.unary_not_statement(node), + ruby_private_predicate( + &source, + Language::Ruby, + ".rb", + "unary_not_statement?", + "body_statement", + text + ), + "unary_not_statement? mismatch for {line:?}" + ); + } + } + + #[test] + fn unary_not_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "!flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "!!flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "-flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "not flag", + ), + ( + "function check(flag: boolean) { return !flag; }\n", + Language::TypeScript, + ".ts", + "unary_expression", + "!flag", + ), + ( + "if not flag:\n pass\n", + Language::Python, + ".py", + "not_operator", + "not flag", + ), + ( + "if not flag then end\n", + Language::Lua, + ".lua", + "unary_expression", + "not flag", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.unary_not_expression(node), + ruby_private_predicate( + source, + language, + suffix, + "unary_not_expression?", + kind, + text + ), + "unary_not_expression? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_unary_not_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "!flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "!!flag", + ), + ( + "function check(flag: boolean) { return !flag; }\n", + Language::TypeScript, + ".ts", + "unary_expression", + "!flag", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_unary_not(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_unary_not", + kind, + text + ), + "normalize_unary_not mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_unary_not_statement_matches_ruby_private_method() { + for (line, text) in [("!flag", "!flag"), ("!!flag", "!!flag")] { + let source = format!("def check\n {line}\nend\n"); + let tree = raw_tree(&source, Language::Ruby); + let node = first_raw_node(tree.root_node(), &source, "body_statement", text); + let mut normalizer = super::TreeSitterNormalizer::new(&source, Language::Ruby); + let rust = normalizer + .normalize_unary_not_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + &source, + Language::Ruby, + ".rb", + "normalize_unary_not_statement", + "body_statement", + text + ), + "normalize_unary_not_statement mismatch for {text:?}" + ); + } + } + + #[test] + fn unary_minus_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n -flag\n !flag\n 
value\nend\n", + Language::Ruby, + ".rb", + "unary", + "-flag", + ), + ( + "def check\n -flag\n !flag\n value\nend\n", + Language::Ruby, + ".rb", + "unary", + "!flag", + ), + ( + "function check(value: number) { return -value; }\n", + Language::TypeScript, + ".ts", + "unary_expression", + "-value", + ), + ( + "x = -value\n", + Language::Python, + ".py", + "unary_operator", + "-value", + ), + ( + "local x = -value\n", + Language::Lua, + ".lua", + "expression_list", + "-value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.unary_minus_expression(node), + ruby_private_predicate( + source, + language, + suffix, + "unary_minus_expression?", + kind, + text + ), + "unary_minus_expression? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_unary_minus_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n -1\n -flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "-1", + ), + ( + "def check\n -1\n -flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "-flag", + ), + ( + "function check(value: number) { return -value; }\n", + Language::TypeScript, + ".ts", + "unary_expression", + "-value", + ), + ( + "x = -value\n", + Language::Python, + ".py", + "unary_operator", + "-value", + ), + ( + "local x = -value\n", + Language::Lua, + ".lua", + "expression_list", + "-value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_unary_minus(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_unary_minus", + kind, + text + ), + 
"normalize_unary_minus mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn binary_operator_matches_ruby_private_helper() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\n left && right\n value\nend\n", + Language::Ruby, + ".rb", + "binary", + "left + right", + ), + ( + "def calc\n left + right\n left && right\n value\nend\n", + Language::Ruby, + ".rb", + "binary", + "left && right", + ), + ( + "def calc\n left + right\n left && right\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right\n left && right\n value", + ), + ( + "const value = left + right && other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right && other", + ), + ( + "const value = left + right && other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left + right and other\n", + Language::Python, + ".py", + "boolean_operator", + "left + right and other", + ), + ( + "value = left + right and other\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left + right and other\n", + Language::Lua, + ".lua", + "expression_list", + "left + right and other", + ), + ( + "local value = left + right and other\n", + Language::Lua, + ".lua", + "binary_expression", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.binary_operator(node).unwrap_or_default(), + ruby_private_string(source, language, suffix, "binary_operator", kind, text), + "binary_operator mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn boolean_operator_matches_ruby_private_helper() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left && right\n left || right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + 
"left && right", + ), + ( + "def calc\n left && right\n left || right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left || right", + ), + ( + "def calc\n left && right\n left || right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left + right", + ), + ( + "const value = left && right || other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right", + ), + ( + "const value = left && right || other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right || other", + ), + ( + "value = left and right or other\n", + Language::Python, + ".py", + "boolean_operator", + "left and right", + ), + ( + "value = left and right or other\n", + Language::Python, + ".py", + "boolean_operator", + "left and right or other", + ), + ( + "local value = left and right or other\n", + Language::Lua, + ".lua", + "expression_list", + "left and right or other", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.boolean_operator(node).unwrap_or_default(), + ruby_private_string(source, language, suffix, "boolean_operator", kind, text), + "boolean_operator mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn comparison_operator_matches_ruby_private_helper() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left == right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left == right", + ), + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "const value = left === right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left === right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + 
"binary_expression", + "left + right", + ), + ( + "value = left == right\n", + Language::Python, + ".py", + "comparison_operator", + "left == right", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left == right\n", + Language::Lua, + ".lua", + "expression_list", + "left == right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.comparison_operator(node).unwrap_or_default(), + ruby_private_string(source, language, suffix, "comparison_operator", kind, text), + "comparison_operator mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn comparison_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left == right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left == right", + ), + ( + "const value = left === right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left === right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left == right\n", + Language::Python, + ".py", + "comparison_operator", + "left == right", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left == right\n", + Language::Lua, + ".lua", + "expression_list", + "left == right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, 
language); + + assert_eq!( + normalizer.comparison_expression(node), + ruby_private_predicate( + source, + language, + suffix, + "comparison_expression?", + kind, + text + ), + "comparison_expression? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn comparison_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("value = left == right\n", Language::Python, ".py"), + ( + "const value = left === right;\n", + Language::TypeScript, + ".ts", + ), + ("local value = left == right\n", Language::Lua, ".lua"), + ] { + assert_ruby_parity(source, language, suffix); + } + } + + #[test] + fn normalize_comparison_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left == right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left == right", + ), + ( + "value = left == right\n", + Language::Python, + ".py", + "comparison_operator", + "left == right", + ), + ( + "const value = left === right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left === right", + ), + ( + "local value = left == right\n", + Language::Lua, + ".lua", + "expression_list", + "left == right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_comparison(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_comparison", + kind, + text + ), + "normalize_comparison mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn boolean_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "def calc\n left or right\nend\n", + 
Language::Ruby, + ".rb", + "body_statement", + "left or right", + ), + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "foo(left && right)\n", + Language::Ruby, + ".rb", + "argument_list", + "(left && right)", + ), + ( + "value = left and right\n", + Language::Python, + ".py", + "boolean_operator", + "left and right", + ), + ( + "local value = left and right\n", + Language::Lua, + ".lua", + "expression_list", + "left and right", + ), + ( + "const value = left && right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.boolean_statement(node), + ruby_private_predicate(source, language, suffix, "boolean_statement?", kind, text), + "boolean_statement? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn boolean_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "def calc\n left && right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left && right", + ), + ( + "def calc\n left && right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left + right", + ), + ( + "const value = left && right;\nconst other = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right", + ), + ( + "const value = left && right;\nconst other = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left and right\nother = left + right\n", + Language::Python, + ".py", + "boolean_operator", + "left and right", + ), + ( + "value = left and right\nother = left + right\n", + 
Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left and right\nlocal other = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left and right", + ), + ( + "local value = left and right\nlocal other = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.boolean_expression(node), + ruby_private_predicate(source, language, suffix, "boolean_expression?", kind, text), + "boolean_expression? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_boolean_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "def calc\n left || right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left || right", + ), + ( + "def calc\n left && middle && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && middle && right", + ), + ( + "value = left and right\n", + Language::Python, + ".py", + "boolean_operator", + "left and right", + ), + ( + "value = left or right\n", + Language::Python, + ".py", + "boolean_operator", + "left or right", + ), + ( + "local value = left and right\n", + Language::Lua, + ".lua", + "expression_list", + "left and right", + ), + ( + "local value = left or right\n", + Language::Lua, + ".lua", + "expression_list", + "left or right", + ), + ( + "const value = left && right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right", + ), + ( + "const value = left || right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left || right", + ), + ] { + let tree = raw_tree(source, language); + let node = 
first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_boolean(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_boolean", + kind, + text + ), + "normalize_boolean mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn boolean_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def calc\n left && right\nend\n", Language::Ruby, ".rb"), + ("value = left and right\n", Language::Python, ".py"), + ("local value = left and right\n", Language::Lua, ".lua"), + ( + "const value = left && right;\n", + Language::TypeScript, + ".ts", + ), + ] { + assert_ruby_parity(source, language, suffix); + } + } + + #[test] + fn operator_call_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\n left && right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left + right", + ), + ( + "def calc\n left + right\n left && right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left && right", + ), + ( + "const value = left + right && other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "const value = left + right && other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right && other", + ), + ( + "value = left + right and other\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "value = left + right and other\n", + Language::Python, + ".py", + "boolean_operator", + "left + right and other", + ), + ( + "local value = left + right\nlocal other = left and right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ( + "local value = left + right\nlocal other = left and right\n", + Language::Lua, + ".lua", + 
"expression_list", + "left and right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.operator_call_expression(node), + ruby_private_predicate( + source, + language, + suffix, + "operator_call_expression?", + kind, + text + ), + "operator_call_expression? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_operator_call_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "def calc\n left =~ /right/\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left =~ /right/", + ), + ( + "def calc\n left =~ pattern\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left =~ pattern", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_operator_call(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_operator_call", + kind, + text + ), + "normalize_operator_call mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn operator_call_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("value = left + right\n", Language::Python, ".py"), + ("local value = 
left + right\n", Language::Lua, ".lua"), + ("const value = left + right;\n", Language::TypeScript, ".ts"), + ] { + assert_ruby_parity(source, language, suffix); + } + } + + #[test] + fn spaced_text_matches_ruby_private_helper() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.spaced_text(node), + ruby_private_string(source, language, suffix, "spaced_text", kind, text), + "spaced_text mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn class_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "class Thing; end\n", + Language::Ruby, + ".rb", + "class", + "class Thing; end", + ), + ( + "class Thing:\n pass\n", + Language::Python, + ".py", + "class_definition", + "class Thing:\n pass", + ), + ( + "class Thing {}\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Thing {}", + ), + ( + "local Thing = {}\n", + Language::Lua, + ".lua", + "variable_declaration", + "local Thing = {}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.class_node(node), + ruby_private_predicate(source, language, suffix, "class_node?", kind, text), + "class_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn module_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "module Thing\n value\nend\n", + Language::Ruby, + ".rb", + "module", + "module Thing\n value\nend", + ), + ( + "class Thing; end\n", + Language::Ruby, + ".rb", + "class", + "class Thing; end", + ), + ( + "value = 1\n", + Language::Python, + ".py", + "module", + "value = 1\n", + ), + ( + "namespace Thing { const value = 1; }\n", + Language::TypeScript, + ".ts", + "program", + "namespace Thing { const value = 1; }\n", + ), + ( + "local Thing = {}\n", + Language::Lua, + ".lua", + "chunk", + "local Thing = {}\n", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.module_node(node), + ruby_private_predicate(source, language, suffix, "module_node?", kind, text), + "module_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_module_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "module Thing\n value\nend\n", + Language::Ruby, + ".rb", + "module", + "module Thing\n value\nend", + ), + ( + "module Empty\nend\n", + Language::Ruby, + ".rb", + "module", + "module Empty\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_module(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_module", + kind, + text + ), + "normalize_module mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_singleton_class_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class << self\n value\nend\n", + Language::Ruby, + ".rb", + "singleton_class", + "class << self\n value\nend", + ), + ( + "class << object\nend\n", + Language::Ruby, + ".rb", + "singleton_class", + "class << object\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_singleton_class(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_singleton_class", + kind, + text + ), + "normalize_singleton_class mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn ruby_definition_identifier_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def helper(arg)\n arg\nend\n", + Language::Ruby, + ".rb", + "identifier", 
+ "helper", + ), + ( + "def helper(arg)\n arg\nend\n", + Language::Ruby, + ".rb", + "identifier", + "arg", + ), + ( + "items.each { |item| item }\n", + Language::Ruby, + ".rb", + "identifier", + "item", + ), + ( + "def helper\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value", + ), + ( + "def helper(arg):\n return arg\n", + Language::Python, + ".py", + "identifier", + "arg", + ), + ( + "function helper(arg) { return arg; }\n", + Language::TypeScript, + ".ts", + "identifier", + "arg", + ), + ( + "function helper(arg)\n return arg\nend\n", + Language::Lua, + ".lua", + "identifier", + "arg", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_definition_identifier(node), + ruby_private_predicate( + source, + language, + suffix, + "ruby_definition_identifier?", + kind, + text + ), + "ruby_definition_identifier? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn literal_fragment_assignment_context_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value = \"left = right\"\n", + Language::Ruby, + ".rb", + "string_content", + "left = right", + ), + ("value = 1\n", Language::Ruby, ".rb", "identifier", "value"), + ( + "value = \"left = right\"\n", + Language::Python, + ".py", + "string_content", + "left = right", + ), + ( + "const value = \"left = right\";\n", + Language::TypeScript, + ".ts", + "string_fragment", + "left = right", + ), + ( + "local value = \"left = right\"\n", + Language::Lua, + ".lua", + "string_content", + "left = right", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.literal_fragment_assignment_context(node), + ruby_private_predicate( + source, + language, + suffix, + "literal_fragment_assignment_context?", + kind, + text + ), + "literal_fragment_assignment_context? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn assignment_lhs_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "other", + ), + ( + "{ key: value }\n", + Language::Ruby, + ".rb", + "hash_key_symbol", + "key", + ), + ( + "{ key: value }\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.assignment_lhs(node), + ruby_private_predicate(source, language, suffix, "assignment_lhs?", kind, text), + "assignment_lhs? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn assignment_rhs_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "other", + ), + ( + "{ key: value }\n", + Language::Ruby, + ".rb", + "hash_key_symbol", + "key", + ), + ( + "{ key: value }\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.assignment_rhs(node), + ruby_private_predicate(source, language, suffix, "assignment_rhs?", kind, text), + "assignment_rhs? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn ruby_assignment_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value = 1\n", + Language::Ruby, + ".rb", + "assignment", + "value = 1", + ), + ( + "value += 1\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value += 1", + ), + ( + "def helper\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value", + ), + ( + "[1].each { |item| local = item }\n", + Language::Ruby, + ".rb", + "block_body", + "local = item", + ), + ( + "value = 1\n", + Language::Python, + ".py", + "expression_statement", + "value = 1", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "assignment_expression", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_assignment_node(node), + ruby_private_predicate( + source, + language, + suffix, + "ruby_assignment_node?", + kind, + text + ), + "ruby_assignment_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn collect_assignment_target_names_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "left, *rest = values\n", + Language::Ruby, + ".rb", + "left_assignment_list", + "left, *rest", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "const value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let mut names = BTreeSet::new(); + normalizer.collect_assignment_target_names(node, &mut names); + + assert_eq!( + names, + ruby_private_collected_names( + source, + language, + suffix, + "collect_assignment_target_names", + kind, + text + ), + "collect_assignment_target_names mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn collect_identifier_names_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "left, *rest = values\n", + Language::Ruby, + ".rb", + "left_assignment_list", + "left, *rest", + ), + ( + "receiver.call(argument)\n", + Language::Ruby, + ".rb", + "call", + "receiver.call(argument)", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "const value = { shorthand };\n", + Language::TypeScript, + ".ts", + "object", + "{ shorthand }", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_declaration", + "local value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = 
super::TreeSitterNormalizer::new(source, language); + let mut names = BTreeSet::new(); + normalizer.collect_identifier_names(node, &mut names); + + assert_eq!( + names, + ruby_private_collected_names( + source, + language, + suffix, + "collect_identifier_names", + kind, + text + ), + "collect_identifier_names mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn member_name_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "identifier", "name"), + ("user&.name\n", Language::Ruby, ".rb", "identifier", "name"), + ( + "user.name()\n", + Language::Python, + ".py", + "identifier", + "name", + ), + ( + "user?.name;\n", + Language::TypeScript, + ".ts", + "property_identifier", + "name", + ), + ("user.name()\n", Language::Lua, ".lua", "identifier", "name"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.member_name(node), + ruby_private_string(source, language, suffix, "member_name", kind, text), + "member_name mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn member_parts_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ("user&.name\n", Language::Ruby, ".rb", "call", "user&.name"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name(thing)\n", + Language::Python, + ".py", + "expression_statement", + "user.name(thing)", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name(thing);\n", + Language::TypeScript, + ".ts", + "call_expression", + "user.name(thing)", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ( + "local value = 
other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.member_parts(node).map(|(receiver, method)| { + ( + receiver.kind().to_string(), + super::node_text(receiver, source).to_string(), + method, + ) + }); + + assert_eq!( + rust, + ruby_private_member_parts(source, language, suffix, kind, text), + "member_parts mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn member_read_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ("foo()\n", Language::Ruby, ".rb", "call", "foo()"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name(thing)\n", + Language::Python, + ".py", + "expression_statement", + "user.name(thing)", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name(thing);\n", + Language::TypeScript, + ".ts", + "call_expression", + "user.name(thing)", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.member_read_node(node), + ruby_private_predicate(source, language, suffix, "member_read_node?", kind, text), + "member_read_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_member_read_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ("value\n", Language::Ruby, ".rb", "identifier", "value"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_member_read(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_member_read", + kind, + text + ), + "normalize_member_read mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn assignment_left_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "assignment", + "value = other", + ), + ( + "left, right = values\n", + Language::Ruby, + ".rb", + "assignment", + "left, right = values", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "assignment_expression", + "value = other", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.assignment_left(node).map(|left| { + ( + 
left.kind().to_string(), + super::node_text(left, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature( + source, + language, + suffix, + "assignment_left", + kind, + text + ), + "assignment_left mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn assignment_right_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "assignment", + "value = other", + ), + ( + "left, right = values\n", + Language::Ruby, + ".rb", + "assignment", + "left, right = values", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "assignment_expression", + "value = other", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.assignment_right(node).map(|right| { + ( + right.kind().to_string(), + super::node_text(right, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature( + source, + language, + suffix, + "assignment_right", + kind, + text + ), + "assignment_right mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn singleton_receiver_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def self.foo\nend\n", + "singleton_method", + "def self.foo\nend", + ), + ( + "def User.foo\nend\n", + "singleton_method", + "def User.foo\nend", + ), + ( + "def object.foo\nend\n", + "singleton_method", + "def object.foo\nend", + ), + ( + "def self.foo(value)\n value\nend\n", + "singleton_method", + "def self.foo(value)\n value\nend", + ), + ( + "def object.foo\n value\nend\n", + "singleton_method", + "def object.foo\n value\nend", + 
), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer.singleton_receiver(node).map(|receiver| { + ( + receiver.kind().to_string(), + super::node_text(receiver, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature( + source, + Language::Ruby, + ".rb", + "singleton_receiver", + kind, + text + ), + "singleton_receiver mismatch for {kind} {text:?}" + ); + } + } + + #[test] + fn singleton_name_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def self.foo\nend\n", + "singleton_method", + "def self.foo\nend", + ), + ( + "def User.foo\nend\n", + "singleton_method", + "def User.foo\nend", + ), + ( + "def object.foo\nend\n", + "singleton_method", + "def object.foo\nend", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + + assert_eq!( + normalizer.singleton_name(node), + ruby_private_string(source, Language::Ruby, ".rb", "singleton_name", kind, text), + "singleton_name mismatch for {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_singleton_function_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def self.hidden(value)\n return value\nend\n", + "singleton_method", + "def self.hidden(value)\n return value\nend", + ), + ( + "def User.hidden\nend\n", + "singleton_method", + "def User.hidden\nend", + ), + ( + "def object.hidden\n value\nend\n", + "singleton_method", + "def object.hidden\n value\nend", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_singleton_function(node) + .map(|node| 
node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_singleton_function", + kind, + text + ), + "normalize_singleton_function mismatch for {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_function_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check(value)\n return value\nend\n", + Language::Ruby, + ".rb", + "method", + "def check(value)\n return value\nend", + ), + ( + "def empty\nend\n", + Language::Ruby, + ".rb", + "method", + "def empty\nend", + ), + ( + "def object.hidden\n value\nend\n", + Language::Ruby, + ".rb", + "singleton_method", + "def object.hidden\n value\nend", + ), + ( + "def check(value):\n return value\n", + Language::Python, + ".py", + "function_definition", + "def check(value):\n return value", + ), + ( + "function check(value) { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function check(value) { return value; }", + ), + ( + "class Box { check(value) { return value; } }\n", + Language::TypeScript, + ".ts", + "method_definition", + "check(value) { return value; }", + ), + ( + "function check(value)\n return value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function check(value)\n return value\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_function(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_function", + kind, + text + ), + "normalize_function mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn lambda_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ 
+ ( + "fn = ->(x) { x + 1 }\n", + Language::Ruby, + ".rb", + "lambda", + "->(x) { x + 1 }", + ), + ( + "fn = lambda x: x + 1\n", + Language::Python, + ".py", + "lambda", + "lambda x: x + 1", + ), + ( + "const fn = (x) => x + 1;\n", + Language::TypeScript, + ".ts", + "arrow_function", + "(x) => x + 1", + ), + ( + "const fn = function(x) { return x + 1; };\n", + Language::TypeScript, + ".ts", + "function_expression", + "function(x) { return x + 1; }", + ), + ( + "local fn = function(x) return x + 1 end\n", + Language::Lua, + ".lua", + "expression_list", + "function(x) return x + 1 end", + ), + ( + "function f(x) return x + 1 end\n", + Language::Lua, + ".lua", + "function_declaration", + "function f(x) return x + 1 end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.lambda_expression(node), + ruby_private_predicate(source, language, suffix, "lambda_expression?", kind, text), + "lambda_expression? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_lambda_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "fn = ->(x) { x + 1 }\n", + Language::Ruby, + ".rb", + "lambda", + "->(x) { x + 1 }", + ), + ( + "fn = lambda x: x + 1\n", + Language::Python, + ".py", + "lambda", + "lambda x: x + 1", + ), + ( + "const fn = (x) => x + 1;\n", + Language::TypeScript, + ".ts", + "arrow_function", + "(x) => x + 1", + ), + ( + "const fn = function(x) { return x + 1; };\n", + Language::TypeScript, + ".ts", + "function_expression", + "function(x) { return x + 1; }", + ), + ( + "local fn = function(x) return x + 1 end\n", + Language::Lua, + ".lua", + "expression_list", + "function(x) return x + 1 end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_lambda(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_lambda", + kind, + text + ), + "normalize_lambda mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn lambda_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("fn = ->(x) { x + 1 }\n", Language::Ruby, ".rb"), + ("fn = lambda x: x + 1\n", Language::Python, ".py"), + ("const fn = (x) => x + 1;\n", Language::TypeScript, ".ts"), + ( + "const fn = function(x) { return x + 1; };\n", + Language::TypeScript, + ".ts", + ), + ( + "local fn = function(x) return x + 1 end\n", + Language::Lua, + ".lua", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut lambdas = Vec::new(); + nodes_of_type(&root, "LAMBDA", &mut lambdas); + assert!( + !lambdas.is_empty(), + "expected LAMBDA for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, 
language, suffix); + } + } + + #[test] + fn function_name_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def run\nend\n", + Language::Ruby, + ".rb", + "method", + "def run\nend", + ), + ( + "def self.run\nend\n", + Language::Ruby, + ".rb", + "singleton_method", + "def self.run\nend", + ), + ( + "def run():\n pass\n", + Language::Python, + ".py", + "function_definition", + "def run():\n pass", + ), + ( + "function run() {}\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function run() {}", + ), + ( + "class Box { run() {} }\n", + Language::TypeScript, + ".ts", + "method_definition", + "run() {}", + ), + ( + "function run()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function run()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.function_name(node).unwrap_or_default(), + ruby_private_string(source, language, suffix, "function_name", kind, text), + "function_name mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn collect_destructured_parameter_targets_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "items.each { |(left, right)| left }\n", + "destructured_parameter", + "(left, right)", + ), + ( + "items.each do |(left, (middle, right))| left end\n", + "destructured_parameter", + "(left, (middle, right))", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let mut targets = Vec::new(); + normalizer.collect_destructured_parameter_targets(node, &mut targets); + let rust = Value::Array(targets.iter().map(node_value).collect()); + + assert_eq!( + rust, + ruby_private_destructured_parameter_targets_value(source, kind, 
text), + "collect_destructured_parameter_targets mismatch for {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_block_parameters_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "items.each { |(left, right)| left }\n", + Language::Ruby, + ".rb", + "block", + "{ |(left, right)| left }", + ), + ( + "items.each { |item, (left, right)| item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item, (left, right)| item }", + ), + ( + "items.each { |item| item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| item }", + ), + ( + "def f(x):\n pass\n", + Language::Python, + ".py", + "function_definition", + "def f(x):\n pass", + ), + ( + "items.forEach((item) => item);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "items.forEach((item) => item);", + ), + ( + "function f(x)\n return x\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f(x)\n return x\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_block_parameters(Some(node)) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_block_parameters", + kind, + text + ), + "normalize_block_parameters mismatch for {language:?} {kind} {text:?}" + ); + } + + let mut normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + assert!(normalizer.normalize_block_parameters(None).is_none()); + } + + #[test] + fn normalize_parameters_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(value = 1)\nend\n", + Language::Ruby, + ".rb", + "method_parameters", + "(value = 1)", + ), + ( + "def f(value)\nend\n", + Language::Ruby, + ".rb", + "method_parameters", + "(value)", + ), + ( + "def f(value=1):\n 
pass\n", + Language::Python, + ".py", + "parameters", + "(value=1)", + ), + ( + "function f(value = 1) {}\n", + Language::TypeScript, + ".ts", + "formal_parameters", + "(value = 1)", + ), + ( + "function f(value)\nend\n", + Language::Lua, + ".lua", + "parameters", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_parameters(Some(node)) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_parameters", + kind, + text + ), + "normalize_parameters mismatch for {language:?} {kind} {text:?}" + ); + } + + let mut normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + assert!(normalizer.normalize_parameters(None).is_none()); + } + + #[test] + fn normalize_destructured_block_parameter_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "items.each { |(left, right)| left }\n", + "destructured_parameter", + "(left, right)", + ), + ( + "items.each do |(left, (middle, right))| left end\n", + "destructured_parameter", + "(left, (middle, right))", + ), + ("items.each { |item| item }\n", "identifier", "item"), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_destructured_block_parameter(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_destructured_block_parameter", + kind, + text + ), + "normalize_destructured_block_parameter mismatch for {kind} {text:?}" + ); + } + } + + #[test] + fn scope_matches_ruby_private_method() { + for (source, 
language, suffix, kind, text, mode) in [ + ("1\n", Language::Ruby, ".rb", "integer", "1", "body"), + ( + "1\n", + Language::Python, + ".py", + "expression_statement", + "1", + "body", + ), + ( + "value;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + "args", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "expression_list", + "value", + "empty", + ), + ] { + let tree = raw_tree(source, language); + let root = tree.root_node(); + let node = first_raw_node(root, source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + normalizer.root_span = Some(super::span(root)); + let body = if mode == "body" { + Some(normalizer.wrap("BODY", Vec::new(), node)) + } else { + None + }; + let args = if mode == "args" { + Some(normalizer.wrap("ARGS", Vec::new(), node)) + } else { + None + }; + let rust = node_value(&normalizer.scope(body, args, node)); + + assert_eq!( + rust, + ruby_private_scope_value(source, language, suffix, kind, text, mode), + "scope mismatch for {language:?} {kind} {text:?} mode {mode}" + ); + } + } + + #[test] + fn list_matches_ruby_private_method() { + for (source, language, suffix, kind, text, mode) in [ + ( + "value\n", + Language::Ruby, + ".rb", + "identifier", + "value", + "one", + ), + ( + "value\n", + Language::Python, + ".py", + "expression_statement", + "value", + "empty", + ), + ( + "value;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + "nil", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "expression_list", + "value", + "one", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let item = normalizer.wrap("ITEM", Vec::new(), node); + let children = match mode { + "nil" => None, + "empty" => Some(Vec::new()), + "one" => Some(vec![item]), + _ => panic!("unknown list mode: {mode}"), + }; + let rust = normalizer + 
.list(children, node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_list_value(source, language, suffix, kind, text, mode), + "list mismatch for {language:?} {kind} {text:?} mode {mode}" + ); + } + } + + #[test] + fn unwrap_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n (value)\n value\nend\n", + Language::Ruby, + ".rb", + "parenthesized_statements", + "(value)", + ), + ( + "value\n(value)\n", + Language::Python, + ".py", + "expression_statement", + "value", + ), + ( + "value\n(value)\n", + Language::Python, + ".py", + "expression_statement", + "(value)", + ), + ( + "const value = (other);\n", + Language::TypeScript, + ".ts", + "parenthesized_expression", + "(other)", + ), + ( + "local first = (other)\nlocal second = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "(other)", + ), + ( + "local first = (other)\nlocal second = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.unwrap_node(node), + ruby_private_predicate(source, language, suffix, "unwrap_node?", kind, text), + "unwrap_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn statement_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n return value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "return value", + ), + ( + "def check\n return value\nend\n", + Language::Ruby, + ".rb", + "identifier", + "check", + ), + ( + "value\n(value)\n", + Language::Python, + ".py", + "expression_statement", + "(value)", + ), + ( + "value\n(value)\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "function check() { return value + other; }\n", + Language::TypeScript, + ".ts", + "return_statement", + "return value + other;", + ), + ( + "function check() { return value + other; }\n", + Language::TypeScript, + ".ts", + "binary_expression", + "value + other", + ), + ( + "function check() { return value + other; }\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "return_statement", + "return value", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "expression_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.statement_node(node.kind()), + ruby_private_predicate(source, language, suffix, "statement_node?", kind, text), + "statement_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn local_identifier_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\nend\nclass Thing; end\n", + Language::Ruby, + ".rb", + "identifier", + "check", + ), + ( + "def check\nend\nclass Thing; end\n", + Language::Ruby, + ".rb", + "constant", + "Thing", + ), + ( + "def check(value):\n pass\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "def check(value):\n pass\n", + Language::Python, + ".py", + "parameters", + "(value)", + ), + ( + "const value = object.field;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "const value = object.field;\n", + Language::TypeScript, + ".ts", + "property_identifier", + "field", + ), + ( + "const value = object.field;\n", + Language::TypeScript, + ".ts", + "lexical_declaration", + "const value = object.field;", + ), + ( + "local value = other\nprint(value)\n", + Language::Lua, + ".lua", + "identifier", + "value", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.identifier_kind(node.kind()), + ruby_private_predicate(source, language, suffix, "local_identifier?", kind, text), + "local_identifier? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn ruby_local_name_matches_scope_stack_lookup() { + let mut normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + normalizer.local_stack = vec![ + BTreeSet::from(["outer".to_string(), "shared".to_string()]), + BTreeSet::from(["inner".to_string()]), + ]; + + assert!(normalizer.ruby_local_name("outer")); + assert!(normalizer.ruby_local_name("inner")); + assert!(normalizer.ruby_local_name("shared")); + assert!(!normalizer.ruby_local_name("missing")); + } + + #[test] + fn ruby_vcall_identifier_matches_ruby_private_predicate() { + let cases = vec![ + ( + "ruby_vcall", + "foo\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "ruby_local", + "foo\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + vec!["foo"], + ), + ( + "assignment_lhs", + "foo = 1\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "method_name", + "def foo\nend\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "parameter", + "def f(foo)\nend\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "non_identifier", + "Thing\n", + Language::Ruby, + ".rb", + "constant", + "Thing", + Vec::<&str>::new(), + ), + ( + "non_ruby", + "foo\n", + Language::Python, + ".py", + "expression_statement", + "foo", + Vec::<&str>::new(), + ), + ]; + + for (label, source, language, suffix, kind, text, locals) in cases { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + if !locals.is_empty() { + normalizer + .local_stack + .push(locals.iter().map(|name| name.to_string()).collect()); + } + + assert_eq!( + normalizer.ruby_vcall_identifier(node, super::node_text(node, source)), + ruby_private_ruby_vcall_identifier_predicate( + source, language, suffix, kind, 
text, &locals, + ), + "ruby_vcall_identifier? mismatch for {label}" + ); + } + } + + #[test] + fn vcall_identifier_matches_ruby_private_predicate() { + let cases = vec![ + ( + "ruby_modifier_action", + "foo if cond\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "ruby_local", + "foo if cond\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + vec!["foo"], + ), + ( + "method_name", + "def foo\nend\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "argument", + "call(foo)\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "member_read", + "def f\n user.name\nend\n", + Language::Ruby, + ".rb", + "identifier", + "name", + Vec::<&str>::new(), + ), + ( + "assignment_lhs", + "foo = bar\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "python_identifier", + "foo\n", + Language::Python, + ".py", + "expression_statement", + "foo", + Vec::<&str>::new(), + ), + ( + "typescript_identifier", + "foo;\n", + Language::TypeScript, + ".ts", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "lua_identifier", + "foo()\n", + Language::Lua, + ".lua", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ]; + + for (label, source, language, suffix, kind, text, locals) in cases { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + if !locals.is_empty() { + normalizer + .local_stack + .push(locals.iter().map(|name| name.to_string()).collect()); + } + + assert_eq!( + normalizer.vcall_identifier(node, super::node_text(node, source)), + ruby_private_vcall_identifier_predicate( + source, language, suffix, kind, text, &locals, + ), + "vcall_identifier? 
mismatch for {label}" + ); + } + + let source = "def f\n Thing\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "constant", "Thing"); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + assert!( + !normalizer.vcall_identifier(node, super::node_text(node, source)), + "vcall_identifier? must reject non-local identifiers in statement wrappers" + ); + + let source = "foo\n"; + let tree = raw_tree(source, Language::Python); + let node = first_raw_node(tree.root_node(), source, "identifier", "foo"); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Python); + assert!( + !normalizer.vcall_identifier(node, super::node_text(node, source)), + "vcall_identifier? must reject Python bare identifiers" + ); + } + + #[test] + fn collect_ruby_parameter_locals_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def f(a, b = 1, *rest, key:, **opts, &block)\nend\n", + "method_parameters", + "(a, b = 1, *rest, key:, **opts, &block)", + ), + ( + "[1].each { |item, (left, right)| item }\n", + "block_parameters", + "|item, (left, right)|", + ), + ("fn = ->(x, y:) { x }\n", "lambda_parameters", "(x, y:)"), + ("value = other\n", "assignment", "value = other"), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let mut locals = BTreeSet::new(); + normalizer.collect_ruby_parameter_locals(node, &mut locals); + + assert_eq!( + locals, + ruby_private_collected_names( + source, + Language::Ruby, + ".rb", + "collect_ruby_parameter_locals", + kind, + text + ), + "collect_ruby_parameter_locals mismatch for {kind} {text:?}" + ); + } + } + + #[test] + fn collect_ruby_assignment_locals_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "assignment", 
+ "value = other", + ), + ( + "left, *rest = values\n", + Language::Ruby, + ".rb", + "assignment", + "left, *rest = values", + ), + ( + "value += 1\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value += 1", + ), + ( + "begin\n work\nrescue => error\n error\nend\n", + Language::Ruby, + ".rb", + "exception_variable", + "=> error", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let mut locals = BTreeSet::new(); + normalizer.collect_ruby_assignment_locals(node, &mut locals); + + assert_eq!( + locals, + ruby_private_collected_names( + source, + language, + suffix, + "collect_ruby_assignment_locals", + kind, + text + ), + "collect_ruby_assignment_locals mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn collect_ruby_scope_locals_matches_ruby_private_method() { + for (source, language, suffix, kind, text, root) in [ + ( + "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend\n", + Language::Ruby, + ".rb", + "method", + "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend", + true, + ), + ( + "def outer(a)\n local = 1\nend\n", + Language::Ruby, + ".rb", + "method", + "def outer(a)\n local = 1\nend", + false, + ), + ( + "[1].each { |item| local = item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| local = item }", + true, + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + true, + ), + ( + "let 
value = other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + true, + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + true, + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let mut locals = BTreeSet::new(); + normalizer.collect_ruby_scope_locals(node, &mut locals, root); + + assert_eq!( + locals, + ruby_private_scope_collected_names(source, language, suffix, kind, text, root), + "collect_ruby_scope_locals mismatch for {language:?} {kind} {text:?} root={root}" + ); + } + } + + #[test] + fn ruby_scope_locals_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend\n", + Language::Ruby, + ".rb", + "method", + "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend", + ), + ( + "[1].each { |item| local = item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| local = item }", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_scope_locals(node), + ruby_private_ruby_scope_locals(source, language, suffix, kind, text), + "ruby_scope_locals mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn 
with_ruby_scope_matches_ruby_private_method() { + for (source, language, suffix, kind, text, reset, initial_stack) in [ + ( + "def f(a)\n local = 1\nend\n", + Language::Ruby, + ".rb", + "method", + "def f(a)\n local = 1\nend", + false, + vec![vec!["outer"]], + ), + ( + "def f(a)\n local = 1\nend\n", + Language::Ruby, + ".rb", + "method", + "def f(a)\n local = 1\nend", + true, + vec![vec!["outer"]], + ), + ( + "[1].each { |item| local = item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| local = item }", + false, + vec![], + ), + ( + "def f(value):\n local = value\n", + Language::Python, + ".py", + "function_definition", + "def f(value):\n local = value", + true, + vec![vec!["outer"]], + ), + ( + "function f(value) { let local = value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f(value) { let local = value; }", + true, + vec![vec!["outer"]], + ), + ( + "function f(value)\n local local_value = value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f(value)\n local local_value = value\nend", + true, + vec![vec!["outer"]], + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + normalizer.local_stack = local_stack_from(&initial_stack); + let before = local_stack_value(&normalizer.local_stack); + let inside = normalizer.with_ruby_scope(node, reset, |normalizer| { + local_stack_value(&normalizer.local_stack) + }); + let after = local_stack_value(&normalizer.local_stack); + let rust = json!({ + "before": before, + "inside": inside, + "after": after, + "result": "block-result", + }); + + assert_eq!( + rust, + ruby_private_with_ruby_scope_trace( + source, + language, + suffix, + kind, + text, + reset, + &initial_stack, + ), + "with_ruby_scope mismatch for {language:?} {kind} {text:?} reset={reset}" + ); + } + } + + #[test] + fn 
ruby_scope_boundary_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def f\n value\nend", + ), + ( + "class Box\nend\n", + Language::Ruby, + ".rb", + "class", + "class Box\nend", + ), + ( + "module Admin\nend\n", + Language::Ruby, + ".rb", + "module", + "module Admin\nend", + ), + ( + "items.each { |item| item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| item }", + ), + ( + "handler = -> { value }\n", + Language::Ruby, + ".rb", + "block", + "{ value }", + ), + ( + "def f():\n return value\n break\n continue\n", + Language::Python, + ".py", + "function_definition", + "def f():\n return value\n break\n continue", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "block", + "return value", + ), + ( + "class Box:\n pass\n", + Language::Python, + ".py", + "class_definition", + "class Box:\n pass", + ), + ( + "function f() { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { return value; }", + ), + ( + "class Box {}\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Box {}", + ), + ( + "function f()\n return value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n return value\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_scope_boundary(node), + ruby_private_predicate( + source, + language, + suffix, + "ruby_scope_boundary?", + kind, + text + ), + "ruby_scope_boundary? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn ruby_scope_child_boundary_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def f\n value\nend", + ), + ( + "class Box\nend\n", + Language::Ruby, + ".rb", + "class", + "class Box\nend", + ), + ( + "module Admin\nend\n", + Language::Ruby, + ".rb", + "module", + "module Admin\nend", + ), + ( + "items.each { |item| item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| item }", + ), + ( + "handler = -> { value }\n", + Language::Ruby, + ".rb", + "block", + "{ value }", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "function_definition", + "def f():\n return value", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "block", + "return value", + ), + ( + "class Box:\n pass\n", + Language::Python, + ".py", + "class_definition", + "class Box:\n pass", + ), + ( + "function f() { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { return value; }", + ), + ( + "class Box {}\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Box {}", + ), + ( + "function f()\n return value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n return value\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_scope_child_boundary(node), + ruby_private_predicate( + source, + language, + suffix, + "ruby_scope_child_boundary?", + kind, + text + ), + "ruby_scope_child_boundary? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn ruby_predicate_uses_normalization_adapter() { + for (language, expected) in [ + (Language::Ruby, true), + (Language::Python, false), + (Language::Lua, false), + (Language::TypeScript, false), + ] { + let normalizer = super::TreeSitterNormalizer::new("", language); + + assert_eq!( + normalizer.ruby(), + expected, + "ruby? mismatch for {language:?}" + ); + } + } + + #[test] + fn interpolated_string_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "name = \"hi #{user}\"\nplain = \"hi\"\n", + Language::Ruby, + ".rb", + "string", + "\"hi #{user}\"", + ), + ( + "name = \"hi #{user}\"\nplain = \"hi\"\n", + Language::Ruby, + ".rb", + "string", + "\"hi\"", + ), + ( + "name = f\"hi {user}\"\nplain = \"hi\"\n", + Language::Python, + ".py", + "string", + "f\"hi {user}\"", + ), + ( + "name = f\"hi {user}\"\nplain = \"hi\"\n", + Language::Python, + ".py", + "string", + "\"hi\"", + ), + ( + "const name = `hi ${user}`;\nconst plain = `hi`;\n", + Language::TypeScript, + ".ts", + "template_string", + "`hi ${user}`", + ), + ( + "const name = `hi ${user}`;\nconst plain = `hi`;\n", + Language::TypeScript, + ".ts", + "template_string", + "`hi`", + ), + ( + "local name = \"hi\"\n", + Language::Lua, + ".lua", + "expression_list", + "\"hi\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.interpolated_string(node), + ruby_private_predicate( + source, + language, + suffix, + "interpolated_string?", + kind, + text + ), + "interpolated_string? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_interpolated_string_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "name = \"hi #{user}\"\n", + Language::Ruby, + ".rb", + "string", + "\"hi #{user}\"", + ), + ( + "name = f\"hi {user}\"\n", + Language::Python, + ".py", + "string", + "f\"hi {user}\"", + ), + ( + "const name = `hi ${user}`;\n", + Language::TypeScript, + ".ts", + "template_string", + "`hi ${user}`", + ), + ( + "local name = \"hi\"\n", + Language::Lua, + ".lua", + "expression_list", + "\"hi\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_interpolated_string(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_interpolated_string", + kind, + text + ), + "normalize_interpolated_string mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_subshell_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = `echo hi`\n", + Language::Ruby, + ".rb", + "subshell", + "`echo hi`", + ), + ( + "value = `echo #{name}`\n", + Language::Ruby, + ".rb", + "subshell", + "`echo #{name}`", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_subshell(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_subshell", + kind, + text + ), + "normalize_subshell mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn const_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "class 
Thing; end\ndef check; end\n", + Language::Ruby, + ".rb", + "constant", + "Thing", + ), + ( + "class Thing; end\ndef check; end\n", + Language::Ruby, + ".rb", + "identifier", + "check", + ), + ( + "class Thing:\n pass\n", + Language::Python, + ".py", + "identifier", + "Thing", + ), + ( + "type Thing = Other;\nconst value = Thing;\n", + Language::TypeScript, + ".ts", + "type_identifier", + "Thing", + ), + ( + "type Thing = Other;\nconst value = Thing;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "local Thing = {}\n", + Language::Lua, + ".lua", + "variable_list", + "Thing", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.const_kind(node.kind()), + ruby_private_predicate(source, language, suffix, "const_node?", kind, text), + "const_node? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn self_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("self\nother\n", Language::Ruby, ".rb", "self", "self"), + ( + "self\nother\n", + Language::Ruby, + ".rb", + "identifier", + "other", + ), + ( + "self.value\nother.value\n", + Language::Python, + ".py", + "identifier", + "self", + ), + ( + "self.value\nother.value\n", + Language::Python, + ".py", + "identifier", + "other", + ), + ( + "this.value;\nother;\n", + Language::TypeScript, + ".ts", + "this", + "this", + ), + ( + "this.value;\nother;\n", + Language::TypeScript, + ".ts", + "identifier", + "other", + ), + ( + "print(self.value)\nprint(other.value)\n", + Language::Lua, + ".lua", + "identifier", + "self", + ), + ( + "print(self.value)\nprint(other.value)\n", + Language::Lua, + ".lua", + "identifier", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = 
super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.self_node(node), + ruby_private_predicate(source, language, suffix, "self_node?", kind, text), + "self_node? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn instance_variable_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "@value\nname\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + ), + ( + "@value\nname\n", + Language::Ruby, + ".rb", + "identifier", + "name", + ), + ( + "@decorator\ndef call():\n pass\n", + Language::Python, + ".py", + "decorator", + "@decorator", + ), + ( + "@sealed\nclass Thing {}\n", + Language::TypeScript, + ".ts", + "decorator", + "@sealed", + ), + ( + "print(value)\n", + Language::Lua, + ".lua", + "identifier", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.instance_variable(node), + ruby_private_predicate(source, language, suffix, "instance_variable?", kind, text), + "instance_variable? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn global_variable_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "$value\nname\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "$value\nname\n", + Language::Ruby, + ".rb", + "identifier", + "name", + ), + ( + "value = \"$name\"\n", + Language::Python, + ".py", + "string_content", + "$name", + ), + ( + "const $value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "$value", + ), + ( + "print(\"$name\")\n", + Language::Lua, + ".lua", + "string_content", + "$name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.global_variable(node), + ruby_private_predicate(source, language, suffix, "global_variable?", kind, text), + "global_variable? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_global_variable_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "$value\n$1\n$12\n$0\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "$value\n$1\n$12\n$0\n", + Language::Ruby, + ".rb", + "global_variable", + "$1", + ), + ( + "$value\n$1\n$12\n$0\n", + Language::Ruby, + ".rb", + "global_variable", + "$12", + ), + ( + "$value\n$1\n$12\n$0\n", + Language::Ruby, + ".rb", + "global_variable", + "$0", + ), + ( + "value = \"$name\"\n", + Language::Python, + ".py", + "string_content", + "$name", + ), + ( + "const $value = 1;\n", + Language::TypeScript, + ".ts", + "identifier", + "$value", + ), + ( + "print(\"$name\")\n", + Language::Lua, + ".lua", + "string_content", + "$name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust 
= normalizer.normalize_global_variable(node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_global_variable", + kind, + text + ), + "normalize_global_variable mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn assignment_operator_matches_ruby_private_predicate() { + for (language, text) in [ + (Language::Ruby, "="), + (Language::Ruby, "**="), + (Language::Ruby, "??="), + (Language::Python, ":="), + (Language::Python, "//="), + (Language::Python, "&&="), + (Language::TypeScript, "??="), + (Language::TypeScript, ">>>="), + (Language::TypeScript, ":="), + (Language::Lua, "="), + (Language::Lua, "+="), + ] { + let normalizer = super::TreeSitterNormalizer::new("", language); + + assert_eq!( + normalizer.assignment_operator(text), + ruby_private_text_predicate(language, "assignment_operator?", text), + "assignment_operator? mismatch for {language:?} {text:?}" + ); + } + } + + #[test] + fn operator_assignment_operator_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value **= other\nflag ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value **= other", + ), + ( + "value **= other\nflag ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "flag ||= fallback", + ), + ( + "value //= other\n", + Language::Python, + ".py", + "expression_statement", + "value //= other", + ), + ( + "value ??= other;\ncount >>>= 1;\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "value ??= other", + ), + ( + "value ??= other;\ncount >>>= 1;\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "count >>>= 1", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.operator_assignment_operator(node), + 
ruby_private_string( + source, + language, + suffix, + "operator_assignment_operator", + kind, + text + ), + "operator_assignment_operator mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_logical_operator_assignment_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value ||= fallback", + ), + ( + "value &&= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value &&= fallback", + ), + ( + "value += fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value += fallback", + ), + ( + "@value ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "@value ||= fallback", + ), + ( + "value //= fallback\n", + Language::Python, + ".py", + "expression_statement", + "value //= fallback", + ), + ( + "value ||= fallback;\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "value ||= fallback", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let left = normalizer + .assignment_left(node) + .expect("operator assignment should have left side"); + let right = normalizer + .assignment_right(node) + .and_then(|right| normalizer.normalize_node(right)); + let operator = normalizer.operator_assignment_operator(node); + let rust = normalizer + .normalize_logical_operator_assignment(left, &operator, right, node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_logical_operator_assignment_value( + source, language, suffix, kind, text + ), + "normalize_logical_operator_assignment mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_operator_assignment_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value += 
other\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value += other", + ), + ( + "$value += 1\n", + Language::Ruby, + ".rb", + "operator_assignment", + "$value += 1", + ), + ( + "items[index] += value\n", + Language::Ruby, + ".rb", + "operator_assignment", + "items[index] += value", + ), + ( + "object.value += 1\n", + Language::Ruby, + ".rb", + "operator_assignment", + "object.value += 1", + ), + ( + "flag ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "flag ||= fallback", + ), + ( + "flag &&= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "flag &&= fallback", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_operator_assignment(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_operator_assignment", + kind, + text + ), + "normalize_operator_assignment mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn first_named_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class Thing; end\nname\n", + Language::Ruby, + ".rb", + "class", + "class Thing; end", + ), + ( + "class Thing; end\nname\n", + Language::Ruby, + ".rb", + "identifier", + "name", + ), + ( + "def check(value):\n return value\n", + Language::Python, + ".py", + "function_definition", + "def check(value):\n return value", + ), + ( + "function check(value) { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function check(value) { return value; }", + ), + ( + "print(value)\n", + Language::Lua, + ".lua", + "function_call", + "print(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let 
normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.first_named(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "first_named", kind, text), + "first_named mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn block_child_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n call\nend\n", + Language::Ruby, + ".rb", + "method", + "def check\n call\nend", + ), + ( + "items.each do\n call\nend\n", + Language::Ruby, + ".rb", + "call", + "items.each do\n call\nend", + ), + ( + "def check():\n call()\n", + Language::Python, + ".py", + "function_definition", + "def check():\n call()", + ), + ( + "function check() { call(); }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function check() { call(); }", + ), + ( + "function check()\n call()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function check()\n call()\nend", + ), + ("name\n", Language::Ruby, ".rb", "identifier", "name"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.block_child(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "block_child", kind, text), + "block_child mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn branch_child_matches_ruby_private_method() { + for (source, language, suffix, kind, text, condition_kind, condition_text, index) in [ + ( + "if ready\n call\nelse\n stop\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nelse\n stop\nend", + "identifier", + "ready", + 0, + ), + ( + "if ready\n 
call\nelse\n stop\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nelse\n stop\nend", + "identifier", + "ready", + 1, + ), + ( + "if ready\n # note\n call\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n # note\n call\nend", + "identifier", + "ready", + 0, + ), + ( + "if ready:\n call()\nelse:\n stop()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()\nelse:\n stop()", + "identifier", + "ready", + 1, + ), + ( + "if (ready) { call(); } else { stop(); }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (ready) { call(); } else { stop(); }", + "parenthesized_expression", + "(ready)", + 0, + ), + ( + "if ready then\n call()\nelse\n stop()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nelse\n stop()\nend", + "identifier", + "ready", + 1, + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let condition = + first_raw_node(tree.root_node(), source, condition_kind, condition_text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.branch_child(node, condition, index).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_branch_child_signature( + source, + language, + suffix, + kind, + text, + condition_kind, + condition_text, + index + ), + "branch_child mismatch for {language:?} {kind} {text:?} index {index}" + ); + } + } + + #[test] + fn explicit_alternative_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "if ready\n call\nelsif other\n stop\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nelsif other\n stop\nend", + ), + ( + "if ready\n call\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nend", + ), + ( + "if ready:\n call()\nelif other:\n stop()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n 
call()\nelif other:\n stop()", + ), + ( + "if (ready) { call(); } else { stop(); }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (ready) { call(); } else { stop(); }", + ), + ( + "if ready then\n call()\nelseif other then\n stop()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nelseif other then\n stop()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.explicit_alternative(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature( + source, + language, + suffix, + "explicit_alternative", + kind, + text + ), + "explicit_alternative mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn wrap_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "first\nsecond\n", + Language::Ruby, + ".rb", + "identifier", + "second", + ), + ( + "first\nsecond\n", + Language::Python, + ".py", + "expression_statement", + "second", + ), + ( + "first;\nsecond;\n", + Language::TypeScript, + ".ts", + "identifier", + "second", + ), + ( + "print(first)\nprint(second)\n", + Language::Lua, + ".lua", + "identifier", + "second", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + let raw_wrapped = + normalizer.wrap("OUTER", vec![Child::Symbol("child".to_string())], node); + assert_eq!( + node_value(&raw_wrapped), + ruby_private_wrap_value(source, language, suffix, kind, text, false), + "wrap raw-source mismatch for {language:?} {kind} {text:?}" + ); + + let inner = normalizer.wrap("INNER", Vec::new(), node); + let node_wrapped = normalizer.wrap_from_source_node( + "OUTER", + 
vec![Child::Symbol("child".to_string())], + &inner, + ); + assert_eq!( + node_value(&node_wrapped), + ruby_private_wrap_value(source, language, suffix, kind, text, true), + "wrap normalized-source mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn source_before_child_matches_ruby_private_method() { + for (source, language, suffix, kind, text, child_kind, child_text) in [ + ( + "if ready\n call\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nend", + "then", + "\n call", + ), + ( + "if ready:\n call()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()", + "block", + "call()", + ), + ( + "if (ready) { call(); }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (ready) { call(); }", + "statement_block", + "{ call(); }", + ), + ( + "if ready then\n call()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nend", + "block", + "call()", + ), + ( + "puts value\n", + Language::Ruby, + ".rb", + "call", + "puts value", + "identifier", + "puts", + ), + ( + "call()\n", + Language::Python, + ".py", + "expression_statement", + "call()", + "identifier", + "call", + ), + ( + "call();\n", + Language::TypeScript, + ".ts", + "expression_statement", + "call();", + "identifier", + "call", + ), + ( + "call()\n", + Language::Lua, + ".lua", + "function_call", + "call()", + "identifier", + "call", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let child = first_raw_node(tree.root_node(), source, child_kind, child_text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let source_node = normalizer.source_before_child(node, child); + let wrapped = normalizer.wrap_from_source_node("OUTER", Vec::new(), &source_node); + + assert_eq!( + node_value(&wrapped), + ruby_private_source_before_child_wrap_value( + source, language, suffix, kind, text, child_kind, child_text + ), + "source_before_child mismatch 
for {language:?} {kind} {text:?} before {child_kind} {child_text:?}" + ); + } + } + + #[test] + fn source_from_nodes_matches_ruby_private_method() { + for (source, language, suffix, first_kind, first_text, last_kind, last_text) in [ + ( + "left + right\n", + Language::Ruby, + ".rb", + "identifier", + "left", + "identifier", + "right", + ), + ( + "left = one\nright = two\n", + Language::Python, + ".py", + "identifier", + "one", + "identifier", + "two", + ), + ( + "const left = one;\nconst right = two;\n", + Language::TypeScript, + ".ts", + "identifier", + "one", + "identifier", + "two", + ), + ( + "local left = one\nlocal right = two\n", + Language::Lua, + ".lua", + "expression_list", + "one", + "expression_list", + "two", + ), + ] { + let tree = raw_tree(source, language); + let first_raw = first_raw_node(tree.root_node(), source, first_kind, first_text); + let last_raw = first_raw_node(tree.root_node(), source, last_kind, last_text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let source_node = normalizer.source_from_nodes(first_raw, last_raw); + + assert_eq!( + node_value(&source_node), + ruby_private_source_from_nodes_value( + source, language, suffix, first_kind, first_text, last_kind, last_text + ), + "source_from_nodes mismatch for {language:?} {first_kind} {first_text:?} through {last_kind} {last_text:?}" + ); + } + } + + #[test] + fn source_from_normalized_nodes_matches_ruby_private_method() { + for (source, language, suffix, first_kind, first_text, last_kind, last_text) in [ + ( + "first\nsecond\n", + Language::Ruby, + ".rb", + "identifier", + "first", + "identifier", + "second", + ), + ( + "first\nsecond\n", + Language::Python, + ".py", + "expression_statement", + "first", + "expression_statement", + "second", + ), + ( + "first;\nsecond;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "first;", + "expression_statement", + "second;", + ), + ( + "print(first)\nprint(second)\n", + Language::Lua, + ".lua", + 
"function_call", + "print(first)", + "function_call", + "print(second)", + ), + ( + "first + second\n", + Language::Ruby, + ".rb", + "identifier", + "first", + "identifier", + "second", + ), + ] { + let tree = raw_tree(source, language); + let first_raw = first_raw_node(tree.root_node(), source, first_kind, first_text); + let last_raw = first_raw_node(tree.root_node(), source, last_kind, last_text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let first_node = normalizer.wrap("FIRST", Vec::new(), first_raw); + let last_node = normalizer.wrap("LAST", Vec::new(), last_raw); + let source_node = normalizer.source_from_normalized_nodes(&first_node, &last_node); + + assert_eq!( + node_value(&source_node), + ruby_private_source_from_normalized_nodes_value( + source, language, suffix, first_kind, first_text, last_kind, last_text + ), + "source_from_normalized_nodes mismatch for {language:?} {first_kind} {first_text:?} through {last_kind} {last_text:?}" + ); + } + } + + #[test] + fn named_field_matches_ruby_private_method() { + for (source, language, suffix, kind, text, field) in [ + ( + "def check(value)\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def check(value)\n value\nend", + "name", + ), + ( + "def check(value)\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def check(value)\n value\nend", + "missing", + ), + ( + "if ready:\n call()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()", + "body", + ), + ( + "if ready:\n call()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()", + "condition", + ), + ( + "function check(value) { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function check(value) { return value; }", + "body", + ), + ( + "function check(value)\n return value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function check(value)\n return value\nend", + "body", + ), + ] { + let tree = raw_tree(source, 
language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.named_field(node, field).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_named_field_signature(source, language, suffix, kind, text, field), + "named_field mismatch for {language:?} {kind} {text:?} field {field}" + ); + } + } + + #[test] + fn parent_node_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\nend\n", + Language::Ruby, + ".rb", + "identifier", + "check", + ), + ("value\n", Language::Ruby, ".rb", "program", "value\n"), + ( + "if ready:\n call()\n", + Language::Python, + ".py", + "identifier", + "ready", + ), + ( + "call(value);\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "call(value)\n", + Language::Lua, + ".lua", + "identifier", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.parent_node(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "parent_node", kind, text), + "parent_node mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn next_sibling_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("a + b\n", Language::Ruby, ".rb", "identifier", "a"), + ("a + b\n", Language::Python, ".py", "identifier", "a"), + ("a + b;\n", Language::TypeScript, ".ts", "identifier", "a"), + ("print(a, b)\n", Language::Lua, ".lua", "identifier", "a"), + ("a\n", Language::Ruby, ".rb", "identifier", "a"), + ] { + let tree = raw_tree(source, language); + let node = 
first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.next_sibling(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "next_sibling", kind, text), + "next_sibling mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn prev_sibling_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("a + b\n", Language::Ruby, ".rb", "identifier", "b"), + ("a + b\n", Language::Python, ".py", "identifier", "b"), + ("a + b;\n", Language::TypeScript, ".ts", "identifier", "b"), + ("print(a, b)\n", Language::Lua, ".lua", "identifier", "b"), + ("a\n", Language::Ruby, ".rb", "identifier", "a"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.prev_sibling(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "prev_sibling", kind, text), + "prev_sibling mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn next_named_sibling_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("a + b\n", Language::Ruby, ".rb", "identifier", "a"), + ("a + b\n", Language::Python, ".py", "identifier", "a"), + ("a + b;\n", Language::TypeScript, ".ts", "identifier", "a"), + ("print(a, b)\n", Language::Lua, ".lua", "identifier", "a"), + ("a\n", Language::Ruby, ".rb", "identifier", "a"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = 
normalizer.next_named_sibling(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature( + source, + language, + suffix, + "next_named_sibling", + kind, + text + ), + "next_named_sibling mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn ternary_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f(cond, a, b)\n cond ? a : b\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "cond ? a : b", + ), + ( + "value = a if cond else b\n", + Language::Python, + ".py", + "conditional_expression", + "a if cond else b", + ), + ( + "const value = cond ? a : b;\n", + Language::TypeScript, + ".ts", + "ternary_expression", + "cond ? a : b", + ), + ( + "local value = cond and a or b\n", + Language::Lua, + ".lua", + "expression_list", + "cond and a or b", + ), + ( + "def f(cond)\n cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "cond", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ternary_statement(node), + ruby_private_predicate(source, language, suffix, "ternary_statement?", kind, text), + "ternary_statement? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_ternary_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(cond, a, b)\n cond ? a : b\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "cond ? a : b", + ), + ( + "value = a if cond else b\n", + Language::Python, + ".py", + "conditional_expression", + "a if cond else b", + ), + ( + "const value = cond ? a : b;\n", + Language::TypeScript, + ".ts", + "ternary_expression", + "cond ? 
a : b", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_ternary_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_ternary_statement", + kind, + text + ), + "normalize_ternary_statement mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn ternary_statement_normalization_matches_ruby() { + for (source, language, suffix, if_text) in [ + ( + "def f(cond, a, b)\n cond ? a : b\nend\n", + Language::Ruby, + ".rb", + "cond ? a : b", + ), + ( + "def f(cond, a, b):\n return a if cond else b\n", + Language::Python, + ".py", + "a if cond else b", + ), + ( + "function f(cond: boolean, a: number, b: number) { return cond ? a : b; }\n", + Language::TypeScript, + ".ts", + "cond ? 
a : b", + ), + ] { + let root = parse_language_source(source, language, suffix); + let if_node = first_node(&root, "IF", if_text); + assert_eq!(child_node(if_node, 0).text, "cond"); + assert_eq!(child_node(if_node, 1).text, "a"); + assert_eq!(child_node(if_node, 2).text, "b"); + assert_ruby_parity(source, language, suffix); + } + } + + #[test] + fn case_argument_list_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n return case x\n when 1 then :one\n else :other\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "case x\n when 1 then :one\n else :other\n end", + ), + ( + "case x\nwhen 1 then :one\nelse :other\nend\n", + Language::Ruby, + ".rb", + "case", + "case x\nwhen 1 then :one\nelse :other\nend", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "case_clause", + "case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); break; }\n", + Language::TypeScript, + ".ts", + "switch_case", + "case 1: one(); break;", + ), + ( + "if value == 1 then one() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.case_argument_list(node), + ruby_private_predicate(source, language, suffix, "case_argument_list?", kind, text), + "case_argument_list? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn leading_function_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def outer\n def inner\n x\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "def inner\n x\n end", + ), + ( + "def outer():\n def inner():\n x\n", + Language::Python, + ".py", + "block", + "def inner():\n x", + ), + ( + "function outer()\n function inner()\n x()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "function inner()\n x()\n end", + ), + ( + "function outer() { function inner() { x; } }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function inner() { x; }", + ), + ( + "def outer\n x\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.leading_function_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_function_statement?", + kind, + text + ), + "leading_function_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_leading_function_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def outer\n def inner\n x\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "def inner\n x\n end", + ), + ( + "def outer():\n def inner():\n x\n", + Language::Python, + ".py", + "block", + "def inner():\n x", + ), + ( + "function outer()\n function inner()\n x()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "function inner()\n x()\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_function_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_function_statement", + kind, + text + ), + "normalize_leading_function_statement mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn leading_function_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def outer\n def inner\n x\n end\nend\n", + Language::Ruby, + ".rb", + ), + ( + "def outer():\n def inner():\n x\n", + Language::Python, + ".py", + ), + ( + "function outer()\n function inner()\n x()\n end\nend\n", + Language::Lua, + ".lua", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut defns = Vec::new(); + nodes_of_type(&root, "DEFN", &mut defns); + assert!( + defns + .iter() + .any(|node| matches!(node.children.first(), Some(Child::Symbol(name)) if name == "inner")), + "expected nested DEFN inner for {language:?} in {root:#?}" + ); + let mut iters = Vec::new(); + nodes_of_type(&root, "ITER", &mut iters); + assert!( + iters.iter().all(|node| !node.text.contains("inner")), + "nested function 
must not normalize as ITER for {language:?}: {iters:#?}" + ); + assert_ruby_parity(source, language, suffix); + } + } + + #[test] + fn leading_owner_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def outer\n class Inner\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "class Inner\n value\n end", + ), + ( + "def outer\n module Inner\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "module Inner\n value\n end", + ), + ( + "def outer():\n class Inner:\n pass\n", + Language::Python, + ".py", + "block", + "class Inner:\n pass", + ), + ( + "function outer() { class Inner {} }\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Inner {}", + ), + ( + "function outer()\n Inner = {}\nend\n", + Language::Lua, + ".lua", + "block", + "Inner = {}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.leading_owner_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_owner_statement?", + kind, + text + ), + "leading_owner_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_leading_owner_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def outer\n class Inner\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "class Inner\n value\n end", + ), + ( + "def outer\n module Inner\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "module Inner\n value\n end", + ), + ( + "def outer():\n class Inner:\n pass\n", + Language::Python, + ".py", + "block", + "class Inner:\n pass", + ), + ( + "function outer() { class Inner {} }\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Inner {}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_owner_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_owner_statement", + kind, + text + ), + "normalize_leading_owner_statement mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn modifier_keyword_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value if cond", + ), + ( + "def f\n value unless cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value unless cond", + ), + ( + "def f\n value while cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value while cond", + ), + ( + "def f\n value until cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value until cond", + ), + ( + "def f\n if cond\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "if cond\n value\n end", + ), + ( + "def f():\n if cond:\n 
value()\n", + Language::Python, + ".py", + "block", + "if cond:\n value()", + ), + ( + "function f() { if (cond) { value(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (cond) { value(); }", + ), + ( + "function f()\n if cond then\n value()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if cond then\n value()\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.modifier_keyword(node).unwrap_or_default(); + + assert_eq!( + rust, + ruby_private_string(source, language, suffix, "modifier_keyword", kind, text), + "modifier_keyword mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn modifier_parts_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value if cond", + ), + ( + "def f\n value unless cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value unless cond", + ), + ( + "def f\n if cond\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "if cond\n value\n end", + ), + ( + "def f():\n if cond:\n value()\n", + Language::Python, + ".py", + "block", + "if cond:\n value()", + ), + ( + "function f() { if (cond) { value(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (cond) { value(); }", + ), + ( + "function f()\n if cond then\n value()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if cond then\n value()\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.modifier_parts(node).map(|(action, condition)| { + ( + ( + action.kind().to_string(), + super::node_text(action, source).to_string(), + ), + ( + 
condition.kind().to_string(), + super::node_text(condition, source).to_string(), + ), + ) + }); + + assert_eq!( + rust, + ruby_private_modifier_parts_signature(source, language, suffix, kind, text), + "modifier_parts mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn modifier_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value if cond", + ), + ( + "def f\n return value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "return value if cond", + ), + ( + "def f\n if cond\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "if cond\n value\n end", + ), + ( + "def f():\n if cond:\n value()\n", + Language::Python, + ".py", + "block", + "if cond:\n value()", + ), + ( + "function f() { if (cond) { value(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (cond) { value(); }", + ), + ( + "function f()\n if cond then\n value()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if cond then\n value()\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.modifier_statement(node), + ruby_private_predicate(source, language, suffix, "modifier_statement?", kind, text), + "modifier_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_modifier_action_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "return value if cond\n", + Language::Ruby, + ".rb", + "return", + "return value", + ), + ("break if done\n", Language::Ruby, ".rb", "break", "break"), + ( + "value if cond\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_modifier_action(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_modifier_action", + kind, + text + ), + "normalize_modifier_action mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_modifier_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value if cond", + ), + ( + "def f\n value unless cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value unless cond", + ), + ( + "def f\n value while cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value while cond", + ), + ( + "def f\n value until cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value until cond", + ), + ( + "def f\n return value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "return value if cond", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_modifier_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + 
rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_modifier_statement", + kind, + text + ), + "normalize_modifier_statement mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn modifier_return_action_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "return value if ready\n", + Language::Ruby, + ".rb", + "return", + "return value", + ), + ("break if done\n", Language::Ruby, ".rb", "break", "break"), + ("next if skip\n", Language::Ruby, ".rb", "next", "next"), + ( + "return value if ready\n", + Language::Ruby, + ".rb", + "identifier", + "ready", + ), + ( + "def f():\n return value\n break\n continue\n", + Language::Python, + ".py", + "return_statement", + "return value", + ), + ( + "def f():\n return value\n break\n continue\n", + Language::Python, + ".py", + "break_statement", + "break", + ), + ( + "def f():\n return value\n break\n continue\n", + Language::Python, + ".py", + "continue_statement", + "continue", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "function f() { return value; break; continue; }\n", + Language::TypeScript, + ".ts", + "return_statement", + "return value;", + ), + ( + "function f() { return value; break; continue; }\n", + Language::TypeScript, + ".ts", + "break_statement", + "break;", + ), + ( + "function f() { return value; break; continue; }\n", + Language::TypeScript, + ".ts", + "continue_statement", + "continue;", + ), + ( + "function f() { return value; }\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "return_statement", + "return value", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "expression_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, 
language); + + assert_eq!( + normalizer.modifier_return_action(node), + ruby_private_predicate( + source, + language, + suffix, + "modifier_return_action?", + kind, + text + ), + "modifier_return_action? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn call_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "items.each do |item|\n item\nend\n", + Language::Ruby, + ".rb", + "call", + "items.each do |item|\n item\nend", + ), + ( + "items.map { |item| item }\n", + Language::Ruby, + ".rb", + "call", + "items.map { |item| item }", + ), + ( + "def f\n items.map { |item| item }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map { |item| item }", + ), + ("items.each\n", Language::Ruby, ".rb", "call", "items.each"), + ( + "def f():\n value()\n", + Language::Python, + ".py", + "function_definition", + "def f():\n value()", + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n value()\nend", + ), + ( + "function f() { value(); }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { value(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.call_block(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "call_block", kind, text), + "call_block mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn statement_block_call_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items.map { |item| item }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map { |item| item }", + ), + ( + "items.map { |item| item }\n", + Language::Ruby, + ".rb", 
+ "call", + "items.map { |item| item }", + ), + ( + "def f\n foo(bar) { baz }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo(bar) { baz }", + ), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "def f():\n value()\n", + Language::Python, + ".py", + "function_definition", + "def f():\n value()", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "function f() { value(); }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { value(); }", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n value()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.statement_block_call(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature( + source, + language, + suffix, + "statement_block_call", + kind, + text + ), + "statement_block_call mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn statement_call_with_block_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items.map { |item| item }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map { |item| item }", + ), + ( + "items.map { |item| item }\n", + Language::Ruby, + ".rb", + "call", + "items.map { |item| item }", + ), + ( + "def f\n foo(bar) { baz }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo(bar) { baz }", + ), + ( + "def f\n items.map\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map", + ), + ( + "def f():\n value(lambda item: 
item)\n", + Language::Python, + ".py", + "function_definition", + "def f():\n value(lambda item: item)", + ), + ( + "items.map(item => item);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "items.map(item => item);", + ), + ( + "items:map(function(item) return item end)\n", + Language::Lua, + ".lua", + "function_call", + "items:map(function(item) return item end)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.statement_call_with_block(node), + ruby_private_predicate( + source, + language, + suffix, + "statement_call_with_block?", + kind, + text + ), + "statement_call_with_block? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_statement_call_with_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [( + "def f\n items.map { |item| item }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map { |item| item }", + )] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_statement_call_with_block(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_statement_call_with_block", + kind, + text + ), + "normalize_statement_call_with_block mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn visibility_inline_def_call_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "private def hidden; value; end\n", + Language::Ruby, + ".rb", + "call", + "private def hidden; value; end", + ), + ( + "public def visible\n value\nend\n", + Language::Ruby, + ".rb", + "call", + 
"public def visible\n value\nend", + ), + ( + "private :hidden\n", + Language::Ruby, + ".rb", + "call", + "private :hidden", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "expression_statement", + "private(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + "private(value)", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "function_call", + "private(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.visibility_inline_def_call(node), + ruby_private_predicate( + source, + language, + suffix, + "visibility_inline_def_call?", + kind, + text + ), + "visibility_inline_def_call? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn visibility_inline_def_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "private def hidden\n value\n end", + ), + ( + "class C\n module_function def helper\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "module_function def helper\n value\n end", + ), + ( + "class C\n private :hidden\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "private :hidden", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "expression_statement", + "private(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "private(value);", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "function_call", + "private(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let function = 
normalizer.named_children(node).into_iter().next().expect( + "visibility_inline_def_statement test target should have a first named child", + ); + + assert_eq!( + normalizer.visibility_inline_def_statement(node, function), + ruby_private_visibility_inline_def_statement_predicate( + source, language, suffix, kind, text + ), + "visibility_inline_def_statement? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_visibility_inline_def_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "private def hidden\n value\nend\n", + Language::Ruby, + ".rb", + "call", + "private def hidden\n value\nend", + ), + ( + "public def visible\n value\nend\n", + Language::Ruby, + ".rb", + "call", + "public def visible\n value\nend", + ), + ( + "module_function def self.helper\n value\nend\n", + Language::Ruby, + ".rb", + "call", + "module_function def self.helper\n value\nend", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "expression_statement", + "private(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + "private(value)", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "function_call", + "private(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_visibility_inline_def(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_visibility_inline_def", + kind, + text + ), + "normalize_visibility_inline_def mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn inline_def_from_argument_list_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + 
Language::Ruby, + ".rb", + "argument_list", + "def hidden\n value\n end", + ), + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "class C\n private :hidden\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":hidden", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "arguments", + "(value)", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "arguments", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_def_from_argument_list(Some(node)) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "inline_def_from_argument_list", + kind, + text + ), + "inline_def_from_argument_list mismatch for {language:?} {kind} {text:?}" + ); + } + + for (source, language, suffix) in [ + ("private def hidden\n value\nend\n", Language::Ruby, ".rb"), + ("private(value)\n", Language::Python, ".py"), + ("private(value);\n", Language::TypeScript, ".ts"), + ("private(value)\n", Language::Lua, ".lua"), + ] { + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_def_from_argument_list(None) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_inline_def_from_argument_list_nil_value(source, language, suffix), + "inline_def_from_argument_list nil mismatch for {language:?}" + ); + } + } + + #[test] + fn inline_def_from_source_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, 
+ ".rb", + "argument_list", + "def hidden\n value\n end", + ), + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "def hidden\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def hidden\n value\nend", + ), + ( + "def self.hidden\n value\nend\n", + Language::Ruby, + ".rb", + "singleton_method", + "def self.hidden\n value\nend", + ), + ( + "class C\n private :hidden\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":hidden", + ), + ( + "def hidden():\n value\n", + Language::Python, + ".py", + "function_definition", + "def hidden():\n value", + ), + ( + "function hidden() {\n value;\n}\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function hidden() {\n value;\n}", + ), + ( + "function hidden()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function hidden()\n value()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_def_from_source(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "inline_def_from_source", + kind, + text + ), + "inline_def_from_source mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn inline_def_from_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "private def hidden\n value\n end", + ), + ( + "class C\n module_function def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "module_function def self.hidden\n value\n end", + ), + ( + "private def hidden\n value\nend\n", + Language::Ruby, + 
".rb", + "call", + "private def hidden\n value\nend", + ), + ( + "class C\n private :hidden\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "private :hidden", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "expression_statement", + "private(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "private(value);", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "function_call", + "private(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_def_from_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "inline_def_from_statement", + kind, + text + ), + "inline_def_from_statement mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn inline_def_body_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def hidden\n value\n end", + ), + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "class C\n private def empty\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def empty\n end", + ), + ( + "def hidden():\n value\n", + Language::Python, + ".py", + "function_definition", + "def hidden():\n value", + ), + ( + "function hidden() {\n value;\n}\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function hidden() {\n value;\n}", + ), + ( + "function hidden()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function hidden()\n value()\nend", + ), + ] { + let tree = raw_tree(source, 
language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.inline_def_body(node).map(|body| { + ( + body.kind().to_string(), + super::node_text(body, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature( + source, + language, + suffix, + "inline_def_body", + kind, + text + ), + "inline_def_body mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn inline_def_receiver_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def hidden\n value\n end", + ), + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "class C\n private def Owner.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def Owner.hidden\n value\n end", + ), + ( + "class C\n private def Owner::Nested.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def Owner::Nested.hidden\n value\n end", + ), + ( + "def hidden():\n value\n", + Language::Python, + ".py", + "function_definition", + "def hidden():\n value", + ), + ( + "function hidden() {\n value;\n}\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function hidden() {\n value;\n}", + ), + ( + "function hidden()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function hidden()\n value()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.inline_def_receiver(node).map(|receiver| { + ( + receiver.kind().to_string(), + super::node_text(receiver, source).to_string(), + ) + }); + + assert_eq!( + 
rust, + ruby_private_node_signature( + source, + language, + suffix, + "inline_def_receiver", + kind, + text + ), + "inline_def_receiver mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn inline_def_name_after_receiver_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "class C\n private def Owner.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def Owner.hidden\n value\n end", + ), + ( + "class C\n private def Owner::Nested.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def Owner::Nested.hidden\n value\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let receiver = normalizer + .inline_def_receiver(node) + .expect("inline def receiver should exist for name-after-receiver case"); + let rust = normalizer + .inline_def_name_after_receiver(node, receiver) + .unwrap_or_default(); + + assert_eq!( + rust, + ruby_private_inline_def_name_after_receiver(source, language, suffix, kind, text), + "inline_def_name_after_receiver mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn inline_parameter_begin_marker_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + ), + ( + "def f a; a; end\n", + Language::Ruby, + ".rb", + "method", + "def f a; a; end", + ), + ( + "def f(a)\n a\nend\n", + Language::Ruby, + ".rb", + "method", + "def f(a)\n a\nend", + ), + ( + "def f(a):\n return a\n", + Language::Python, + ".py", + "function_definition", + "def f(a):\n return a", + ), + ( + "function f(a) { return a; }\n", + Language::TypeScript, 
+ ".ts", + "function_declaration", + "function f(a) { return a; }", + ), + ( + "function f(a)\n return a\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f(a)\n return a\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_parameter_begin_marker(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_inline_parameter_begin_marker_value( + source, language, suffix, kind, text + ), + "inline_parameter_begin_marker mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn prepend_inline_parameter_begin_matches_ruby_private_method() { + let scalar = test_node("VCALL", Vec::new()); + let block = test_node( + "BLOCK", + vec![Child::Node(Box::new(scalar.clone())), Child::Nil], + ); + let empty_block = test_node("BLOCK", vec![Child::Nil]); + + let cases = vec![ + ( + "no_marker", + "def f(a)\n a\nend\n", + Language::Ruby, + ".rb", + "method", + "def f(a)\n a\nend", + Some(scalar.clone()), + ), + ( + "marker_nil_body", + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + None, + ), + ( + "marker_scalar_body", + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + Some(scalar.clone()), + ), + ( + "marker_block_body", + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + Some(block), + ), + ( + "marker_empty_block", + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + Some(empty_block), + ), + ( + "non_ruby", + "def f(a):\n return a\n", + Language::Python, + ".py", + "function_definition", + "def f(a):\n return a", + Some(scalar), + ), + ]; + + for (label, source, language, suffix, kind, text, body) in cases { + let tree = raw_tree(source, language); + let node = 
first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .prepend_inline_parameter_begin(node, body.clone()) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + let body_value = body.as_ref().map(node_value).unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_prepend_inline_parameter_begin_value( + source, + language, + suffix, + kind, + text, + &body_value, + ), + "prepend_inline_parameter_begin mismatch for {label}" + ); + } + } + + #[test] + fn scalar_argument_list_value_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return yield\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "yield", + ), + ( + "def f\n return nil\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "nil", + ), + ( + "def f\n return true\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "true", + ), + ( + "def f\n return false\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "false", + ), + ( + "def f\n return :ok?\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":ok?", + ), + ( + "def f\n return 12\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "12", + ), + ( + "def f\n return -12\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "-12", + ), + ( + "def f\n return name\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "name", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "function f() { return value; }\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "function f()\n return value\nend\n", + Language::Lua, + ".lua", + "expression_list", + "value", + ), + ( + "function f() { return yield; }\n", + Language::TypeScript, + ".ts", + "yield_expression", + "yield", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut 
normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .scalar_argument_list_value(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "scalar_argument_list_value", + kind, + text, + ), + "scalar_argument_list_value mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn local_or_call_for_name_matches_ruby_private_method() { + for (source, language, suffix, kind, text, name, local) in [ + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "hash_key_symbol", + "name", + "name", + false, + ), + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "hash_key_symbol", + "name", + "name", + true, + ), + ( + "def f():\n value\n", + Language::Python, + ".py", + "identifier", + "f", + "f", + false, + ), + ( + "function f() { value; }\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + "value", + false, + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "identifier", + "value", + "value", + false, + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + if local { + normalizer + .local_stack + .push(BTreeSet::from([name.to_string()])); + } + let rust = node_value(&normalizer.local_or_call_for_name(name, node)); + + assert_eq!( + rust, + ruby_private_local_or_call_for_name_value( + source, language, suffix, kind, text, name, local + ), + "local_or_call_for_name mismatch for {language:?} {name:?} local={local}" + ); + } + } + + #[test] + fn literal_arguments_from_text_normalization_matches_ruby() { + let symbol_source = "puts :ok\n"; + let root = parse_language_source(symbol_source, Language::Ruby, ".rb"); + let fcall = first_node(&root, "FCALL", "puts :ok"); + assert_eq!( + fcall.children.first(), + 
Some(&Child::Symbol("puts".to_string())) + ); + let args = child_node(fcall, 1); + assert_eq!(args.r#type, "LIST"); + let lit = child_node(args, 0); + assert_eq!(lit.r#type, "LIT"); + assert_eq!(lit.children.first(), Some(&Child::Symbol("ok".to_string()))); + assert_ruby_parity(symbol_source, Language::Ruby, ".rb"); + + let heredoc_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; + let root = parse_language_source(heredoc_source, Language::Ruby, ".rb"); + let fcall = first_node(&root, "FCALL", "puts <<~TXT"); + let args = child_node(fcall, 1); + assert_eq!(args.r#type, "LIST"); + let dstr = child_node(args, 0); + assert_eq!(dstr.r#type, "DSTR"); + assert_eq!(child_types(dstr), vec!["STR"]); + let body = child_node(dstr, 0); + assert_eq!( + body.children.first(), + Some(&Child::String("\n hi\n ".to_string())) + ); + assert_ruby_parity(heredoc_source, Language::Ruby, ".rb"); + } + + #[test] + fn literal_symbol_arguments_matches_ruby_scan_contract() { + assert_eq!( + super::literal_symbol_arguments(":one, :two?, :three!, :four=, :1, ::Name"), + vec![ + "one".to_string(), + "two?".to_string(), + "three!".to_string(), + "four=".to_string(), + "Name".to_string(), + ] + ); + } + + #[test] + fn elide_tail_returns_matches_ruby_private_method() { + let leaf = |node_type: &str| test_node(node_type, vec![Child::String("value".to_string())]); + let return_leaf = || test_node("RETURN", vec![Child::Node(Box::new(leaf("LVAR")))]); + let protected_def = test_node( + "DEFN", + vec![ + Child::Symbol("kept".to_string()), + Child::Node(Box::new(test_node( + "SCOPE", + vec![Child::Nil, Child::Nil, Child::Node(Box::new(return_leaf()))], + ))), + ], + ); + let cases = vec![ + None, + Some(return_leaf()), + Some(test_node( + "BLOCK", + vec![ + Child::Node(Box::new(leaf("LVAR"))), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "SCOPE", + vec![Child::Nil, Child::Nil, Child::Node(Box::new(return_leaf()))], + )), + Some(test_node( + "IF", + vec![ + 
Child::Node(Box::new(leaf("COND"))), + Child::Node(Box::new(return_leaf())), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "UNLESS", + vec![ + Child::Node(Box::new(leaf("COND"))), + Child::Node(Box::new(return_leaf())), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "CASE", + vec![ + Child::Node(Box::new(leaf("LVAR"))), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "CASE2", + vec![Child::Node(Box::new(return_leaf()))], + )), + Some(test_node( + "WHEN", + vec![ + Child::Node(Box::new(leaf("LIST"))), + Child::Node(Box::new(return_leaf())), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "RESCUE", + vec![ + Child::Node(Box::new(return_leaf())), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "RESBODY", + vec![ + Child::Node(Box::new(leaf("LIST"))), + Child::Node(Box::new(return_leaf())), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(protected_def), + ]; + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + for node in cases { + let input = node.as_ref().map(node_value).unwrap_or(Value::Null); + let rust = normalizer + .elide_tail_returns(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_elide_tail_returns_value(&input, true), + "elide_tail_returns mismatch for input {input}" + ); + } + + let non_ruby = Some(return_leaf()); + let input = non_ruby.as_ref().map(node_value).unwrap_or(Value::Null); + let normalizer = super::TreeSitterNormalizer::new("", Language::Python); + let rust = normalizer + .elide_tail_returns(non_ruby) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!(rust, input); + assert_eq!(ruby_private_elide_tail_returns_value(&input, false), input); + } + + #[test] + fn elide_implicit_nil_body_matches_ruby_private_method() { + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + let leaf = || test_node("LVAR", 
vec![Child::String("value".to_string())]); + let nil_node = || test_node("NIL", Vec::new()); + let cases = vec![ + None, + Some(nil_node()), + Some(leaf()), + Some(test_node( + "BLOCK", + vec![ + Child::Node(Box::new(leaf())), + Child::Node(Box::new(nil_node())), + Child::Node(Box::new(nil_node())), + ], + )), + Some(test_node( + "BLOCK", + vec![Child::Nil, Child::Node(Box::new(nil_node()))], + )), + Some(test_node( + "BLOCK", + vec![ + Child::Node(Box::new(leaf())), + Child::Node(Box::new(leaf())), + Child::Node(Box::new(nil_node())), + ], + )), + ]; + + for node in cases { + let input = node.as_ref().map(node_value).unwrap_or(Value::Null); + let rust = normalizer + .elide_implicit_nil_body(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_elide_implicit_nil_body_value(&input, true), + "elide_implicit_nil_body mismatch for input {input}" + ); + } + + let non_ruby = Some(nil_node()); + let input = non_ruby.as_ref().map(node_value).unwrap_or(Value::Null); + let normalizer = super::TreeSitterNormalizer::new("", Language::Python); + let rust = normalizer + .elide_implicit_nil_body(non_ruby) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!(rust, input); + assert_eq!( + ruby_private_elide_implicit_nil_body_value(&input, false), + input + ); + } + + #[test] + fn drop_trailing_nil_statement_matches_ruby_private_method() { + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + let leaf = |node_type: &str| test_node(node_type, vec![Child::Symbol("value".to_string())]); + let nil_node = || test_node("NIL", Vec::new()); + let block = |children| test_node("BLOCK", children); + + for node in [ + None, + Some(nil_node()), + Some(block(vec![ + Child::Node(Box::new(leaf("LASGN"))), + Child::Node(Box::new(nil_node())), + ])), + Some(block(vec![ + Child::Node(Box::new(leaf("LASGN"))), + Child::Node(Box::new(nil_node())), + Child::Node(Box::new(nil_node())), + ])), + Some(block(vec![ + 
Child::Node(Box::new(leaf("LASGN"))), + Child::Nil, + Child::Node(Box::new(nil_node())), + ])), + Some(block(vec![Child::Nil, Child::Node(Box::new(nil_node()))])), + Some(block(vec![ + Child::Node(Box::new(leaf("LASGN"))), + Child::Nil, + Child::Node(Box::new(leaf("VCALL"))), + ])), + Some(block(vec![ + Child::Node(Box::new(leaf("LASGN"))), + Child::Nil, + Child::Node(Box::new(leaf("VCALL"))), + Child::Node(Box::new(nil_node())), + ])), + ] { + let input = node.as_ref().map(node_value).unwrap_or(Value::Null); + let rust = normalizer + .drop_trailing_nil_statement(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_drop_trailing_nil_statement_value(&input), + "drop_trailing_nil_statement mismatch for input {input}" + ); + } + } + + #[test] + fn symbol_literal_node_matches_ruby_private_predicate() { + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + for (node, node_type, child_kind) in [ + (None, None, None), + ( + Some(test_node("LIT", vec![Child::Symbol("value".to_string())])), + Some("LIT"), + Some("symbol"), + ), + ( + Some(test_node("LIT", vec![Child::String("value".to_string())])), + Some("LIT"), + Some("string"), + ), + (Some(test_node("LIT", Vec::new())), Some("LIT"), None), + ( + Some(test_node("STR", vec![Child::Symbol("value".to_string())])), + Some("STR"), + Some("symbol"), + ), + ( + Some(test_node( + "LIT", + vec![Child::Node(Box::new(test_node("NIL", Vec::new())))], + )), + Some("LIT"), + Some("node"), + ), + ( + Some(test_node("LIT", vec![Child::Nil])), + Some("LIT"), + Some("nil"), + ), + ] { + assert_eq!( + normalizer.symbol_literal_node(node.as_ref()), + ruby_private_symbol_literal_node_predicate(node_type, child_kind), + "symbol_literal_node? 
mismatch for node_type={node_type:?} child_kind={child_kind:?}" + ); + } + } + + #[test] + fn same_ts_node_matches_ruby_private_predicate() { + for ( + source, + language, + suffix, + left_kind, + left_text, + left_index, + right_kind, + right_text, + right_index, + ) in [ + ( + "value\nvalue\n", + Language::Ruby, + ".rb", + "identifier", + "value", + 0, + "identifier", + "value", + 0, + ), + ( + "value\nvalue\n", + Language::Ruby, + ".rb", + "identifier", + "value", + 0, + "identifier", + "value", + 1, + ), + ( + "value\nvalue\n", + Language::Python, + ".py", + "expression_statement", + "value", + 0, + "expression_statement", + "value", + 0, + ), + ( + "value\nvalue\n", + Language::Python, + ".py", + "expression_statement", + "value", + 0, + "expression_statement", + "value", + 1, + ), + ( + "value;\nvalue;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "value;", + 0, + "expression_statement", + "value;", + 1, + ), + ( + "value()\nvalue()\n", + Language::Lua, + ".lua", + "function_call", + "value()", + 0, + "function_call", + "value()", + 0, + ), + ( + "value()\nvalue()\n", + Language::Lua, + ".lua", + "function_call", + "value()", + 0, + "function_call", + "value()", + 1, + ), + ] { + let tree = raw_tree(source, language); + let left = nth_raw_node(tree.root_node(), source, left_kind, left_text, left_index); + let right = nth_raw_node( + tree.root_node(), + source, + right_kind, + right_text, + right_index, + ); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.same_ts_node(left, right), + ruby_private_same_ts_node_predicate( + source, + language, + suffix, + left_kind, + left_text, + left_index, + right_kind, + right_text, + right_index + ), + "same_ts_node? 
mismatch for {language:?} {left_kind}:{left_text:?}[{left_index}] vs {right_kind}:{right_text:?}[{right_index}]" + ); + } + } + + #[test] + fn parent_named_child_matches_ruby_private_predicate() { + for ( + source, + language, + suffix, + parent_kind, + parent_text, + parent_index, + child_kind, + child_text, + child_index, + ) in [ + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "pair", + "name:", + 0, + "hash_key_symbol", + "name", + 0, + ), + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "pair", + "name:", + 0, + "identifier", + "f", + 0, + ), + ( + "def f():\n value\n", + Language::Python, + ".py", + "function_definition", + "def f():\n value", + 0, + "identifier", + "f", + 0, + ), + ( + "def f():\n value\n", + Language::Python, + ".py", + "block", + "value", + 0, + "identifier", + "f", + 0, + ), + ( + "function f() { value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { value; }", + 0, + "identifier", + "f", + 0, + ), + ( + "function f() { value; }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ value; }", + 0, + "identifier", + "f", + 0, + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n value()\nend", + 0, + "identifier", + "f", + 0, + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "block", + "value()", + 0, + "identifier", + "f", + 0, + ), + ] { + let tree = raw_tree(source, language); + let parent = nth_raw_node( + tree.root_node(), + source, + parent_kind, + parent_text, + parent_index, + ); + let child = nth_raw_node( + tree.root_node(), + source, + child_kind, + child_text, + child_index, + ); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.parent_named_child(parent, child), + ruby_private_parent_named_child_predicate( + source, + language, + suffix, + parent_kind, + parent_text, + parent_index, + child_kind, + child_text, + child_index + 
), + "parent_named_child? mismatch for {language:?} {parent_kind}:{parent_text:?}[{parent_index}] -> {child_kind}:{child_text:?}[{child_index}]" + ); + } + } + + #[test] + fn node_key_matches_ruby_private_method() { + for (source, language, suffix, kind, text, index) in [ + ( + "value\nvalue\n", + Language::Ruby, + ".rb", + "identifier", + "value", + 0, + ), + ( + "value\nvalue\n", + Language::Ruby, + ".rb", + "identifier", + "value", + 1, + ), + ( + "value\nvalue\n", + Language::Python, + ".py", + "expression_statement", + "value", + 1, + ), + ( + "value;\nvalue;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "value;", + 0, + ), + ( + "value()\nvalue()\n", + Language::Lua, + ".lua", + "function_call", + "value()", + 1, + ), + ] { + let tree = raw_tree(source, language); + let node = nth_raw_node(tree.root_node(), source, kind, text, index); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.node_key(node), + ruby_private_node_key_signature(source, language, suffix, kind, text, index), + "node_key mismatch for {language:?} {kind}:{text:?}[{index}]" + ); + } + } + + #[test] + fn bare_identifier_text_matches_ruby_private_predicate() { + for text in [ + "value", + "_value", + "value1", + "value?", + "value!", + "value=", + " value? ", + "", + "1value", + "value-name", + "value?name", + "value??", + "value!=", + "value =", + ] { + assert_eq!( + super::bare_identifier_text(text), + ruby_private_text_predicate(Language::Ruby, "bare_identifier_text?", text), + "bare_identifier_text? 
mismatch for {text:?}" + ); + } + } + + #[test] + fn hidden_match_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "match(value)\n", + Language::Ruby, + ".rb", + "call", + "match(value)", + ), + ( + "match value:\n case 1:\n result\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n result", + ), + ( + "match(value)\n", + Language::Python, + ".py", + "expression_statement", + "match(value)", + ), + ( + "match(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "match(value);", + ), + ( + "match(value)\n", + Language::Lua, + ".lua", + "function_call", + "match(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.hidden_match(node), + ruby_private_predicate(source, language, suffix, "hidden_match?", kind, text), + "hidden_match? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn kind_type_matches_ruby_private_method() { + for kind in [ + "", + "body_statement", + "block_body", + "block", + "statements", + "expression_statement", + "alreadyCAPS", + "argument-list??", + "foo__bar", + "123kind", + "é_node", + ] { + assert_eq!( + super::kind_type(kind), + ruby_private_text_string(Language::Ruby, "kind_type", kind), + "kind_type mismatch for {kind:?}" + ); + } + } + + #[test] + fn ts_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("ready?\n", Language::Ruby, ".rb", "call", "ready?"), + ( + "value\n", + Language::Python, + ".py", + "expression_statement", + "value", + ), + ( + "let value = 1;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + + assert_eq!( + super::ts_node(Some(node)), + ruby_private_predicate(source, language, suffix, "ts_node?", kind, text), + "ts_node? 
raw-node mismatch for {language:?} {kind}:{text:?}" + ); + } + + assert_eq!(super::ts_node(None), ruby_private_ts_node_value("nil")); + assert!(!ruby_private_ts_node_value("string")); + assert!(!ruby_private_ts_node_value("normalized_node")); + } + + #[test] + fn command_call_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n puts value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "puts value", + ), + ( + "def f\n foo { value }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { value }", + ), + ( + "def f\n foo\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo", + ), + ( + "def f\n user.name value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name value", + ), + ( + "print(value)\n", + Language::Python, + ".py", + "expression_statement", + "print(value)", + ), + ( + "console.log(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "console.log(value);", + ), + ( + "print(value)\n", + Language::Lua, + ".lua", + "function_call", + "print(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.command_call_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "command_call_statement?", + kind, + text + ), + "command_call_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_command_call_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n puts value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "puts value", + ), + ( + "def f\n foo { value }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { value }", + ), + ( + "print(value)\n", + Language::Python, + ".py", + "expression_statement", + "print(value)", + ), + ( + "console.log(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "console.log(value);", + ), + ( + "print(value)\n", + Language::Lua, + ".lua", + "function_call", + "print(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_command_call_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_command_call_statement", + kind, + text + ), + "normalize_command_call_statement mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn zero_child_identifier_call_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("foo?\n", Language::Ruby, ".rb", "call", "foo?"), + ("foo!\n", Language::Ruby, ".rb", "call", "foo!"), + ("foo()\n", Language::Ruby, ".rb", "call", "foo()"), + ( + "foo()\n", + Language::Python, + ".py", + "expression_statement", + "foo()", + ), + ( + "foo();\n", + Language::TypeScript, + ".ts", + "call_expression", + "foo()", + ), + ("foo()\n", Language::Lua, ".lua", "function_call", "foo()"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + 
assert_eq!( + normalizer.zero_child_identifier_call(node), + ruby_private_predicate( + source, + language, + suffix, + "zero_child_identifier_call?", + kind, + text + ), + "zero_child_identifier_call? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn zero_child_identifier_call_normalization_matches_ruby() { + for source in ["foo?\n", "foo!\n"] { + let root = parse_language_source(source, Language::Ruby, ".rb"); + let text = source.trim(); + let vcall = first_node(&root, "VCALL", text); + assert_eq!( + vcall.children.first(), + Some(&Child::Symbol(text.to_string())) + ); + assert_ruby_parity(source, Language::Ruby, ".rb"); + } + } + + #[test] + fn normalize_zero_child_call_matches_ruby_private_method() { + for source in ["foo?\n", "foo!\n", "foo()\n"] { + let text = source.trim(); + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "call", text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer.normalize_zero_child_call(node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_zero_child_call", + "call", + text + ), + "normalize_zero_child_call mismatch for {text:?}" + ); + } + } + + #[test] + fn normalize_const_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("Foo\n", Language::Ruby, ".rb", "constant", "Foo"), + ( + "Foo::Bar\n", + Language::Ruby, + ".rb", + "scope_resolution", + "Foo::Bar", + ), + ( + "class Foo::Bar::Baz\nend\n", + Language::Ruby, + ".rb", + "scope_resolution", + "Foo::Bar::Baz", + ), + ( + "type Alias = Foo;\n", + Language::TypeScript, + ".ts", + "type_identifier", + "Foo", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.normalize_const(node); + + 
assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_const", + kind, + text + ), + "normalize_const mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn assignment_receiver_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("value += 1\n", Language::Ruby, ".rb", "identifier", "value"), + ( + "@value += 1\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + ), + ( + "$value += 1\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ("VALUE += 1\n", Language::Ruby, ".rb", "constant", "VALUE"), + ( + "user.value += 1\n", + Language::Ruby, + ".rb", + "call", + "user.value", + ), + ( + "value += 1\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "user.value += 1\n", + Language::Python, + ".py", + "attribute", + "user.value", + ), + ( + "value += 1;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "user.value += 1;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.value", + ), + ( + "value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "user.value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "user.value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .assignment_receiver(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "assignment_receiver", + kind, + text + ), + "assignment_receiver mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn assignment_target_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "@value = 1\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + 
), + ( + "$value = 1\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "items[index] = value\n", + Language::Ruby, + ".rb", + "element_reference", + "items[index]", + ), + ( + "user.value = 1\n", + Language::Ruby, + ".rb", + "call", + "user.value", + ), + ( + "user.value = 1\n", + Language::Python, + ".py", + "attribute", + "user.value", + ), + ( + "user.value = 1;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.value", + ), + ( + "user.value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "user.value", + ), + ( + "value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let source_node = normalizer.parent_node(node).unwrap_or(node); + let right = normalizer + .assignment_right(source_node) + .and_then(|right| normalizer.normalize_node(right)); + let rust = normalizer + .assignment_target(node, right, source_node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_assignment_target_value(source, language, suffix, kind, text), + "assignment_target mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn augmented_assignment_value_matches_ruby_private_method() { + for (source, language, suffix, kind, text, operator) in [ + ( + "value += 1\n", + Language::Ruby, + ".rb", + "identifier", + "value", + "+", + ), + ( + "@value *= 2\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + "*", + ), + ( + "$value += 1\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + "+", + ), + ( + "VALUE -= 1\n", + Language::Ruby, + ".rb", + "constant", + "VALUE", + "-", + ), + ( + "user.value += 1\n", + Language::Ruby, + ".rb", + "call", + "user.value", + "+", + ), + ( + "value += 1\n", + Language::Python, + ".py", + "identifier", + "value", 
+ "+", + ), + ( + "user.value += 1\n", + Language::Python, + ".py", + "attribute", + "user.value", + "+", + ), + ( + "value += 1;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + "+", + ), + ( + "user.value += 1;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.value", + "+", + ), + ( + "value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "value", + "+", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let source_node = normalizer.parent_node(node).unwrap_or(node); + let right_raw = normalizer.assignment_right(source_node); + let rust = + normalizer.augmented_assignment_value(node, operator, right_raw, source_node); + + assert_eq!( + node_value(&rust), + ruby_private_augmented_assignment_value( + source, language, suffix, kind, text, operator + ), + "augmented_assignment_value mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn target_name_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "$value = other\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "VALUE = other\n", + Language::Ruby, + ".rb", + "constant", + "VALUE", + ), + ( + "a, *rest = values\n", + Language::Ruby, + ".rb", + "rest_assignment", + "*rest", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + 
Value::String(normalizer.target_name(node)), + ruby_private_normalize_method_value( + source, + language, + suffix, + "target_name", + kind, + text + ), + "target_name mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_multiple_assignment_matches_ruby_private_method() { + for (source, kind, text) in [ + ("a, b = values\n", "assignment", "a, b = values"), + ("$a, b = values\n", "assignment", "$a, b = values"), + ("a, *rest = values\n", "assignment", "a, *rest = values"), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let left = normalizer + .assignment_left(node) + .expect("multiple assignment should have left side"); + let right = normalizer + .assignment_right(node) + .and_then(|right| normalizer.normalize_node(right)); + let rust = normalizer.normalize_multiple_assignment(left, right, node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_multiple_assignment_value( + source, + Language::Ruby, + ".rb", + kind, + text + ), + "normalize_multiple_assignment mismatch for {text:?}" + ); + } + } + + #[test] + fn normalize_assignment_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "assignment", + "value = other", + ), + ( + "@value = other\n", + Language::Ruby, + ".rb", + "assignment", + "@value = other", + ), + ( + "$value = other\n", + Language::Ruby, + ".rb", + "assignment", + "$value = other", + ), + ( + "items[index] = value\n", + Language::Ruby, + ".rb", + "assignment", + "items[index] = value", + ), + ( + "user.value = other\n", + Language::Ruby, + ".rb", + "assignment", + "user.value = other", + ), + ( + "a, b = values\n", + Language::Ruby, + ".rb", + "assignment", + "a, b = values", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = 
other", + ), + ( + "user.value = other\n", + Language::Python, + ".py", + "expression_statement", + "user.value = other", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "value = other;", + ), + ( + "user.value = other;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "user.value = other;", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ( + "user.value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "user.value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_assignment(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_assignment", + kind, + text + ), + "normalize_assignment mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_assignment_lhs_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "@value = other\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + ), + ( + "$value = other\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "items[index] = value\n", + Language::Ruby, + ".rb", + "element_reference", + "items[index]", + ), + ( + "user.value = other\n", + Language::Ruby, + ".rb", + "call", + "user.value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "user.value = other\n", + Language::Python, + ".py", + "attribute", + "user.value", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "user.value = other;\n", + 
Language::TypeScript, + ".ts", + "member_expression", + "user.value", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "user.value = other\n", + Language::Lua, + ".lua", + "variable_list", + "user.value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_assignment_lhs(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_assignment_lhs", + kind, + text + ), + "normalize_assignment_lhs mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_begin_matches_ruby_private_method() { + for (source, text) in [ + ("begin\n work\n done\nend\n", "begin\n work\n done\nend"), + ( + "begin\n work\nensure\n cleanup\nend\n", + "begin\n work\nensure\n cleanup\nend", + ), + ( + "begin\n work\nrescue Error => e\n handle\nend\n", + "begin\n work\nrescue Error => e\n handle\nend", + ), + ( + "begin\n work\nrescue Error => e\n handle\nensure\n cleanup\nend\n", + "begin\n work\nrescue Error => e\n handle\nensure\n cleanup\nend", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "begin", text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_begin(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_begin", + "begin", + text + ), + "normalize_begin mismatch for {text:?}" + ); + } + } + + #[test] + fn normalize_block_argument_matches_ruby_private_method() { + for (source, text) in [ + ("foo(&block)\n", "&block"), + ("foo(&:to_s)\n", "&:to_s"), + 
("foo(&method(:bar))\n", "&method(:bar)"), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "block_argument", text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_block_argument(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_block_argument", + "block_argument", + text + ), + "normalize_block_argument mismatch for {text:?}" + ); + } + } + + #[test] + fn normalize_body_nodes_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("\n", Language::Ruby, ".rb", "__root__", ""), + ("value\n", Language::Ruby, ".rb", "__root__", ""), + ("first\nsecond\n", Language::Ruby, ".rb", "__root__", ""), + ( + "first()\nsecond()\n", + Language::Python, + ".py", + "__root__", + "", + ), + ( + "first();\nsecond();\n", + Language::TypeScript, + ".ts", + "__root__", + "", + ), + ("first()\nsecond()\n", Language::Lua, ".lua", "__root__", ""), + ] { + let tree = raw_tree(source, language); + let target = if kind == "__root__" { + tree.root_node() + } else { + first_raw_node(tree.root_node(), source, kind, text) + }; + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let nodes = normalizer.named_children(target); + let rust = normalizer + .normalize_body_nodes(nodes, target) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_body_nodes_value(source, language, suffix, kind, text), + "normalize_body_nodes mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_children_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n one\n two\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "one\n two", + ), + ( + "def f\n value = other\nend\n", + 
Language::Ruby, + ".rb", + "body_statement", + "value = other", + ), + ( + "def f\n x = <<~TXT\n hi\n TXT\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x = <<~TXT\n hi\n TXT", + ), + ( + "def f():\n one()\n two()\n", + Language::Python, + ".py", + "block", + "one()\n two()", + ), + ( + "def f():\n value = other\n", + Language::Python, + ".py", + "block", + "value = other", + ), + ( + "function f(){ one(); two(); }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ one(); two(); }", + ), + ( + "function f(){ value = other; }\n", + Language::TypeScript, + ".ts", + "assignment_expression", + "value = other", + ), + ( + "function f()\n one()\n two()\nend\n", + Language::Lua, + ".lua", + "block", + "one()\n two()", + ), + ( + "function f()\n value = other\nend\n", + Language::Lua, + ".lua", + "block", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = children_value(&normalizer.normalize_children(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_children", + kind, + text + ), + "normalize_children mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_class_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class Thing; end\n", + Language::Ruby, + ".rb", + "class", + "class Thing; end", + ), + ( + "class Thing:\n pass\n", + Language::Python, + ".py", + "class_definition", + "class Thing:\n pass", + ), + ( + "class Thing {}\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Thing {}", + ), + ( + "local Thing = {}\n", + Language::Lua, + ".lua", + "variable_declaration", + "local Thing = {}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer 
= super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_class(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_class", + kind, + text + ), + "normalize_class mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_impl_matches_ruby_private_method() { + for (source, kind, text) in [( + "impl Thing {\n fn call(&self) {\n work();\n }\n}\n", + "impl_item", + "impl Thing {\n fn call(&self) {\n work();\n }\n}", + )] { + let tree = raw_tree(source, Language::Rust); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Rust); + let rust = normalizer + .normalize_impl(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Rust, + ".rs", + "normalize_impl", + kind, + text + ), + "normalize_impl mismatch for {kind} {text:?}" + ); + } + } + + #[test] + fn rust_impl_normalization_matches_ruby() { + let source = "impl Thing {\n fn call(&self) {\n work();\n }\n}\n"; + let root = parse_language_source(source, Language::Rust, ".rs"); + let class_node = first_node(&root, "CLASS", source.trim_end()); + + assert_eq!(child_node(class_node, 0).r#type, "CONST"); + assert_ruby_parity(source, Language::Rust, ".rs"); + } + + #[test] + fn normalize_body_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value", + ), + ( + "def f\n return value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "return value", + ), + ( + "def f\n items[index]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items[index]", + ), + ( + "def f\n [first, second]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + 
"[first, second]", + ), + ( + "def f\n value if ready?\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value if ready?", + ), + ( + "def f\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "block", + "return value", + ), + ( + "def f():\n value = other\n", + Language::Python, + ".py", + "block", + "value = other", + ), + ( + "function f() {\n return value;\n}\n", + Language::TypeScript, + ".ts", + "return_statement", + "return value;", + ), + ( + "function f() {\n value = other;\n}\n", + Language::TypeScript, + ".ts", + "expression_statement", + "value = other;", + ), + ( + "function f()\n return value\nend\n", + Language::Lua, + ".lua", + "block", + "return value", + ), + ( + "function f()\n value = other\nend\n", + Language::Lua, + ".lua", + "block", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_body(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_body", + kind, + text + ), + "normalize_body mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_return_value_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return nil\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "nil", + ), + ( + "def f\n return items[index]\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "items[index]", + ), + ( + "def f\n return left && right\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "left && right", + ), + ( + "def f\n return condition ? yes : no\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "condition ? 
yes : no", + ), + ( + "def f\n return foo { value }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo { value }", + ), + ( + "def f\n return user.name\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "user.name", + ), + ( + "def f\n return !value\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "!value", + ), + ( + "def f\n return left + right\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "left + right", + ), + ( + "def f\n return foo(bar)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo(bar)", + ), + ( + "def f():\n return value + other\n", + Language::Python, + ".py", + "binary_operator", + "value + other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_return_value(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_return_value", + kind, + text + ), + "normalize_return_value mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_return_node_matches_ruby_private_method() { + for (source, language, suffix, kind, text, elide_symbol) in [ + ( + "return :ok if cond\n", + Language::Ruby, + ".rb", + "return", + "return :ok", + false, + ), + ( + "return :ok if cond\n", + Language::Ruby, + ".rb", + "return", + "return :ok", + true, + ), + ( + "return value if cond\n", + Language::Ruby, + ".rb", + "return", + "return value", + true, + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_return_node_with_elide_symbol(node, elide_symbol) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + 
ruby_private_normalize_return_node_value( + source, + language, + suffix, + kind, + text, + elide_symbol + ), + "normalize_return_node mismatch for {language:?} {kind} {text:?} elide_symbol={elide_symbol}" + ); + } + } + + #[test] + fn normalize_return_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "return :ok if cond\n", + Language::Ruby, + ".rb", + "return", + "return :ok", + ), + ("break if done\n", Language::Ruby, ".rb", "break", "break"), + ( + "next value if done\n", + Language::Ruby, + ".rb", + "next", + "next value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_return(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_return", + kind, + text + ), + "normalize_return mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn call_arguments_matches_ruby_private_method() { + for (source, language, suffix, kind, text, function_mode) in [ + ( + "foo(value)\n", + Language::Ruby, + ".rb", + "call", + "foo(value)", + "auto", + ), + ( + "foo(left + right)\n", + Language::Ruby, + ".rb", + "call", + "foo(left + right)", + "auto", + ), + ( + "foo(user.name)\n", + Language::Ruby, + ".rb", + "call", + "foo(user.name)", + "auto", + ), + ( + "user.name(value)\n", + Language::Ruby, + ".rb", + "call", + "user.name(value)", + "none", + ), + ( + "foo(value)\n", + Language::Python, + ".py", + "call", + "foo(value)", + "auto", + ), + ( + "foo(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + "foo(value)", + "auto", + ), + ( + "foo(value)\n", + Language::Lua, + ".lua", + "function_call", + "foo(value)", + "auto", + ), + ( + "user.name(value)\n", + Language::Lua, + ".lua", + "function_call", + 
"user.name(value)", + "none", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let function = match function_mode { + "auto" => normalizer + .named_field(node, "function") + .or_else(|| normalizer.named_field(node, "call")) + .or_else(|| normalizer.named_children(node).into_iter().next()), + "none" => None, + other => panic!("unknown function mode {other:?}"), + }; + let rust = Value::Array( + normalizer + .call_arguments(node, function) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + rust, + ruby_private_call_arguments_value( + source, + language, + suffix, + kind, + text, + function_mode + ), + "call_arguments mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_call_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("ready?\n", Language::Ruby, ".rb", "call", "ready?"), + ("foo(value)\n", Language::Ruby, ".rb", "call", "foo(value)"), + ( + "user.name(value)\n", + Language::Ruby, + ".rb", + "call", + "user.name(value)", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ( + "foo(value)\n", + Language::Python, + ".py", + "expression_statement", + "foo(value)", + ), + ( + "foo(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + "foo(value)", + ), + ( + "foo(value)\n", + Language::Lua, + ".lua", + "function_call", + "foo(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_call(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_call", + kind, + text + ), + 
"normalize_call mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_call_with_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "items.map { |item| item }\n", + Language::Ruby, + ".rb", + "call", + "items.map { |item| item }", + ), + ( + "items.each do |item|\n item\nend\n", + Language::Ruby, + ".rb", + "call", + "items.each do |item|\n item\nend", + ), + ( + "foo(1) { bar }\n", + Language::Ruby, + ".rb", + "call", + "foo(1) { bar }", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_call_with_block(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_call_with_block", + kind, + text + ), + "normalize_call_with_block mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_call_without_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text, block_mode) in [ + ( + "foo(value)\n", + Language::Ruby, + ".rb", + "call", + "foo(value)", + "none", + ), + ( + "user.name(value)\n", + Language::Ruby, + ".rb", + "call", + "user.name(value)", + "none", + ), + ( + "foo(1) { bar }\n", + Language::Ruby, + ".rb", + "call", + "foo(1) { bar }", + "auto", + ), + ( + "items.map(1) { |item| item }\n", + Language::Ruby, + ".rb", + "call", + "items.map(1) { |item| item }", + "auto", + ), + ( + "Foo { bar }\n", + Language::Ruby, + ".rb", + "call", + "Foo { bar }", + "auto", + ), + ( + "foo(value)\n", + Language::Python, + ".py", + "expression_statement", + "foo(value)", + "none", + ), + ( + "foo(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + 
"foo(value)", + "none", + ), + ( + "foo(value)\n", + Language::Lua, + ".lua", + "function_call", + "foo(value)", + "none", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let block = match block_mode { + "auto" => normalizer.call_block(node), + "none" => None, + other => panic!("unknown block mode {other:?}"), + }; + let rust = normalizer + .normalize_call_without_block(node, block) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_call_without_block_value( + source, language, suffix, kind, text, block_mode + ), + "normalize_call_without_block mismatch for {language:?} {kind} {text:?} with block mode {block_mode:?}" + ); + } + } + + #[test] + fn command_arguments_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "foo value\n", + Language::Ruby, + ".rb", + "argument_list", + "value", + ), + ( + "foo :name\n", + Language::Ruby, + ".rb", + "argument_list", + ":name", + ), + ( + "foo left + right\n", + Language::Ruby, + ".rb", + "argument_list", + "left + right", + ), + ( + "foo user.name\n", + Language::Ruby, + ".rb", + "argument_list", + "user.name", + ), + ( + "foo(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ( + "foo(left + right)\n", + Language::Python, + ".py", + "argument_list", + "(left + right)", + ), + ( + "foo(value);\n", + Language::TypeScript, + ".ts", + "arguments", + "(value)", + ), + ( + "foo(value)\n", + Language::Lua, + ".lua", + "arguments", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = Value::Array( + normalizer + .command_arguments(node) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + 
rust, + ruby_private_command_arguments_value(source, language, suffix, kind, text), + "command_arguments mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn const_for_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("Foo\n", Language::Ruby, ".rb", "constant", "Foo"), + ("foo\n", Language::Ruby, ".rb", "identifier", "foo"), + ( + "class Foo:\n pass\n", + Language::Python, + ".py", + "identifier", + "Foo", + ), + ( + "type Alias = Foo;\n", + Language::TypeScript, + ".ts", + "type_identifier", + "Foo", + ), + ( + "local Foo = {}\n", + Language::Lua, + ".lua", + "variable_list", + "Foo", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.const_for(Some(node), node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value( + source, + language, + suffix, + "const_for", + kind, + text + ), + "const_for mismatch for {language:?} {kind} {text:?}" + ); + } + + for (source, language, suffix) in [ + ("class Foo\nend\n", Language::Ruby, ".rb"), + ("class Foo:\n pass\n", Language::Python, ".py"), + ("class Foo {}\n", Language::TypeScript, ".ts"), + ("local Foo = {}\n", Language::Lua, ".lua"), + ] { + let tree = raw_tree(source, language); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.const_for(None, tree.root_node()); + + assert_eq!( + node_value(&rust), + ruby_private_const_for_nil_value(source, language, suffix), + "const_for nil mismatch for {language:?}" + ); + } + } + + #[test] + fn normalize_patterns_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when 1\n one", + ), + ( + "case\nwhen ready\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when ready\n one", + ), 
+ ( + "case value\nwhen Foo::Bar\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when Foo::Bar\n one", + ), + ( + "case value\nwhen Foo\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when Foo\n one", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "case_clause", + "case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_case", + "case 1: one();", + ), + ("return 1\n", Language::Lua, ".lua", "expression_list", "1"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = Value::Array( + normalizer + .normalize_patterns(node) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + rust, + ruby_private_normalize_patterns_value(source, language, suffix, kind, text), + "normalize_patterns mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn case_value_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "case", + "case value\nwhen 1\n one\nend", + ), + ( + "case\nwhen ready\n one\nend\n", + Language::Ruby, + ".rb", + "case", + "case\nwhen ready\n one\nend", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); }", + ), + ( + "if value == 1 then one() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = 
normalizer.case_value(node).map(|value| { + ( + value.kind().to_string(), + super::node_text(value, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature(source, language, suffix, "case_value", kind, text), + "case_value mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn case_arms_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nwhen 2\n two\nelse\n other\nend\n", + Language::Ruby, + ".rb", + "case", + "case value\nwhen 1\n one\nwhen 2\n two\nelse\n other\nend", + ), + ( + "match value:\n case 1:\n one()\n case _:\n other()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()\n case _:\n other()", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); default: other(); }", + ), + ( + "if value == 1 then one() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .case_arms(node) + .into_iter() + .map(|arm| { + ( + arm.kind().to_string(), + super::node_text(arm, source).to_string(), + ) + }) + .collect::>(); + + assert_eq!( + rust, + ruby_private_node_list_signature(source, language, suffix, "case_arms", kind, text), + "case_arms mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn when_body_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when 1\n one", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "case_clause", + "case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + 
Language::TypeScript, + ".ts", + "switch_case", + "case 1: one();", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_default", + "default: other();", + ), + ( + "if value == 1 then one() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.when_body(node).map(|body| { + ( + body.kind().to_string(), + super::node_text(body, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature(source, language, suffix, "when_body", kind, text), + "when_body mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_when_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when 1\n one", + ), + ( + "case value\nwhen Foo::Bar\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when Foo::Bar\n one", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "case_clause", + "case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); break; default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_case", + "case 1: one(); break;", + ), + ( + "if value == 1 then one() else other() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() else other() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_when(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_when", 
+ kind, + text + ), + "normalize_when mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn case_else_body_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nelse\n other\nend\n", + Language::Ruby, + ".rb", + "case", + "case value\nwhen 1\n one\nelse\n other\nend", + ), + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "case", + "case value\nwhen 1\n one\nend", + ), + ( + "match value:\n case 1:\n one()\n case _:\n other()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()\n case _:\n other()", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); break; default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); break; default: other(); }", + ), + ( + "switch (value) { case 1: one(); break; }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); break; }", + ), + ( + "if value == 1 then one() else other() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() else other() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .case_else_body(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "case_else_body", + kind, + text + ), + "case_else_body mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_case_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nwhen 2\n two\nelse\n other\nend\n", + Language::Ruby, + ".rb", + 
"case", + "case value\nwhen 1\n one\nwhen 2\n two\nelse\n other\nend", + ), + ( + "case\nwhen ready\n one\nelse\n other\nend\n", + Language::Ruby, + ".rb", + "case", + "case\nwhen ready\n one\nelse\n other\nend", + ), + ( + "match value:\n case 1:\n one()\n case _:\n other()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()\n case _:\n other()", + ), + ( + "switch (value) { case 1: one(); break; default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); break; default: other(); }", + ), + ( + "if value == 1 then one() else other() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() else other() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_case(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_case", + kind, + text + ), + "normalize_case mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn dotted_call_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ("user&.name\n", Language::Ruby, ".rb", "call", "user&.name"), + ("user\n", Language::Ruby, ".rb", "identifier", "user"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user\n", + Language::Python, + ".py", + "expression_statement", + "user", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ("user;\n", Language::TypeScript, ".ts", "identifier", "user"), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ("user()\n", 
Language::Lua, ".lua", "function_call", "user()"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.dotted_call(node), + ruby_private_predicate(source, language, suffix, "dotted_call?", kind, text), + "dotted_call? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn dotted_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n user.name\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name", + ), + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ( + "user.name\n", + Language::Python, + ".py", + "expression_statement", + "user.name", + ), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "user.name;", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.dotted_expression(node), + ruby_private_predicate(source, language, suffix, "dotted_expression?", kind, text), + "dotted_expression? 
mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn dotted_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n user.name\nend\n", Language::Ruby, ".rb"), + ("user.name\n", Language::Python, ".py"), + ] { + assert_ruby_parity(source, language, suffix); + } + } + + #[test] + fn normalize_else_or_branch_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "if ready\n call\nelse\n stop\nend\n", + Language::Ruby, + ".rb", + "else", + "else\n stop", + ), + ( + "if ready\n call\nelse\n user.name\nend\n", + Language::Ruby, + ".rb", + "else", + "else\n user.name", + ), + ( + "if ready:\n call()\nelse:\n stop()\n", + Language::Python, + ".py", + "else_clause", + "else:\n stop()", + ), + ( + "if ready:\n call()\nelse:\n if backup:\n stop()\n", + Language::Python, + ".py", + "else_clause", + "else:\n if backup:\n stop()", + ), + ( + "if (ready) { call(); } else { stop(); }\n", + Language::TypeScript, + ".ts", + "else_clause", + "else { stop(); }", + ), + ( + "if ready then\n call()\nelse\n stop()\nend\n", + Language::Lua, + ".lua", + "else_statement", + "else\n stop()", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_else_or_branch(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_else_or_branch", + kind, + text + ), + "normalize_else_or_branch mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_if_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "if ready\n call\nelse\n stop\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nelse\n stop\nend", + ), + ( + "call if ready\n", + Language::Ruby, + ".rb", 
+ "if_modifier", + "call if ready", + ), + ( + "unless ready\n call\nend\n", + Language::Ruby, + ".rb", + "unless", + "unless ready\n call\nend", + ), + ( + "if ready:\n call()\nelse:\n stop()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()\nelse:\n stop()", + ), + ( + "if ready:\n call()\nelif other:\n stop()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()\nelif other:\n stop()", + ), + ( + "if (ready) { call(); } else { stop(); }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (ready) { call(); } else { stop(); }", + ), + ( + "if ready then\n call()\nelseif other then\n stop()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nelseif other then\n stop()\nend", + ), + ( + "if ready then\n call()\nelse\n stop()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nelse\n stop()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_if(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_if", + kind, + text + ), + "normalize_if mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_elsif_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "if ready\n call\nelsif other\n stop\nend\n", + "elsif", + "elsif other\n stop", + ), + ( + "if ready\n call\nelsif other\n stop\nelse\n done\nend\n", + "elsif", + "elsif other\n stop\nelse\n done", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = node_value(&normalizer.normalize_elsif(node)); + + 
assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_elsif", + kind, + text + ), + "normalize_elsif mismatch for {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_loop_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "while ready\n work\nend\n", + Language::Ruby, + ".rb", + "while", + "while ready\n work\nend", + ), + ( + "work while ready\n", + Language::Ruby, + ".rb", + "while_modifier", + "work while ready", + ), + ( + "work until ready\n", + Language::Ruby, + ".rb", + "until_modifier", + "work until ready", + ), + ( + "for item in items\n work\nend\n", + Language::Ruby, + ".rb", + "for", + "for item in items\n work\nend", + ), + ( + "while ready:\n work()\n", + Language::Python, + ".py", + "while_statement", + "while ready:\n work()", + ), + ( + "for item in items:\n work()\n", + Language::Python, + ".py", + "for_statement", + "for item in items:\n work()", + ), + ( + "while ready do\n work()\nend\n", + Language::Lua, + ".lua", + "while_statement", + "while ready do\n work()\nend", + ), + ( + "while (ready) { work(); }\n", + Language::TypeScript, + ".ts", + "while_statement", + "while (ready) { work(); }", + ), + ( + "for (let i = 0; i < n; i++) { work(i); }\n", + Language::TypeScript, + ".ts", + "for_statement", + "for (let i = 0; i < n; i++) { work(i); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let node_type = super::loop_kind(node.kind()).expect("test node should be a loop kind"); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_loop(node, node_type) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_loop", + kind, + text + ), + "normalize_loop mismatch for {language:?} {kind} {text:?}" + ); + } + } + 
+ #[test] + fn ruby_elsif_normalization_matches_ruby() { + for source in [ + "if ready\n call\nelsif other\n stop\nend\n", + "if ready\n call\nelsif other\n stop\nelse\n done\nend\n", + ] { + let root = parse_language_source(source, Language::Ruby, ".rb"); + let if_node = first_node(&root, "IF", source.trim_end()); + + assert_eq!( + child_node(if_node, 2).r#type, + "IF", + "expected Ruby elsif alternative to normalize as nested IF: {if_node:#?}" + ); + assert_ruby_parity(source, Language::Ruby, ".rb"); + } + } + + #[test] + fn normalize_dotted_expression_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n user.name\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name", + ), + ( + "def f\n user.name { value }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name { value }", + ), + ( + "user.name\n", + Language::Python, + ".py", + "expression_statement", + "user.name", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "user.name;", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_dotted_expression(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_dotted_expression", + kind, + text + ), + "normalize_dotted_expression mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_dotted_call_expression_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n user.name\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name", + ), + ( + "def f\n user.name(1)\nend\n", + Language::Ruby, + 
".rb", + "body_statement", + "user.name(1)", + ), + ( + "def f\n user&.name\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user&.name", + ), + ( + "def f\n user.name { value }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name { value }", + ), + ( + "user.name\n", + Language::Python, + ".py", + "expression_statement", + "user.name", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_dotted_call_expression(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_dotted_call_expression", + kind, + text + ), + "normalize_dotted_call_expression mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn argument_list_call_with_block_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return foo { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo { bar }", + ), + ( + "def f\n return foo do\n bar\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo do\n bar\n end", + ), + ( + "def f\n return foo(1) { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo(1) { bar }", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ( + "def f\n return foo.bar { baz }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo.bar { baz }", + ), + ( + "def f\n return Foo { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "Foo { bar }", + ), + ( + "def f():\n return foo(lambda: bar)\n", + 
Language::Python, + ".py", + "argument_list", + "(lambda: bar)", + ), + ( + "function f(){ return foo(() => bar); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(() => bar)", + ), + ( + "function f() return foo(function() return bar end) end\n", + Language::Lua, + ".lua", + "arguments", + "(function() return bar end)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.argument_list_call_with_block(node), + ruby_private_predicate( + source, + language, + suffix, + "argument_list_call_with_block?", + kind, + text + ), + "argument_list_call_with_block? mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_argument_list_call_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return foo { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo { bar }", + ), + ( + "def f\n return foo do\n bar\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo do\n bar\n end", + ), + ( + "def f\n return foo(1) { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo(1) { bar }", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ( + "def f():\n return foo(lambda: bar)\n", + Language::Python, + ".py", + "argument_list", + "(lambda: bar)", + ), + ( + "function f(){ return foo(() => bar); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(() => bar)", + ), + ( + "function f() return foo(function() return bar end) end\n", + Language::Lua, + ".lua", + "arguments", + "(function() return bar end)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + 
.normalize_argument_list_call(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_argument_list_call", + kind, + text + ), + "normalize_argument_list_call mismatch for {language:?} {kind} {text:?}" + ); + } + } + + #[test] + fn normalize_argument_list_call_with_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return foo { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo { bar }", + ), + ( + "def f\n return foo do\n bar\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo do\n bar\n end", + ), + ( + "def f\n return foo(1) { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo(1) { bar }", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ( + "def f():\n return foo(lambda: bar)\n", + Language::Python, + ".py", + "argument_list", + "(lambda: bar)", + ), + ( + "function f(){ return foo(() => bar); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(() => bar)", + ), + ( + "function f() return foo(function() return bar end) end\n", + Language::Lua, + ".lua", + "arguments", + "(function() return bar end)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_argument_list_call_with_block(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_argument_list_call_with_block", + kind, + text + ), + "normalize_argument_list_call_with_block mismatch for {language:?} {kind} {text:?}" + ); } - if node.kind() == "expression_statement" - && children.len() == 1 - && matches!(children[0].kind(), 
"assignment" | "augmented_assignment") - { - return self.named_children(children[0]); + } + + #[test] + fn safe_navigation_call_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("user&.name\n", Language::Ruby, ".rb", "call", "user&.name"), + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user?.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user?.name", + ), + ( + "user?.name();\n", + Language::TypeScript, + ".ts", + "call_expression", + "user?.name()", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.safe_navigation_call(node), + ruby_private_predicate( + source, + language, + suffix, + "safe_navigation_call?", + kind, + text + ), + "safe_navigation_call? 
mismatch for {language:?} {kind} {text:?}" + ); } + } - children + #[test] + fn rescue_source_end_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "begin\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Error => e\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n handle()\n", + Language::Python, + ".py", + "except_clause", + "except Error as e:\n handle()", + ), + ( + "try { work(); } catch (e) { handle(); }\n", + Language::TypeScript, + ".ts", + "catch_clause", + "catch (e) { handle(); }", + ), + ("work()\n", Language::Lua, ".lua", "function_call", "work()"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.rescue_source_end(node).map(|source_end| { + ( + source_end.kind().to_string(), + super::node_text(source_end, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature( + source, + language, + suffix, + "rescue_source_end", + kind, + text + ), + "rescue_source_end mismatch for {language:?} {kind} {text:?}" + ); + } } - fn raw_named_children<'tree>(&self, node: TreeSitterNode<'tree>) -> Vec> { - node.children(&mut node.walk()) - .filter(|child| child.is_named()) - .collect() + #[test] + fn rescue_exception_variable_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "begin\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Error => e\n handle", + ), + ( + "begin\n work\nrescue Error\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Error\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n handle()\n", + Language::Python, + ".py", + "except_clause", + "except Error as e:\n handle()", + ), + ( + "try:\n work()\nexcept Error:\n handle()\n", + Language::Python, + ".py", + "except_clause", + 
"except Error:\n handle()", + ), + ( + "try { work(); } catch (e) { handle(); }\n", + Language::TypeScript, + ".ts", + "catch_clause", + "catch (e) { handle(); }", + ), + ("work()\n", Language::Lua, ".lua", "function_call", "work()"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .rescue_exception_variable(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "rescue_exception_variable", + kind, + text + ), + "rescue_exception_variable mismatch for {language:?} {kind} {text:?}" + ); + } } - fn lua_no_paren_string_argument_content<'tree>( - &self, - node: TreeSitterNode<'tree>, - ) -> Option> { - if self.language != Language::Lua || node.kind() != "string" { - return None; + #[test] + fn normalize_rescue_clause_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "begin\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Error => e\n handle", + ), + ( + "begin\n work\nrescue Net::Error\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Net::Error\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\n", + Language::Python, + ".py", + "except_clause", + "except Error as e:\n handle(e)", + ), + ( + "try { work(); } catch (e) { handle(e); }\n", + Language::TypeScript, + ".ts", + "catch_clause", + "catch (e) { handle(e); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_rescue_clause(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + 
source, + language, + suffix, + "normalize_rescue_clause", + kind, + text + ), + "normalize_rescue_clause mismatch for {language:?} {kind} {text:?}" + ); } - let parent = node.parent()?; - if parent.kind() != "arguments" - || node_text(parent, self.source) != node_text(node, self.source) - { - return None; + } + + #[test] + fn normalize_rescue_modifier_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [( + "value rescue fallback\n", + Language::Ruby, + ".rb", + "rescue_modifier", + "value rescue fallback", + )] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_rescue_modifier(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_rescue_modifier", + kind, + text + ), + "normalize_rescue_modifier mismatch for {language:?} {kind} {text:?}" + ); } - self.raw_named_children(node) - .into_iter() - .find(|child| child.kind() == "string_content") } - fn source_before_child(&self, node: TreeSitterNode<'_>, child: TreeSitterNode<'_>) -> Node { - let text = self - .source - .get(node.start_byte()..child.start_byte()) - .unwrap_or("") - .trim_end() - .to_string(); - if text.is_empty() { - return self.wrap("SOURCE", Vec::new(), node); + #[test] + fn prepend_rescue_exception_assignment_matches_ruby_private_method() { + fn synthetic_node( + node_type: &str, + text: &str, + first_lineno: usize, + first_column: usize, + last_lineno: usize, + last_column: usize, + children: Vec, + ) -> Node { + Node { + r#type: node_type.to_string(), + children, + first_lineno, + first_column, + last_lineno, + last_column, + text: text.to_string(), + } } - let lines = text.lines().collect::>(); - let first_span = span(node); - let last_lineno = first_span[0] + lines.len() - 1; - let 
last_column = if lines.len() <= 1 { - first_span[1] + text.len() - } else { - lines.last().map(|line| line.len()).unwrap_or(0) - }; - Node { - r#type: "SOURCE".to_string(), - children: Vec::new(), - first_lineno: first_span[0], - first_column: first_span[1], - last_lineno, - last_column, - text, + let source = "assign\nbody\n"; + let assignment = synthetic_node("LASGN", "assign", 1, 0, 1, 6, Vec::new()); + let body = synthetic_node("VCALL", "body", 2, 0, 2, 4, Vec::new()); + let block = synthetic_node( + "BLOCK", + "body", + 2, + 0, + 2, + 4, + vec![Child::Node(Box::new(body.clone())), Child::Nil], + ); + + for (label, body_node, assignment_node) in [ + ("no_assignment", Some(body.clone()), None), + ("no_body", None, Some(assignment.clone())), + ("block_body", Some(block), Some(assignment.clone())), + ("scalar_body", Some(body), Some(assignment)), + ] { + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .prepend_rescue_exception_assignment(body_node.clone(), assignment_node.clone()) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + let body_value = body_node.as_ref().map(node_value).unwrap_or(Value::Null); + let assignment_value = assignment_node + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_prepend_rescue_exception_assignment_value( + source, + &body_value, + &assignment_value + ), + "prepend_rescue_exception_assignment mismatch for {label}" + ); + } + } + + #[test] + fn dotted_call_parts_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ("user&.name\n", Language::Ruby, ".rb", "call", "user&.name"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + 
"dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .dotted_call_parts(node, None) + .map(|(receiver, method)| { + ( + receiver.kind().to_string(), + super::node_text(receiver, source).to_string(), + method, + ) + }); + + assert_eq!( + rust, + ruby_private_dotted_call_parts(source, language, suffix, kind, text), + "dotted_call_parts mismatch for {language:?} {kind} {text:?}" + ); } } - fn first_named<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { - self.named_children(node).into_iter().next() + #[test] + fn dotted_call_parts_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("user.name\n", Language::Ruby, ".rb"), + ("user&.name\n", Language::Ruby, ".rb"), + ("user.name()\n", Language::Python, ".py"), + ("user.name();\n", Language::TypeScript, ".ts"), + ("user.name()\n", Language::Lua, ".lua"), + ] { + let root = parse_language_source(source, language, suffix); + if language != Language::Lua { + let mut calls = Vec::new(); + nodes_of_type(&root, "CALL", &mut calls); + let mut qcalls = Vec::new(); + nodes_of_type(&root, "QCALL", &mut qcalls); + assert!( + calls + .iter() + .chain(qcalls.iter()) + .any(|node| matches!(node.children.get(1), Some(Child::Symbol(method)) if method == "name")), + "expected dotted call method name for {language:?} in {root:#?}" + ); + } + assert_ruby_parity(source, language, suffix); + } } - fn branch_child<'tree>( - &self, - node: TreeSitterNode<'tree>, - condition: TreeSitterNode<'tree>, - offset: usize, - ) -> Option> { - self.named_children(node) - .into_iter() - .filter(|child| *child != condition) - .nth(offset) - } + #[test] + fn leading_if_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n if x\n y\n end\nend\n", + Language::Ruby, + ".rb", + 
"body_statement", + "if x\n y\n end", + ), + ( + "def f():\n if x:\n y()\n", + Language::Python, + ".py", + "block", + "if x:\n y()", + ), + ( + "function f()\n if x then\n y()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if x then\n y()\n end", + ), + ( + "function f() { if (x) { y(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (x) { y(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); - fn explicit_alternative<'tree>( - &self, - node: TreeSitterNode<'tree>, - ) -> Option> { - self.named_children(node).into_iter().find(|child| { - matches!( - child.kind(), - "elif_clause" - | "else" - | "else_clause" - | "else_statement" - | "elsif" - | "elseif_statement" - ) - }) + assert_eq!( + normalizer.leading_if_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_if_statement?", + kind, + text + ), + "leading_if_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } } - fn case_value<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { - self.named_field(node, "value") - .or_else(|| self.named_field(node, "subject")) - .or_else(|| self.named_field(node, "condition")) - .or_else(|| { - self.named_children(node).into_iter().find(|child| { - !self.when_kind(child.kind()) - && !self.block_kind(child.kind()) - && child.kind() != "else" - }) - }) - } + #[test] + fn normalize_leading_if_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n if x\n y\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "if x\n y\n end", + ), + ( + "def f():\n if x:\n y()\n", + Language::Python, + ".py", + "block", + "if x:\n y()", + ), + ( + "function f()\n if x then\n y()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if x then\n y()\n end", + ), + ( + "function f() { if (x) { y(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (x) { y(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_if_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); - fn case_arms<'tree>(&self, node: TreeSitterNode<'tree>) -> Vec> { - let mut arms = Vec::new(); - let mut stack = self.named_children(node); - while !stack.is_empty() { - let child = stack.remove(0); - if self.when_kind(child.kind()) { - arms.push(child); - } else if !function_kind(child.kind()) { - stack.extend(self.named_children(child)); - } + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_if_statement", + kind, + text + ), + "normalize_leading_if_statement mismatch for {language:?} {kind} {text:?}" + ); } - arms - } - - fn when_body<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { - 
self.named_field(node, "body") - .or_else(|| self.named_field(node, "consequence")) - .or_else(|| self.named_field(node, "value")) - .or_else(|| { - self.named_children(node).into_iter().rev().find(|child| { - self.block_kind(child.kind()) || self.statement_node(child.kind()) - }) - }) } - fn identifier_kind(&self, kind: &str) -> bool { - matches!( - kind, - "identifier" - | "simple_identifier" - | "property_identifier" - | "field_identifier" - | "shorthand_property_identifier" - ) + #[test] + fn leading_if_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n if x\n y\n end\nend\n", Language::Ruby, ".rb"), + ( + "def f():\n if x:\n y()\n", + Language::Python, + ".py", + ), + ( + "function f()\n if x then\n y()\n end\nend\n", + Language::Lua, + ".lua", + ), + ( + "function f() { if (x) { y(); } }\n", + Language::TypeScript, + ".ts", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut if_nodes = Vec::new(); + nodes_of_type(&root, "IF", &mut if_nodes); + assert!( + !if_nodes.is_empty(), + "expected IF node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } } - fn const_kind(&self, kind: &str) -> bool { - matches!( - kind, - "constant" | "scope_resolution" | "type_identifier" | "scoped_type_identifier" - ) - } + #[test] + fn leading_case_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "case x\n when 1 then y\n else z\n end", + ), + ( + "def f(x):\n match x:\n case 1:\n y()\n", + Language::Python, + ".py", + "block", + "match x:\n case 1:\n y()", + ), + ( + "function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (x) { case 1: y(); break; default: z(); }", + ), + ( + "function f(x)\n if x == 1 then y() end\nend\n", + 
Language::Lua, + ".lua", + "block", + "if x == 1 then y() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); - fn call_kind(&self, kind: &str) -> bool { - matches!( - kind, - "call" | "call_expression" | "method_call" | "method_call_expression" - ) + assert_eq!( + normalizer.leading_case_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_case_statement?", + kind, + text + ), + "leading_case_statement? mismatch for {language:?} {kind} {text:?}" + ); + } } - fn block_kind(&self, kind: &str) -> bool { - matches!( - kind, - "block" - | "body_statement" - | "statement_block" - | "statement_list" - | "class_body" - | "switch_body" - | "match_block" - | "then" - | "block_body" - | "control_structure_body" - | "function_body" - | "statements" - ) - } + #[test] + fn normalize_leading_case_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "case x\n when 1 then y\n else z\n end", + ), + ( + "def f(x):\n match x:\n case 1:\n y()\n", + Language::Python, + ".py", + "block", + "match x:\n case 1:\n y()", + ), + ( + "function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (x) { case 1: y(); break; default: z(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_case_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); - fn case_kind(&self, kind: &str) -> bool { - matches!( - kind, - "case" - | "switch_statement" - | "expression_switch_statement" - | 
"switch_expression" - | "match_statement" - | "match_expression" - | "when_expression" - ) + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_case_statement", + kind, + text + ), + "normalize_leading_case_statement mismatch for {language:?} {kind} {text:?}" + ); + } } - fn when_kind(&self, kind: &str) -> bool { - matches!( - kind, - "when" - | "switch_case" - | "case_clause" - | "expression_case" - | "case_statement" - | "switch_section" - | "switch_block_statement_group" - | "switch_entry" - | "when_entry" - | "match_arm" - ) + #[test] + fn leading_case_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", + Language::Ruby, + ".rb", + ), + ( + "def f(x):\n match x:\n case 1:\n y()\n", + Language::Python, + ".py", + ), + ( + "function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", + Language::TypeScript, + ".ts", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut case_nodes = Vec::new(); + nodes_of_type(&root, "CASE", &mut case_nodes); + assert!( + !case_nodes.is_empty(), + "expected CASE node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } } - fn statement_node(&self, kind: &str) -> bool { - kind.ends_with("_statement") - || kind.ends_with("_expression") - || matches!(kind, "return" | "break" | "next") - } + #[test] + fn leading_loop_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n while x\n y\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "while x\n y\n end", + ), + ( + "def f(x):\n while x:\n y()\n", + Language::Python, + ".py", + "block", + "while x:\n y()", + ), + ( + "function f(x)\n while x do\n y()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "while x do\n y()\n end", + ), + ( + "function f(x) { while (x) { y(); } }\n", + 
Language::TypeScript, + ".ts", + "while_statement", + "while (x) { y(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); - fn unwrap_node(&self, node: TreeSitterNode<'_>) -> bool { - matches!( - node.kind(), - "parenthesized_expression" - | "parenthesized_statements" - | "expression_statement" - | "statement" - | "case_pattern" - | "match_pattern" - | "pattern" - ) && self.named_children(node).len() == 1 + assert_eq!( + normalizer.leading_loop_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_loop_statement?", + kind, + text + ), + "leading_loop_statement? mismatch for {language:?} {kind} {text:?}" + ); + } } - fn first_dotted_call_descendant<'tree>( - &self, - node: TreeSitterNode<'tree>, - ) -> Option> { - for child in self.named_children(node) { - if self.call_kind(child.kind()) && self.dotted_call(child) { - return Some(child); - } - if let Some(found) = self.first_dotted_call_descendant(child) { - return Some(found); - } + #[test] + fn normalize_leading_loop_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n while x\n y\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "while x\n y\n end", + ), + ( + "def f(x)\n until x\n y\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "until x\n y\n end", + ), + ( + "def f(x):\n while x:\n y()\n", + Language::Python, + ".py", + "block", + "while x:\n y()", + ), + ( + "function f(x)\n while x do\n y()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "while x do\n y()\n end", + ), + ( + "function f(x) { while (x) { y(); } }\n", + Language::TypeScript, + ".ts", + "while_statement", + "while (x) { y(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = 
super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_loop_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_loop_statement", + kind, + text + ), + "normalize_leading_loop_statement mismatch for {language:?} {kind} {text:?}" + ); } - None } - fn elide_tail_returns(&self, node: Option) -> Option { - if self.language != Language::Ruby { - return node; - } - let mut node = node?; - if matches!( - node.r#type.as_str(), - "DEFN" | "DEFS" | "CLASS" | "MODULE" | "SCLASS" | "LAMBDA" | "ITER" - ) { - return Some(node); - } - if node.r#type == "RETURN" { - return node.children.into_iter().find_map(child_node); + #[test] + fn leading_loop_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def f(x)\n while x\n y\n end\nend\n", + Language::Ruby, + ".rb", + ), + ( + "def f(x):\n while x:\n y()\n", + Language::Python, + ".py", + ), + ( + "function f(x)\n while x do\n y()\n end\nend\n", + Language::Lua, + ".lua", + ), + ( + "function f(x) { while (x) { y(); } }\n", + Language::TypeScript, + ".ts", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut while_nodes = Vec::new(); + nodes_of_type(&root, "WHILE", &mut while_nodes); + assert!( + !while_nodes.is_empty(), + "expected WHILE node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); } + } - match node.r#type.as_str() { - "BLOCK" => { - if let Some(last) = node.children.pop() { - match child_node(last) { - Some(last_node) => { - if let Some(elided) = self.elide_tail_returns(Some(last_node)) { - node.children.push(Child::Node(Box::new(elided))); - } else { - node.children.push(Child::Nil); - } - } - None => node.children.push(Child::Nil), - } - } - } - "SCOPE" => { - if node.children.len() > 2 { - let child = std::mem::replace(&mut node.children[2], 
Child::Nil); - if let Some(elided) = - child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) - { - node.children[2] = Child::Node(Box::new(elided)); - } - } - } - "IF" | "UNLESS" => { - for index in [1usize, 2usize] { - if node.children.len() > index { - let child = std::mem::replace(&mut node.children[index], Child::Nil); - if let Some(elided) = - child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) - { - node.children[index] = Child::Node(Box::new(elided)); - } - } - } - } - "CASE" | "CASE2" => { - let index = if node.r#type == "CASE" { 1 } else { 0 }; - if node.children.len() > index { - let child = std::mem::replace(&mut node.children[index], Child::Nil); - if let Some(elided) = - child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) - { - node.children[index] = Child::Node(Box::new(elided)); - } - } - } - "WHEN" | "RESBODY" => { - for index in [1usize, 2usize] { - if node.children.len() > index { - let child = std::mem::replace(&mut node.children[index], Child::Nil); - if let Some(elided) = - child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) - { - node.children[index] = Child::Node(Box::new(elided)); - } - } - } - } - "RESCUE" => { - for index in [0usize, 1usize] { - if node.children.len() > index { - let child = std::mem::replace(&mut node.children[index], Child::Nil); - if let Some(elided) = - child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) - { - node.children[index] = Child::Node(Box::new(elided)); - } - } - } - } - _ => {} - } + #[test] + fn rescue_body_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "work\nrescue Error => e\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nexcept Error as e:\n handle(e)", + ), + ( + "try { 
work(); } catch (e) { handle(e); }\n", + Language::TypeScript, + ".ts", + "try_statement", + "try { work(); } catch (e) { handle(e); }", + ), + ( + "local ok, err = pcall(work)\n", + Language::Lua, + ".lua", + "variable_declaration", + "local ok, err = pcall(work)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); - Some(node) + assert_eq!( + normalizer.rescue_body_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "rescue_body_statement?", + kind, + text + ), + "rescue_body_statement? mismatch for {language:?} {kind} {text:?}" + ); + } } - fn elide_implicit_nil_body(&self, node: Option) -> Option { - if self.language != Language::Ruby { - return node; + #[test] + fn normalize_rescue_body_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "work\nrescue Error => e\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nexcept Error as e:\n handle(e)", + ), + ( + "try { work(); } catch (e) { handle(e); }\n", + Language::TypeScript, + ".ts", + "try_statement", + "try { work(); } catch (e) { handle(e); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_rescue_body_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_rescue_body_statement", + kind, + text + ), + "normalize_rescue_body_statement mismatch for {language:?} {kind} {text:?}" + ); } - let node = 
self.drop_trailing_nil_statement(node); - match node { - Some(node) if node.r#type == "NIL" => None, - other => other, + } + + #[test] + fn rescue_body_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def f\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\n", + Language::Python, + ".py", + ), + ( + "try { work(); } catch (e) { handle(e); }\n", + Language::TypeScript, + ".ts", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut rescue_nodes = Vec::new(); + nodes_of_type(&root, "RESCUE", &mut rescue_nodes); + assert!( + !rescue_nodes.is_empty(), + "expected RESCUE node for {language:?} in {root:#?}" + ); + let mut resbody_nodes = Vec::new(); + nodes_of_type(&root, "RESBODY", &mut resbody_nodes); + assert!( + !resbody_nodes.is_empty(), + "expected RESBODY node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); } } - fn drop_trailing_nil_statement(&self, node: Option) -> Option { - let mut node = node?; - if node.r#type != "BLOCK" { - return Some(node); + #[test] + fn ensure_body_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n work\nensure\n cleanup\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "work\nensure\n cleanup", + ), + ( + "try:\n work()\nfinally:\n cleanup()\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nfinally:\n cleanup()", + ), + ( + "try { work(); } finally { cleanup(); }\n", + Language::TypeScript, + ".ts", + "try_statement", + "try { work(); } finally { cleanup(); }", + ), + ( + "work()\ncleanup()\n", + Language::Lua, + ".lua", + "function_call", + "work()", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + 
normalizer.ensure_body_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "ensure_body_statement?", + kind, + text + ), + "ensure_body_statement? mismatch for {language:?} {kind} {text:?}" + ); } - while node - .children - .last() - .and_then(self::node) - .map(|child| child.r#type == "NIL") - .unwrap_or(false) - { - node.children.pop(); + } + + #[test] + fn ensure_body_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def f\n work\nensure\n cleanup\nend\n", + Language::Ruby, + ".rb", + ), + ( + "try:\n work()\nfinally:\n cleanup()\n", + Language::Python, + ".py", + ), + ( + "try { work(); } finally { cleanup(); }\n", + Language::TypeScript, + ".ts", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\nfinally:\n cleanup()\n", + Language::Python, + ".py", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut ensure_nodes = Vec::new(); + nodes_of_type(&root, "ENSURE", &mut ensure_nodes); + assert!( + !ensure_nodes.is_empty(), + "expected ENSURE node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); } - if node.children.is_empty() { - None - } else if node.children.len() == 1 { - child_node(node.children.into_iter().next().unwrap()) - } else { - Some(node) + } + + #[test] + fn normalize_ensure_body_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n work\nensure\n cleanup\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "work\nensure\n cleanup", + ), + ( + "try:\n work()\nfinally:\n cleanup()\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nfinally:\n cleanup()", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\nfinally:\n cleanup()\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nexcept Error as e:\n handle(e)\nfinally:\n cleanup()", + ), + ( + "try { work(); } finally { cleanup(); }\n", + Language::TypeScript, + ".ts", + 
"try_statement", + "try { work(); } finally { cleanup(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_ensure_body_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_ensure_body_statement", + kind, + text + ), + "normalize_ensure_body_statement mismatch for {language:?} {kind} {text:?}" + ); } } -} -fn optional_node(node: Option) -> Child { - node.map(|node| Child::Node(Box::new(node))) - .unwrap_or(Child::Nil) -} + #[test] + fn normalize_ensure_clause_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "begin\n work\nensure\n cleanup\nend\n", + "ensure", + "ensure\n cleanup", + ), + ( + "begin\n work\nensure\n user.name\nend\n", + "ensure", + "ensure\n user.name", + ), + ( + "begin\n work\nensure\n user.name\n cleanup\nend\n", + "ensure", + "ensure\n user.name\n cleanup", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_ensure_clause(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); -fn child_node(child: Child) -> Option { - match child { - Child::Node(node) => Some(*node), - _ => None, + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_ensure_clause", + kind, + text + ), + "normalize_ensure_clause mismatch for {kind} {text:?}" + ); + } } -} -fn list_or_nil( - children: Vec, - source: TreeSitterNode<'_>, - normalizer: &TreeSitterNormalizer<'_>, -) -> Child { - if children.is_empty() { - Child::Nil - } else { - Child::Node(Box::new(normalizer.list(children, source))) 
+ #[test] + fn ruby_begin_ensure_clause_keeps_all_body_statements() { + let source = "begin\n work\nensure\n user.name\n cleanup\nend\n"; + let root = parse_language_source(source, Language::Ruby, ".rb"); + let ensure = first_node(&root, "ENSURE", "work\nensure\n user.name\n cleanup"); + let ensure_body = child_node(ensure, 1); + + assert_eq!( + child_types(ensure_body), + vec!["CALL", "VCALL"], + "Ruby ensure clause body must retain all statements: {ensure:#?}" + ); + assert_ruby_parity(source, Language::Ruby, ".rb"); } -} -fn dynamic_scope(mut node: Node) -> Node { - if matches!( - node.r#type.as_str(), - "DEFN" | "DEFS" | "CLASS" | "MODULE" | "SCLASS" | "LAMBDA" - ) { - return node; - } - if node.r#type == "LASGN" { - node.r#type = "DASGN".to_string(); - } else if node.r#type == "LVAR" { - node.r#type = "DVAR".to_string(); + #[test] + fn array_literal_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n [a, b]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "[a, b]", + ), + ( + "def f():\n [a, b]\n", + Language::Python, + ".py", + "block", + "[a, b]", + ), + ( + "function f() { [a, b]; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "[a, b];", + ), + ( + "function f()\n {a, b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a, b}", + ), + ( + "function f()\n {x = a, y = b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {x = a, y = b}", + ), + ( + "local rocks_path = table.concat({rocks_tree, \"a_rock\"})\n", + Language::Lua, + ".lua", + "arguments", + "({rocks_tree, \"a_rock\"})", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.array_literal_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "array_literal_statement?", + kind, + text + ), + "array_literal_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } } - node.children = node - .children - .into_iter() - .map(|child| match child { - Child::Node(node) => Child::Node(Box::new(dynamic_scope(*node))), - other => other, - }) - .collect(); - node -} - -fn assignment_operator(text: &str) -> bool { - matches!(text, "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&&=" | "||=") -} - -fn declaration_metadata_kind(kind: &str) -> bool { - matches!( - kind, - "modifiers" - | "type" - | "nullable_type" - | "parenthesized_type" - | "user_type" - | "type_identifier" - | "integral_type" - | "floating_point_type" - | "void_type" - ) -} -fn kind_type(kind: &str) -> String { - match kind { - "body_statement" | "block_body" | "block" | "statements" => "BLOCK".to_string(), - other => other - .chars() - .map(|ch| { - if ch.is_ascii_alphanumeric() { - ch.to_ascii_uppercase() - } else { - '_' - } - }) - .collect(), + #[test] + fn array_literal_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n [a, b]\nend\n", Language::Ruby, ".rb"), + ("def f():\n [a, b]\n", Language::Python, ".py"), + ("function f() { [a, b]; }\n", Language::TypeScript, ".ts"), + ("function f()\n {a, b}\nend\n", Language::Lua, ".lua"), + ] { + let root = parse_language_source(source, language, suffix); + let mut lists = Vec::new(); + nodes_of_type(&root, "LIST", &mut lists); + assert!( + lists + .iter() + .any(|node| node.text.contains('a') && node.text.contains('b')), + "expected LIST for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } } -} -fn if_kind(kind: &str) -> bool { - matches!( - kind, - "if" | "if_statement" - | "if_modifier" - | "unless" - | "unless_modifier" - | "if_expression" - | "conditional" - ) -} + #[test] + fn normalize_array_literal_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n [a, b]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "[a, b]", + ), + ( + "def f\n 
[]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "[]", + ), + ( + "def f():\n [a, b]\n", + Language::Python, + ".py", + "block", + "[a, b]", + ), + ("def f():\n []\n", Language::Python, ".py", "block", "[]"), + ( + "function f() { [a, b]; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "[a, b];", + ), + ( + "function f() { []; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "[];", + ), + ( + "function f()\n {a, b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a, b}", + ), + ( + "assert.same(install, { bin = { P\"bin/binfile\" } })\n", + Language::Lua, + ".lua", + "arguments", + "(install, { bin = { P\"bin/binfile\" } })", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_array_literal_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); -fn loop_kind(kind: &str) -> Option<&'static str> { - match kind { - "while" | "while_statement" | "while_modifier" => Some("WHILE"), - "until_modifier" => Some("UNTIL"), - "for" | "for_statement" | "for_in_clause" => Some("FOR"), - _ => None, + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_array_literal_statement", + kind, + text + ), + "normalize_array_literal_statement mismatch for {language:?} {kind} {text:?}" + ); + } } -} -fn function_kind(kind: &str) -> bool { - matches!( - kind, - "method" - | "function_definition" - | "function_declaration" - | "method_definition" - | "method_declaration" - | "function_item" - | "singleton_method" - ) -} + #[test] + fn element_reference_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items[0]", + ), + ( + "def f\n [0]\nend\n", + Language::Ruby, + 
".rb", + "body_statement", + "[0]", + ), + ( + "def f():\n items[0]\n", + Language::Python, + ".py", + "block", + "items[0]", + ), + ( + "return items[0]\n", + Language::Python, + ".py", + "subscript", + "items[0]", + ), + ( + "function f() { items[0]; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "items[0];", + ), + ( + "return items[0];\n", + Language::TypeScript, + ".ts", + "subscript_expression", + "items[0]", + ), + ( + "return items[1]\n", + Language::Lua, + ".lua", + "expression_list", + "items[1]", + ), + ( + "print(items[1])\n", + Language::Lua, + ".lua", + "bracket_index_expression", + "items[1]", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); -fn return_kind(kind: &str) -> &str { - match kind { - "return" | "return_statement" | "return_expression" => "RETURN", - "break" | "break_statement" | "break_expression" => "BREAK", - "next" | "continue_statement" => "NEXT", - other => other, + assert_eq!( + normalizer.element_reference_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "element_reference_statement?", + kind, + text + ), + "element_reference_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } } -} -fn inline_def_wrapper_mid(text: &str) -> bool { - matches!( - text, - "public" | "protected" | "private" | "private_class_method" | "module_function" - ) -} + #[test] + fn normalize_element_reference_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items[0]", + ), + ( + "def f\n self[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "self[0]", + ), + ( + "return items[0]\n", + Language::Python, + ".py", + "subscript", + "items[0]", + ), + ( + "return items[0];\n", + Language::TypeScript, + ".ts", + "subscript_expression", + "items[0]", + ), + ( + "print(items[1])\n", + Language::Lua, + ".lua", + "bracket_index_expression", + "items[1]", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_element_reference(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); -fn bare_identifier_text(text: &str) -> bool { - let mut chars = text.chars(); - let Some(first) = chars.next() else { - return false; - }; - if !(first == '_' || first.is_ascii_alphabetic()) { - return false; + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_element_reference", + kind, + text + ), + "normalize_element_reference mismatch for {language:?} {kind} {text:?}" + ); + } } - chars.all(|ch| ch == '_' || ch == '!' || ch == '?' 
|| ch == '=' || ch.is_ascii_alphanumeric()) -} - -fn instance_variable_node(node: TreeSitterNode<'_>, source: &str) -> bool { - let text = node_text(node, source); - node.kind() == "instance_variable" - || text - .strip_prefix('@') - .map(bare_identifier_text) - .unwrap_or(false) -} -fn global_variable_node(node: TreeSitterNode<'_>, source: &str) -> bool { - node.kind() == "global_variable" - || (!matches!(node.kind(), "string_content" | "escape_sequence") - && node_text(node, source).starts_with('$')) -} + #[test] + fn normalize_element_reference_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items[0]", + ), + ( + "def f\n self[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "self[0]", + ), + ( + "def f():\n items[0]\n", + Language::Python, + ".py", + "block", + "items[0]", + ), + ( + "return items[0]\n", + Language::Python, + ".py", + "subscript", + "items[0]", + ), + ( + "function f() { items[0]; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "items[0];", + ), + ( + "return items[0];\n", + Language::TypeScript, + ".ts", + "subscript_expression", + "items[0]", + ), + ( + "return items[1]\n", + Language::Lua, + ".lua", + "expression_list", + "items[1]", + ), + ( + "print(items[1])\n", + Language::Lua, + ".lua", + "bracket_index_expression", + "items[1]", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_element_reference_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); -fn comparison_operator_from_text(text: &str) -> Option { - for operator in ["===", "!==", "==", "!=", "<=", ">=", "<", ">"] { - if text.contains(operator) { - return Some(operator.to_string()); + assert_eq!( + rust, + 
ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_element_reference_statement", + kind, + text + ), + "normalize_element_reference_statement mismatch for {language:?} {kind} {text:?}" + ); } } - None -} -pub fn child_to_string(child: Option<&Child>) -> Option { - match child { - Some(Child::String(value)) | Some(Child::Symbol(value)) => Some(value.clone()), - _ => None, + #[test] + fn element_reference_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n items[0]\nend\n", Language::Ruby, ".rb"), + ("def f():\n items[0]\n", Language::Python, ".py"), + ("function f() { items[0]; }\n", Language::TypeScript, ".ts"), + ("return items[1]\n", Language::Lua, ".lua"), + ] { + let root = parse_language_source(source, language, suffix); + let mut calls = Vec::new(); + nodes_of_type(&root, "CALL", &mut calls); + assert!( + calls.iter().any(|node| { + matches!(node.children.get(1), Some(Child::Symbol(message)) if message == "[]") + && node.text.contains("items") + }), + "expected element reference CALL for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } } -} -#[cfg(test)] -mod tests { - use super::{parse, parse_with_language, Child, Node}; - use crate::decomplex::syntax::Language; - use serde_json::{json, Value}; - use std::io::Write; - use std::path::Path; - use std::process::Command; - use tree_sitter::{Node as TreeSitterNode, Parser as TreeSitterParser}; + #[test] + fn hash_literal_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n {a: b}\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "{a: b}", + ), + ( + "def f():\n {\"a\": b}\n", + Language::Python, + ".py", + "block", + "{\"a\": b}", + ), + ( + "function f() { ({a: b}); }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "({a: b});", + ), + ( + "return {a: b};\n", + Language::TypeScript, + ".ts", + "object", + "{a: b}", + ), + ( + 
"function f()\n {a = b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a = b}", + ), + ( + "function f()\n {a, b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a, b}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); - fn parse_source(source: &str) -> Node { - let mut file = tempfile::Builder::new() - .suffix(".rb") - .tempfile() - .expect("create temp ruby file"); - file.write_all(source.as_bytes()) - .expect("write temp ruby file"); - parse(file.path()).expect("parse temp ruby file").0 + assert_eq!( + normalizer.hash_literal_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "hash_literal_statement?", + kind, + text + ), + "hash_literal_statement? mismatch for {language:?} {kind} {text:?}" + ); + } } - fn parse_language_source(source: &str, language: Language, suffix: &str) -> Node { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create temp source file"); - file.write_all(source.as_bytes()) - .expect("write temp source file"); - parse_with_language(file.path(), language) - .expect("parse temp source file") - .0 - } + #[test] + fn normalize_hash_literal_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n {a: b}\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "{a: b}", + ), + ( + "def f():\n {\"a\": b}\n", + Language::Python, + ".py", + "block", + "{\"a\": b}", + ), + ( + "function f() { ({a: b}); }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "({a: b});", + ), + ( + "return {a: b};\n", + Language::TypeScript, + ".ts", + "object", + "{a: b}", + ), + ( + "function f()\n {a = b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a = b}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut 
normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_hash_literal_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); - fn nodes_of_type<'a>(node: &'a Node, node_type: &str, out: &mut Vec<&'a Node>) { - if node.r#type == node_type { - out.push(node); - } - for child in node.children.iter().filter_map(super::node) { - nodes_of_type(child, node_type, out); + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_hash_literal_statement", + kind, + text + ), + "normalize_hash_literal_statement mismatch for {language:?} {kind} {text:?}" + ); } } - fn first_node<'a>(root: &'a Node, node_type: &str, text: &str) -> &'a Node { - let mut nodes = Vec::new(); - nodes_of_type(root, node_type, &mut nodes); - nodes - .into_iter() - .find(|node| node.text == text) - .unwrap_or_else(|| panic!("expected {node_type} with text {text:?} in {root:#?}")) - } + #[test] + fn normalize_pair_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n {a: b}\nend\n", + Language::Ruby, + ".rb", + "pair", + "a: b", + ), + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "pair", + "name:", + ), + ( + "def f\n {\"a\" => b}\nend\n", + Language::Ruby, + ".rb", + "pair", + "\"a\" => b", + ), + ( + "def f():\n {\"a\": b}\n", + Language::Python, + ".py", + "pair", + "\"a\": b", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_pair(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); - fn child_node(node: &Node, index: usize) -> &Node { - node.children - .get(index) - .and_then(super::node) - .unwrap_or_else(|| panic!("expected child node {index} in {node:#?}")) + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + 
suffix, + "normalize_pair", + kind, + text + ), + "normalize_pair mismatch for {language:?} {kind} {text:?}" + ); + } } - fn child_types(node: &Node) -> Vec<&str> { - node.children - .iter() - .filter_map(super::node) - .map(|child| child.r#type.as_str()) - .collect() + #[test] + fn hash_literal_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n {a: b}\nend\n", Language::Ruby, ".rb"), + ("def f():\n {\"a\": b}\n", Language::Python, ".py"), + ("function f() { ({a: b}); }\n", Language::TypeScript, ".ts"), + ("function f()\n {a = b}\nend\n", Language::Lua, ".lua"), + ] { + let root = parse_language_source(source, language, suffix); + let mut hashes = Vec::new(); + nodes_of_type(&root, "HASH", &mut hashes); + assert!( + hashes + .iter() + .any(|node| node.text.contains('a') && node.text.contains('b')), + "expected hash literal HASH for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } } - fn test_node(node_type: &str, children: Vec) -> Node { - Node { - r#type: node_type.to_string(), - children, - first_lineno: 1, - first_column: 0, - last_lineno: 1, - last_column: 1, - text: node_type.to_string(), + #[test] + fn empty_body_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f():\n pass\n", + Language::Python, + ".py", + "block", + "pass", + ), + ( + "function f() {}\n", + Language::TypeScript, + ".ts", + "statement_block", + "{}", + ), + ( + "function f() { work(); }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ work(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.empty_body_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "empty_body_statement?", + kind, + text + ), + "empty_body_statement? 
mismatch for {language:?} {kind} {text:?}" + ); } } - fn infix_parts_text( - normalizer: &super::TreeSitterNormalizer<'_>, - node: TreeSitterNode<'_>, - source: &str, - ) -> Option<(String, String, String)> { - let (left, operator, right) = normalizer.infix_statement_parts(node)?; - Some(( - super::node_text(left, source).to_string(), - operator, - super::node_text(right, source).to_string(), - )) + #[test] + fn empty_body_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f():\n pass\n", Language::Python, ".py"), + ("function f() {}\n", Language::TypeScript, ".ts"), + ] { + let root = parse_language_source(source, language, suffix); + let mut defns = Vec::new(); + nodes_of_type(&root, "DEFN", &mut defns); + let scope = child_node(defns[0], 1); + assert!( + matches!(scope.children.get(2), Some(Child::Nil)), + "expected empty body for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } } - fn node_value(node: &Node) -> Value { - json!({ - "type": node.r#type, - "children": node.children.iter().map(child_value).collect::>(), - "first_lineno": node.first_lineno, - "first_column": node.first_column, - "last_lineno": node.last_lineno, - "last_column": node.last_column, - "text": node.text, - }) - } + #[test] + fn heredoc_body_statement_matches_ruby_private_predicate() { + let ruby_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; + for (source, language, suffix, kind, text) in [ + ( + ruby_source, + Language::Ruby, + ".rb", + "body_statement", + "puts <<~TXT\n hi\n TXT", + ), + (ruby_source, Language::Ruby, ".rb", "call", "puts <<~TXT"), + ( + "def f():\n value = 1\n", + Language::Python, + ".py", + "block", + "value = 1", + ), + ( + "function f() { value = 1; }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ value = 1; }", + ), + ( + "function f()\n value = 1\nend\n", + Language::Lua, + ".lua", + "block", + "value = 1", + ), + ] { + let tree = raw_tree(source, language); + let node = 
first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); - fn child_value(child: &Child) -> Value { - match child { - Child::Node(node) => node_value(node), - Child::Symbol(value) | Child::String(value) => Value::String(value.clone()), - Child::Nil => Value::Null, + assert_eq!( + normalizer.heredoc_body_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "heredoc_body_statement?", + kind, + text + ), + "heredoc_body_statement? mismatch for {language:?} {kind} {text:?}" + ); } } - fn ruby_language_name(language: Language) -> &'static str { - match language { - Language::Ruby => "ruby", - Language::Python => "python", - Language::JavaScript => "javascript", - Language::Java => "java", - Language::TypeScript => "typescript", - Language::Swift => "swift", - Language::Kotlin => "kotlin", - Language::Go => "go", - Language::Rust => "rust", - Language::Zig => "zig", - Language::Lua => "lua", - Language::C => "c", - Language::Cpp => "cpp", - Language::CSharp => "csharp", + #[test] + fn heredoc_call_for_body_matches_ruby_private_predicate() { + let ruby_arg_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; + let ruby_receiver_source = "def emit\n <<~ZIG.chomp\n hi\n ZIG\nend\n"; + for (source, language, suffix, kind, text) in [ + ( + ruby_arg_source, + Language::Ruby, + ".rb", + "body_statement", + "puts <<~TXT\n hi\n TXT", + ), + ( + ruby_arg_source, + Language::Ruby, + ".rb", + "call", + "puts <<~TXT", + ), + ( + ruby_arg_source, + Language::Ruby, + ".rb", + "argument_list", + "<<~TXT", + ), + ( + ruby_arg_source, + Language::Ruby, + ".rb", + "method", + "def f\n puts <<~TXT\n hi\n TXT\nend", + ), + ( + ruby_receiver_source, + Language::Ruby, + ".rb", + "call", + "<<~ZIG.chomp", + ), + ( + ruby_receiver_source, + Language::Ruby, + ".rb", + "heredoc_beginning", + "<<~ZIG", + ), + ( + "def f():\n value = 1\n", + Language::Python, + ".py", + "block", + "value = 1", + ), + ( + 
"function f() { value = 1; }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ value = 1; }", + ), + ( + "function f()\n value = 1\nend\n", + Language::Lua, + ".lua", + "block", + "value = 1", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.heredoc_call_for_body(node), + ruby_private_predicate( + source, + language, + suffix, + "heredoc_call_for_body?", + kind, + text + ), + "heredoc_call_for_body? mismatch for {language:?} {kind} {text:?}" + ); } } - fn ruby_normalized_value(path: &Path, language: Language) -> Value { - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - root, = Decomplex::Ast.parse(ARGV.fetch(0)) + #[test] + fn with_current_heredoc_body_restores_previous_body() { + let source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let body = first_raw_node(tree.root_node(), source, "heredoc_body", "\n hi\n TXT"); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + normalizer.current_heredoc_body_span = Some([9, 2, 9, 7]); - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end + let result = normalizer.with_current_heredoc_body(Some(body), |normalizer| { + assert_eq!( + normalizer.current_heredoc_body_span, + Some(super::span(body)) + ); + "result" + }); - puts JSON.generate(value(root)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - 
.env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "json", - "-e", - script, - ]) - .arg(path) - .output() - .expect("run ruby normalizer"); - assert!( - output.status.success(), - "ruby normalizer failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout).expect("ruby normalizer should emit JSON") + assert_eq!(result, "result"); + assert_eq!(normalizer.current_heredoc_body_span, Some([9, 2, 9, 7])); } - fn assert_ruby_parity(source: &str, language: Language, suffix: &str) { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create parity temp source file"); - file.write_all(source.as_bytes()) - .expect("write parity temp source file"); - - let rust = node_value( - &parse_with_language(file.path(), language) - .expect("parse parity temp source file") - .0, - ); - let ruby = ruby_normalized_value(file.path(), language); - assert_eq!(rust, ruby); - } + #[test] + fn normalize_interpolation_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "name = \"#{user}\"\n", + Language::Ruby, + ".rb", + "interpolation", + "#{user}", + ), + ( + "name = \"#{a; b}\"\n", + Language::Ruby, + ".rb", + "interpolation", + "#{a; b}", + ), + ( + "name = f\"hi {user}\"\n", + Language::Python, + ".py", + "interpolation", + "{user}", + ), + ( + "const name = `hi ${user}`;\n", + Language::TypeScript, + ".ts", + "template_substitution", + "${user}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_interpolation(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); - fn raw_tree(source: &str, language: Language) -> tree_sitter::Tree { - let mut parser = 
TreeSitterParser::new(); - parser - .set_language(&super::language_grammar(language)) - .expect("set raw parser language"); - parser.parse(source, None).expect("parse raw source") + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_interpolation", + kind, + text + ), + "normalize_interpolation mismatch for {language:?} {kind} {text:?}" + ); + } } - fn first_raw_node<'tree>( - node: TreeSitterNode<'tree>, - source: &str, - kind: &str, - text: &str, - ) -> TreeSitterNode<'tree> { - if node.kind() == kind && super::node_text(node, source) == text { - return node; - } - let mut cursor = node.walk(); - for child in node.children(&mut cursor) { - if let Some(found) = first_raw_node_opt(child, source, kind, text) { - return found; - } + #[test] + fn normalize_heredoc_children_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def f\n puts <<~TXT\n hi\n TXT\nend\n", + "heredoc_body", + "\n hi\n TXT", + ), + ( + "def f\n puts <<~TXT\n hi #{name}\n TXT\nend\n", + "heredoc_body", + "\n hi #{name}\n TXT", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = children_value(&normalizer.normalize_heredoc_children(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_heredoc_children", + kind, + text + ), + "normalize_heredoc_children mismatch for {kind} {text:?}" + ); } - panic!("expected raw node kind={kind:?} text={text:?}"); } - fn first_raw_node_opt<'tree>( - node: TreeSitterNode<'tree>, - source: &str, - kind: &str, - text: &str, - ) -> Option> { - if node.kind() == kind && super::node_text(node, source) == text { - return Some(node); - } - let mut cursor = node.walk(); - for child in node.children(&mut cursor) { - if let Some(found) = first_raw_node_opt(child, source, 
kind, text) { - return Some(found); - } + #[test] + fn normalize_heredoc_beginning_matches_ruby_private_method() { + for (source, kind, text) in [( + "def emit\n <<~ZIG.chomp\n hi\n ZIG\nend\n", + "heredoc_beginning", + "<<~ZIG", + )] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = node_value(&normalizer.normalize_heredoc_beginning(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_heredoc_beginning", + kind, + text + ), + "normalize_heredoc_beginning mismatch for {kind} {text:?}" + ); } - None } - fn ruby_private_predicate( - source: &str, - language: Language, - suffix: &str, - method: &str, - kind: &str, - text: &str, - ) -> bool { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby predicate temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby predicate temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - method = ARGV.fetch(3) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - puts normalizer.send(method, target) ? 
"true" : "false" - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(method) - .output() - .expect("run ruby private predicate"); - assert!( - output.status.success(), - "ruby predicate failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - String::from_utf8(output.stdout) - .expect("ruby predicate output should be utf8") - .trim() - == "true" - } + #[test] + fn normalize_heredoc_beginning_uses_current_body_for_multiple_heredocs() { + let source = "def f\n puts <<~A, <<~B\n one\n A\n two\n B\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let beginning = first_raw_node(tree.root_node(), source, "heredoc_beginning", "<<~B"); + let body = first_raw_node(tree.root_node(), source, "heredoc_body", "\n two\n B"); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - fn ruby_private_string( - source: &str, - language: Language, - suffix: &str, - method: &str, - kind: &str, - text: &str, - ) -> String { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby string temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby string temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - method = ARGV.fetch(3) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - 
walk.call(document.root) - abort "target node not found" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - puts normalizer.send(method, target).to_s - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(method) - .output() - .expect("run ruby private string helper"); - assert!( - output.status.success(), - "ruby string helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) + let dstr = normalizer.with_current_heredoc_body(Some(body), |normalizer| { + normalizer.normalize_heredoc_beginning(beginning) + }); + + let content = child_node(&dstr, 0); + assert_eq!(content.r#type, "STR"); + assert_eq!( + content.children, + vec![Child::String("\n two\n ".to_string())] ); - String::from_utf8(output.stdout) - .expect("ruby string helper output should be utf8") - .trim() - .to_string() } #[test] - fn tree_normalizer_new_initializes_empty_state() { - let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + fn normalize_heredoc_body_statement_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def f\n puts <<~TXT\n hi\n TXT\nend\n", + "body_statement", + "puts <<~TXT\n hi\n TXT", + ), + ( + "def emit\n <<~ZIG.chomp\n hi\n ZIG\nend\n", + "body_statement", + "<<~ZIG.chomp\n hi\n ZIG", + ), + ( + "def f\n puts <<~A, <<~B\n one\n A\n two\n B\nend\n", + "body_statement", + "puts <<~A, <<~B\n one\n A\n two\n B", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_heredoc_body_statement(node) + .map(|node| node_value(&node)) + 
.unwrap_or(Value::Null); - assert_eq!(normalizer.source, ""); - assert_eq!(normalizer.language, Language::Ruby); - assert!(normalizer.local_stack.is_empty()); - assert_eq!(normalizer.root_span, None); + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_heredoc_body_statement", + kind, + text + ), + "normalize_heredoc_body_statement mismatch for {kind} {text:?}" + ); + } } #[test] - fn tree_normalizer_yield_statement_matches_ruby_private_predicate() { + fn interpolated_statement_matches_ruby_private_predicate() { for (source, language, suffix, kind, text) in [ ( - "def each\n yield :item\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "yield :item", - ), - ( - "def each\n value\nend\n", + "def f\n \"hi #{name}\"\nend\n", Language::Ruby, ".rb", "body_statement", - "value", + "\"hi #{name}\"", ), ( - "def gen():\n yield item\n other()\n", + "def f():\n f\"hi {name}\"\n", Language::Python, ".py", - "expression_statement", - "yield item", + "block", + "f\"hi {name}\"", ), ( - "def gen():\n yield from items\n other()\n", - Language::Python, - ".py", + "function f() { `hi ${name}`; }\n", + Language::TypeScript, + ".ts", "expression_statement", - "yield from items", + "`hi ${name}`;", ), ( - "def gen():\n yield item\n other()\n", - Language::Python, - ".py", + "function f()\n \"hi\"\nend\n", + Language::Lua, + ".lua", "block", - "yield item\n other()", + "\n \"hi\"", ), ] { let tree = raw_tree(source, language); @@ -5216,435 +27920,724 @@ mod tests { let normalizer = super::TreeSitterNormalizer::new(source, language); assert_eq!( - normalizer.yield_statement(node), - ruby_private_predicate(source, language, suffix, "yield_statement?", kind, text), - "yield_statement? mismatch for {language:?} {kind} {text:?}" + normalizer.interpolated_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "interpolated_statement?", + kind, + text + ), + "interpolated_statement? 
mismatch for {language:?} {kind} {text:?}" ); } } #[test] - fn python_yield_statement_in_multi_statement_block_matches_ruby_ast() { - let source = "def gen():\n yield item\n other()\n"; - assert_ruby_parity(source, Language::Python, ".py"); - - let root = parse_language_source(source, Language::Python, ".py"); - let defn = first_node(&root, "DEFN", "def gen():\n yield item\n other()"); - let scope = child_node(defn, 1); - let body = child_node(scope, 2); + fn interpolated_statement_normalization_matches_ruby() { + let source = "def f\n \"hi #{name}\"\nend\n"; + let root = parse_language_source(source, Language::Ruby, ".rb"); + let dstr = first_node(&root, "DSTR", "\"hi #{name}\""); - assert_eq!(body.r#type, "BLOCK"); - assert_eq!(child_types(body), vec!["YIELD", "EXPRESSION_STATEMENT"]); + assert_eq!(child_types(dstr), vec!["STR", "EVSTR"]); + assert_ruby_parity(source, Language::Ruby, ".rb"); } #[test] - fn tree_normalizer_super_statement_matches_ruby_private_predicate() { + fn normalize_interpolated_statement_matches_ruby_private_method() { for (source, kind, text) in [ ( - "class Child < Parent\n def call\n super\n end\nend\n", - "body_statement", - "super", - ), - ( - "class Child < Parent\n def call\n super :item\n end\nend\n", + "def f\n \"hi #{name}\"\nend\n", "body_statement", - "super :item", + "\"hi #{name}\"", ), ( - "class Child < Parent\n def call\n value\n end\nend\n", + "def f\n \"#{first} #{last}\"\nend\n", "body_statement", - "value", + "\"#{first} #{last}\"", ), ] { let tree = raw_tree(source, Language::Ruby); let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = node_value(&normalizer.normalize_interpolated_statement(node)); assert_eq!( - normalizer.super_statement(node), - ruby_private_predicate( + rust, + ruby_private_normalize_method_value( source, Language::Ruby, 
".rb", - "super_statement?", + "normalize_interpolated_statement", kind, text ), - "super_statement? mismatch for {kind} {text:?}" + "normalize_interpolated_statement mismatch for {kind} {text:?}" ); } } #[test] - fn ruby_super_statement_normalization_matches_ruby_ast() { - let source = "class Child < Parent\n def bare\n super\n end\n def with_arg\n super :item\n end\nend\n"; - assert_ruby_parity(source, Language::Ruby, ".rb"); - - let root = parse_language_source(source, Language::Ruby, ".rb"); - let bare = first_node(&root, "SUPER", "super"); - let with_arg = first_node(&root, "SUPER", "super :item"); - - assert_eq!(bare.children, vec![Child::Nil]); - assert_eq!(child_types(with_arg), vec!["LIST"]); - assert_eq!(child_types(child_node(with_arg, 0)), vec!["LIT"]); - } - - #[test] - fn tree_normalizer_argument_list_element_reference_matches_ruby_private_predicate() { - for (source, text) in [ - ("def indexed\n return items[0]\nend\n", "items[0]"), - ("def indexed\n return obj.foo[0]\nend\n", "obj.foo[0]"), - ("def indexed\n return [0]\nend\n", "[0]"), + fn concatenated_string_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ ( - "def indexed\n return items[0], other\nend\n", - "items[0], other", + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b\"", ), - ("def indexed\n return items[]\nend\n", "items[]"), ( - "def indexed\n return items[0] { nope }\nend\n", - "items[0] { nope }", + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "block", + "\"a\" \"b\"", + ), + ( + "function f() { \"a\"; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "\"a\";", + ), + ( + "function f()\n \"a\"\nend\n", + Language::Lua, + ".lua", + "block", + "\n \"a\"", ), ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, "argument_list", text); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let 
tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); assert_eq!( - normalizer.argument_list_element_reference(node), + normalizer.concatenated_string_statement(node), ruby_private_predicate( source, - Language::Ruby, - ".rb", - "argument_list_element_reference?", - "argument_list", + language, + suffix, + "concatenated_string_statement?", + kind, text ), - "argument_list_element_reference? mismatch for {text:?}" + "concatenated_string_statement? mismatch for {language:?} {kind} {text:?}" ); } } #[test] - fn dynamic_scope_rewrites_locals_without_crossing_scope_boundaries() { - let inner_assignment = test_node("LASGN", vec![Child::Symbol("inner".to_string())]); - let node = test_node( - "BLOCK", - vec![ - Child::Node(Box::new(test_node( - "LASGN", - vec![Child::Symbol("value".to_string())], - ))), - Child::Node(Box::new(test_node( - "LVAR", - vec![Child::Symbol("value".to_string())], - ))), - Child::Node(Box::new(test_node( - "DEFN", - vec![ - Child::Symbol("nested".to_string()), - Child::Node(Box::new(test_node( - "SCOPE", - vec![ - Child::Nil, - Child::Nil, - Child::Node(Box::new(inner_assignment)), - ], - ))), - ], - ))), - ], - ); - - let result = super::dynamic_scope(node); - - assert_eq!(child_node(&result, 0).r#type, "DASGN"); - assert_eq!(child_node(&result, 1).r#type, "DVAR"); - let nested = child_node(&result, 2); - assert_eq!(nested.r#type, "DEFN"); - let nested_scope = child_node(nested, 1); - assert_eq!(nested_scope.r#type, "SCOPE"); - assert_eq!(child_node(nested_scope, 2).r#type, "LASGN"); - } - - #[test] - fn link_when_chain_sets_next_arm_and_pads_short_when_nodes() { - let fallback = test_node("ELSE", Vec::new()); - let first = test_node( - "WHEN", - vec![ - Child::Symbol("patterns".to_string()), - Child::Nil, - Child::Nil, - ], - ); - let second = test_node( - "WHEN", - vec![ - Child::Symbol("patterns".to_string()), - Child::Nil, - 
Child::Nil, - ], - ); - let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); - - let result = normalizer - .link_when_chain(vec![first, second], Some(fallback)) - .expect("expected linked when chain"); - - assert_eq!(result.r#type, "WHEN"); - let next = child_node(&result, 2); - assert_eq!(next.r#type, "WHEN"); - assert_eq!(child_node(next, 2).r#type, "ELSE"); - - let short = test_node("WHEN", vec![Child::Symbol("patterns".to_string())]); - let fallback = test_node("ELSE", Vec::new()); - let result = normalizer - .link_when_chain(vec![short], Some(fallback)) - .expect("expected padded when chain"); + fn concatenated_string_statement_normalization_matches_ruby() { + for (source, language, suffix, expected_text, expected_types) in [ + ( + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "\"a\"", + vec!["STR", "STR"], + ), + ( + "def f\n \"a\" \"b #{name}\"\nend\n", + Language::Ruby, + ".rb", + "\"b #{name}\"", + vec!["STR", "STR", "EVSTR"], + ), + ( + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "\"a\"", + vec!["STR", "STR"], + ), + ( + "def f():\n \"a\" f\"b {name}\"\n", + Language::Python, + ".py", + "f\"b {name}\"", + vec!["STR", "STRING_START", "STR", "EVSTR", "STRING_END"], + ), + ] { + let root = parse_language_source(source, language, suffix); + let dstr = first_node(&root, "DSTR", expected_text); - assert_eq!(result.children.len(), 3); - assert_eq!(result.children[1], Child::Nil); - assert_eq!(child_node(&result, 2).r#type, "ELSE"); + assert_eq!(child_types(dstr), expected_types); + assert_ruby_parity(source, language, suffix); + } } #[test] - fn link_rescue_chain_sets_next_rescue_and_pads_short_resbody_nodes() { - let first = test_node( - "RESBODY", - vec![ - Child::Symbol("exceptions".to_string()), - Child::Nil, - Child::Nil, - ], - ); - let second = test_node( - "RESBODY", - vec![ - Child::Symbol("exceptions".to_string()), - Child::Nil, - Child::Nil, - ], - ); - let normalizer = super::TreeSitterNormalizer::new("", 
Language::Ruby); - - let result = normalizer - .link_rescue_chain(vec![first, second]) - .expect("expected linked rescue chain"); - - assert_eq!(result.r#type, "RESBODY"); - let next = child_node(&result, 2); - assert_eq!(next.r#type, "RESBODY"); - assert_eq!(next.children[2], Child::Nil); - - let short = test_node("RESBODY", vec![Child::Symbol("exceptions".to_string())]); - let result = normalizer - .link_rescue_chain(vec![short]) - .expect("expected padded rescue chain"); + fn normalize_concatenated_string_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b\"", + ), + ( + "def f\n \"a\" \"b #{name}\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b #{name}\"", + ), + ( + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "block", + "\"a\" \"b\"", + ), + ( + "def f():\n \"a\" f\"b {name}\"\n", + Language::Python, + ".py", + "block", + "\"a\" f\"b {name}\"", + ), + ( + "function f() { \"a\"; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "\"a\";", + ), + ( + "function f()\n \"a\"\nend\n", + Language::Lua, + ".lua", + "block", + "\n \"a\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.normalize_concatenated_string_statement(node); - assert_eq!(result.children.len(), 3); - assert_eq!(result.children[1], Child::Nil); - assert_eq!(result.children[2], Child::Nil); + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_concatenated_string_statement", + kind, + text + ), + "normalize_concatenated_string_statement mismatch for {language:?} {kind} {text:?}" + ); + } } #[test] - fn infix_statement_parts_extracts_allowed_wrapper_parts() { - let source = "def 
calc\n left + right\nend\n"; - let tree = raw_tree(source, Language::Ruby); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let body = first_raw_node(tree.root_node(), source, "body_statement", "left + right"); - let binary = first_raw_node(tree.root_node(), source, "binary", "left + right"); - - assert_eq!( - infix_parts_text(&normalizer, body, source), - Some(("left".to_string(), "+".to_string(), "right".to_string())) - ); - assert_eq!(infix_parts_text(&normalizer, binary, source), None); - - let source = "def calc\n return left + right\nend\n"; - let tree = raw_tree(source, Language::Ruby); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let args = first_raw_node(tree.root_node(), source, "argument_list", "left + right"); - assert_eq!( - infix_parts_text(&normalizer, args, source), - Some(("left".to_string(), "+".to_string(), "right".to_string())) - ); + fn normalize_chained_string_matches_ruby_private_method() { + for (source, language, suffix, ruby_kind, ruby_text, rust_kind, rust_text) in [ + ( + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b\"", + "chained_string", + "\"a\" \"b\"", + ), + ( + "def f\n \"a\" \"b #{name}\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b #{name}\"", + "chained_string", + "\"a\" \"b #{name}\"", + ), + ( + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "block", + "\"a\" \"b\"", + "concatenated_string", + "\"a\" \"b\"", + ), + ( + "def f():\n \"a\" f\"b {name}\"\n", + Language::Python, + ".py", + "block", + "\"a\" f\"b {name}\"", + "concatenated_string", + "\"a\" f\"b {name}\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, rust_kind, rust_text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.normalize_chained_string(node); - let source = "def calc\n left && right\nend\n"; - let tree = 
raw_tree(source, Language::Ruby); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let boolean = first_raw_node(tree.root_node(), source, "body_statement", "left && right"); - assert_eq!(infix_parts_text(&normalizer, boolean, source), None); + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_chained_string", + ruby_kind, + ruby_text + ), + "normalize_chained_string mismatch for {language:?} {rust_kind} {rust_text:?}" + ); + } } #[test] - fn argument_list_unary_not_matches_ruby_private_predicate() { - for (line, text) in [ - ("return !flag", "!flag"), - ("return !!flag", "!!flag"), - ("return flag", "flag"), - ("return !flag, other", "!flag, other"), - ("return (!flag)", "(!flag)"), - ("return not flag", "not flag"), + fn dynamic_string_source_matches_ruby_private_method() { + for (source, language, suffix, ruby_kind, ruby_text, rust_kind, rust_text) in [ + ( + "def f\n \"a\" \"b #{name}\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b #{name}\"", + "chained_string", + "\"a\" \"b #{name}\"", + ), + ( + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b\"", + "chained_string", + "\"a\" \"b\"", + ), + ( + "def f():\n \"a\" f\"b {name}\"\n", + Language::Python, + ".py", + "block", + "\"a\" f\"b {name}\"", + "concatenated_string", + "\"a\" f\"b {name}\"", + ), + ( + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "block", + "\"a\" \"b\"", + "concatenated_string", + "\"a\" \"b\"", + ), ] { - let source = format!("def check\n {line}\nend\n"); - let tree = raw_tree(&source, Language::Ruby); - let node = first_raw_node(tree.root_node(), &source, "argument_list", text); - let normalizer = super::TreeSitterNormalizer::new(&source, Language::Ruby); + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, rust_kind, rust_text); + let mut normalizer = 
super::TreeSitterNormalizer::new(source, language); + let mut normalized_children = Vec::new(); + for child in normalizer.named_children(node) { + let normalized = normalizer.normalize_node(child); + normalized_children.push((child, normalized)); + } + let rust = normalizer + .dynamic_string_source(&normalized_children) + .map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + let ruby = ruby_private_dynamic_string_source_signature( + source, language, suffix, ruby_kind, ruby_text, + ); assert_eq!( - normalizer.argument_list_unary_not(node), - ruby_private_predicate( - &source, - Language::Ruby, - ".rb", - "argument_list_unary_not?", - "argument_list", - text - ), - "argument_list_unary_not? mismatch for {line:?}" + rust, ruby, + "dynamic_string_source mismatch for {language:?} {rust_kind} {rust_text:?}" ); } } #[test] - fn unary_not_statement_matches_ruby_private_predicate() { - for (line, text) in [ - ("!flag", "!flag"), - ("!!flag", "!!flag"), - ("flag", "flag"), - ("!flag; other", "!flag; other"), - ("(!flag)", "(!flag)"), - ("not flag", "not flag"), + fn terminal_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n foo()\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "()", + ), + ( + "def f\n foo\n foo()\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo\n foo()", + ), + ( + "def f():\n foo()\n", + Language::Python, + ".py", + "argument_list", + "()", + ), + ( + "def f():\n foo\n", + Language::Python, + ".py", + "block", + "foo", + ), + ( + "function f() { foo(); }\n", + Language::TypeScript, + ".ts", + "arguments", + "()", + ), + ( + "function f()\n foo()\nend\n", + Language::Lua, + ".lua", + "arguments", + "()", + ), ] { - let source = format!("def check\n {line}\nend\n"); - let tree = raw_tree(&source, Language::Ruby); - let node = first_raw_node(tree.root_node(), &source, "body_statement", text); - let normalizer = 
super::TreeSitterNormalizer::new(&source, Language::Ruby); + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); assert_eq!( - normalizer.unary_not_statement(node), - ruby_private_predicate( - &source, - Language::Ruby, - ".rb", - "unary_not_statement?", - "body_statement", - text - ), - "unary_not_statement? mismatch for {line:?}" + normalizer.terminal_statement(node), + ruby_private_predicate(source, language, suffix, "terminal_statement?", kind, text), + "terminal_statement? mismatch for {language:?} {kind} {text:?}" ); } } #[test] - fn unary_not_expression_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ + fn normalize_terminal_statement_matches_ruby_private_method() { + let cases = vec![ ( - "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + "yield\n", Language::Ruby, ".rb", - "unary", - "!flag", + "yield", + "yield", + "yield", + Vec::<&str>::new(), ), ( - "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + "@name\n", Language::Ruby, ".rb", - "unary", - "!!flag", + "instance_variable", + "instance_variable", + "@name", + Vec::<&str>::new(), ), ( - "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + "$1\n$value\n", Language::Ruby, ".rb", - "unary", - "-flag", + "global_variable", + "global_variable", + "$1", + Vec::<&str>::new(), ), ( - "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + "$1\n$value\n", + Language::Ruby, + ".rb", + "global_variable", + "global_variable", + "$value", + Vec::<&str>::new(), + ), + ( + "nil\ntrue\nfalse\n", + Language::Ruby, + ".rb", + "nil", + "nil", + "nil", + Vec::<&str>::new(), + ), + ( + "nil\ntrue\nfalse\n", + Language::Ruby, + ".rb", + "true", + "true", + "true", + Vec::<&str>::new(), + ), + ( + "nil\ntrue\nfalse\n", + Language::Ruby, + ".rb", + "false", + "false", + "false", + Vec::<&str>::new(), + ), + ( + ":ready\n", + Language::Ruby, + 
".rb", + "simple_symbol", + "simple_symbol", + ":ready", + Vec::<&str>::new(), + ), + ( + "-123\n", Language::Ruby, ".rb", "unary", - "not flag", + "unary", + "-123", + Vec::<&str>::new(), + ), + ( + "[]\n", + Language::Ruby, + ".rb", + "array", + "array", + "[]", + Vec::<&str>::new(), + ), + ( + "foo\n", + Language::Ruby, + ".rb", + "identifier", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "foo\n", + Language::Ruby, + ".rb", + "identifier", + "identifier", + "foo", + vec!["foo"], + ), + ( + "foo\n", + Language::Python, + ".py", + "expression_statement", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "foo;\n", + Language::TypeScript, + ".ts", + "identifier", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "foo()\n", + Language::Lua, + ".lua", + "identifier", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "foo()\n", + Language::Ruby, + ".rb", + "argument_list", + "argument_list", + "()", + Vec::<&str>::new(), + ), + ]; + + for (source, language, suffix, ruby_kind, rust_kind, text, locals) in cases { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, rust_kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + if !locals.is_empty() { + normalizer + .local_stack + .push(locals.iter().map(|name| name.to_string()).collect()); + } + let rust = node_value(&normalizer.normalize_terminal_statement(node)); + + assert_eq!( + rust, + ruby_private_normalize_terminal_statement_value( + source, + language, + suffix, + ruby_kind, + text, + &locals, + ), + "normalize_terminal_statement mismatch for {language:?} ruby={ruby_kind} rust={rust_kind} {text:?} locals={locals:?}" + ); + } + } + + #[test] + fn operator_assignment_statement_parts_matches_ruby_private_method() { + for (source, language, suffix, ruby_kind, ruby_text, rust_kind, rust_text) in [ + ( + "def f\n x += 1\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x += 1", + 
"operator_assignment", + "x += 1", + ), + ( + "def f\n x ||= y\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x ||= y", + "operator_assignment", + "x ||= y", + ), + ( + "def f\n x += 1\n y += 2\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x += 1\n y += 2", + "body_statement", + "x += 1\n y += 2", + ), + ( + "def f():\n x += 1\n", + Language::Python, + ".py", + "block", + "x += 1", + "augmented_assignment", + "x += 1", ), ( - "function check(flag: boolean) { return !flag; }\n", + "function f() { obj.x ||= y; }\n", Language::TypeScript, ".ts", - "unary_expression", - "!flag", + "augmented_assignment_expression", + "obj.x ||= y", + "augmented_assignment_expression", + "obj.x ||= y", ), ( - "if not flag:\n pass\n", - Language::Python, - ".py", - "not_operator", - "not flag", + "function f() { x += 1; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "x += 1;", + "expression_statement", + "x += 1;", ), ( - "if not flag then end\n", + "function f()\n x = x + 1\nend\n", Language::Lua, ".lua", - "unary_expression", - "not flag", + "block", + "x = x + 1", + "block", + "x = x + 1", ), ] { let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); + let node = first_raw_node(tree.root_node(), source, rust_kind, rust_text); let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.operator_assignment_statement_parts(node).map( + |(left, operator, right)| { + ( + left.kind().to_string(), + super::node_text(left, source).to_string(), + operator, + right.kind().to_string(), + super::node_text(right, source).to_string(), + ) + }, + ); + let ruby = ruby_private_operator_assignment_statement_parts_signature( + source, language, suffix, ruby_kind, ruby_text, + ); assert_eq!( - normalizer.unary_not_expression(node), - ruby_private_predicate( - source, - language, - suffix, - "unary_not_expression?", - kind, - text - ), - "unary_not_expression? 
mismatch for {language:?} {kind} {text:?}" + rust, ruby, + "operator_assignment_statement_parts mismatch for {language:?} {rust_kind} {rust_text:?}" ); } } #[test] - fn unary_minus_expression_matches_ruby_private_predicate() { + fn operator_assignment_statement_matches_ruby_private_predicate() { for (source, language, suffix, kind, text) in [ ( - "def check\n -flag\n !flag\n value\nend\n", + "def f\n x += 1\nend\n", Language::Ruby, ".rb", - "unary", - "-flag", + "body_statement", + "x += 1", ), ( - "def check\n -flag\n !flag\n value\nend\n", + "def f\n x ||= y\nend\n", Language::Ruby, ".rb", - "unary", - "!flag", + "body_statement", + "x ||= y", ), ( - "function check(value: number) { return -value; }\n", - Language::TypeScript, - ".ts", - "unary_expression", - "-value", + "def f\n x = 1\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x = 1", ), ( - "x = -value\n", + "def f\n x += 1\n y += 2\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x += 1\n y += 2", + ), + ( + "def f():\n x += 1\n", Language::Python, ".py", - "unary_operator", - "-value", + "block", + "x += 1", ), ( - "local x = -value\n", + "function f() { x += 1; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "x += 1;", + ), + ( + "function f()\n x = x + 1\nend\n", Language::Lua, ".lua", - "expression_list", - "-value", + "block", + "x = x + 1", ), ] { let tree = raw_tree(source, language); @@ -5652,95 +28645,92 @@ mod tests { let normalizer = super::TreeSitterNormalizer::new(source, language); assert_eq!( - normalizer.unary_minus_expression(node), + normalizer.operator_assignment_statement(node), ruby_private_predicate( source, language, suffix, - "unary_minus_expression?", + "operator_assignment_statement?", kind, text ), - "unary_minus_expression? mismatch for {language:?} {kind} {text:?}" + "operator_assignment_statement? 
mismatch for {language:?} {kind} {text:?}" ); } } #[test] - fn binary_operator_matches_ruby_private_helper() { + fn normalize_operator_assignment_statement_matches_ruby_private_method() { for (source, language, suffix, kind, text) in [ ( - "def calc\n left + right\n left && right\n value\nend\n", + "def f\n x += 1\nend\n", Language::Ruby, ".rb", - "binary", - "left + right", + "body_statement", + "x += 1", ), ( - "def calc\n left + right\n left && right\n value\nend\n", + "def f\n x ||= y\nend\n", Language::Ruby, ".rb", - "binary", - "left && right", + "body_statement", + "x ||= y", ), ( - "def calc\n left + right\n left && right\n value\nend\n", + "def f\n items[index] += value\nend\n", Language::Ruby, ".rb", "body_statement", - "left + right\n left && right\n value", + "items[index] += value", ), ( - "const value = left + right && other;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left + right && other", - ), - ( - "const value = left + right && other;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left + right", - ), - ( - "value = left + right and other\n", - Language::Python, - ".py", - "boolean_operator", - "left + right and other", + "def f\n object.value += 1\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "object.value += 1", ), ( - "value = left + right and other\n", + "def f():\n x += 1\n", Language::Python, ".py", - "binary_operator", - "left + right", + "block", + "x += 1", ), ( - "local value = left + right and other\n", - Language::Lua, - ".lua", - "expression_list", - "left + right and other", + "function f() { x += 1; }\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "x += 1", ), ( - "local value = left + right and other\n", - Language::Lua, - ".lua", - "binary_expression", - "left + right", + "function f() { obj.x ||= y; }\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "obj.x ||= y", ), ] { let tree = raw_tree(source, language); let node = 
first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_operator_assignment_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); assert_eq!( - normalizer.binary_operator(node).unwrap_or_default(), - ruby_private_string(source, language, suffix, "binary_operator", kind, text), - "binary_operator mismatch for {language:?} {kind} {text:?}" + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_operator_assignment_statement", + kind, + text + ), + "normalize_operator_assignment_statement mismatch for {language:?} {kind} {text:?}" ); } } @@ -5856,48 +28846,38 @@ class ColorParseError(Exception): #[test] fn python_ellipsis_only_function_body_is_empty_scope_with_root_source() { - let root = parse_language_source( + assert_ruby_parity( r#"def __rich__(): ... 
"#, Language::Python, ".py", ); - let defn = first_node(&root, "DEFN", "def __rich__():\n ..."); - let scope = child_node(defn, 1); - - assert_eq!(scope.r#type, "SCOPE"); - assert!(matches!(scope.children.get(2), Some(Child::Nil))); - assert_eq!( - scope.first_lineno, root.first_lineno, - "Ruby scope(body=nil,args=nil) falls back to document root source" - ); - assert_eq!(scope.text, root.text); } #[test] fn python_explicit_return_none_is_not_elided_from_function_body() { - let root = parse_language_source( - r#" + let source = r#" class Thing: def _repr_latex_(self): return None -"#, - Language::Python, - ".py", - ); - let iter = first_node( +"#; + let root = parse_language_source(source, Language::Python, ".py"); + let defn = first_node( &root, - "ITER", + "DEFN", "def _repr_latex_(self):\n return None", ); - let scope = child_node(iter, 1); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + assert_eq!(body.r#type, "RETURN"); assert_eq!( - child_node(scope, 2).r#type, + child_node(body, 0).r#type, "NIL", "Ruby only elides implicit nil bodies for Ruby, not explicit Python return None: {scope:#?}" ); + assert_ruby_parity(source, Language::Python, ".py"); } #[test] @@ -5945,7 +28925,7 @@ def _is_jupyter(): #[test] fn python_bare_identifier_only_block_has_no_children() { - let root = parse_language_source( + assert_ruby_parity( r#" def get_exception(): try: @@ -5956,23 +28936,17 @@ def get_exception(): Language::Python, ".py", ); - let block = first_node(&root, "BLOCK", "foobarbaz"); - - assert!( - block.children.is_empty(), - "Ruby exposes a bare identifier-only block as an empty block: {block:#?}" - ); } #[test] - fn python_bare_dotted_expression_statement_keeps_statement_wrapper() { + fn python_bare_dotted_expression_statement_normalizes_as_call() { let root = parse_language_source("os.get_terminal_size\n", Language::Python, ".py"); - let expression = first_node(&root, "EXPRESSION_STATEMENT", "os.get_terminal_size"); + let call = 
first_node(&root, "CALL", "os.get_terminal_size"); assert_eq!( - child_types(expression), - vec!["LVAR", "LVAR"], - "Ruby exposes bare dotted expression statements as expression_statement identifier children: {expression:#?}" + child_types(call), + vec!["LVAR"], + "bare Python dotted expression statements should normalize as calls: {call:#?}" ); } @@ -6001,7 +28975,7 @@ def test_get_style(): #[test] fn python_delete_statement_matches_ruby_block_contexts() { - let root = parse_language_source( + assert_ruby_parity( r#" def save(self, clear): if clear: @@ -6013,24 +28987,11 @@ def save(self, clear): Language::Python, ".py", ); - let if_node = first_node(&root, "IF", "if clear:\n del self._record_buffer[:]"); - assert_eq!( - child_node(if_node, 1).r#type, - "SUBSCRIPT", - "Ruby unwraps a single delete body to the deleted subscript: {if_node:#?}" - ); - - let delete = first_node(&root, "DELETE_STATEMENT", "del self._record_buffer[:]"); - assert_eq!( - child_types(delete), - vec!["SUBSCRIPT"], - "Ruby keeps delete_statement wrapper in multi-statement bodies: {delete:#?}" - ); } #[test] fn python_single_subscript_expression_block_exposes_subscript_children() { - let root = parse_language_source( + assert_ruby_parity( r#" def test_render(): with pytest.raises(KeyError): @@ -6039,17 +29000,10 @@ def test_render(): Language::Python, ".py", ); - let block = first_node(&root, "BLOCK", r#"top["asdasd"]"#); - - assert_eq!( - child_types(block), - vec!["LVAR", "STR"], - "Ruby exposes a single subscript expression block as subscript children: {block:#?}" - ); } #[test] - fn python_single_if_block_under_try_exposes_ruby_if_children() { + fn python_single_if_block_under_try_matches_ruby_if_shape() { let root = parse_language_source( r#" def load(args): @@ -6064,22 +29018,23 @@ def load(args): Language::Python, ".py", ); - let block = first_node( + let if_node = first_node( &root, - "BLOCK", + "IF", "if args.path == \"-\":\n json_data = sys.stdin.read()\n else:\n json_data = 
Path(args.path).read_text()", ); assert_eq!( - child_types(block), - vec!["OPCALL", "BLOCK", "ELSE_CLAUSE"], - "Ruby block lacks an if_statement wrapper in this parser shape: {block:#?}" + child_types(if_node), + vec!["OPCALL", "LASGN", "ELSE_CLAUSE"], + "Ruby normalizes this Python try-body child as an IF: {if_node:#?}" ); + assert_eq!(child_types(child_node(if_node, 2)), vec!["BLOCK"]); } #[test] fn python_single_decorated_definition_block_exposes_decorator_and_function() { - let root = parse_language_source( + assert_ruby_parity( r#" def test_inspect_swig_edge_case(): class Thing: @@ -6090,17 +29045,6 @@ def test_inspect_swig_edge_case(): Language::Python, ".py", ); - let block = first_node( - &root, - "BLOCK", - "@property\n def __class__(self):\n raise AttributeError", - ); - - assert_eq!( - child_types(block), - vec!["IVAR", "DEFN"], - "Ruby exposes decorated definitions as direct block children: {block:#?}" - ); } #[test] @@ -6207,101 +29151,59 @@ def test_can_handle_special_characters_in_docstrings(): #[test] fn lua_table_call_entry_matches_ruby_field_children_shape() { - let root = parse_language_source( + assert_ruby_parity( "assert.same(install, { bin = { P\"bin/binfile\" } })\n", Language::Lua, ".lua", ); - let field = first_node(&root, "FIELD", "P\"bin/binfile\""); - - assert_eq!( - child_types(field), - vec!["LVAR", "ARGUMENTS"], - "Ruby exposes a Lua table field call as the call children, without FUNCTION_CALL wrapper: {field:#?}" - ); } #[test] fn lua_table_identifier_entry_matches_ruby_empty_field_shape() { - let root = parse_language_source( + assert_ruby_parity( "local rocks_path = table.concat({rocks_tree, \"a_rock\"})\n", Language::Lua, ".lua", ); - let field = first_node(&root, "FIELD", "rocks_tree"); - - assert!( - field.children.is_empty(), - "Ruby exposes a bare identifier Lua table field with no normalized children: {field:#?}" - ); } #[test] fn lua_single_call_function_body_matches_ruby_block_shape() { - let root = parse_language_source( 
+ assert_ruby_parity( "before_each(function()\n test_env.setup_specs(extra_rocks)\nend)\n", Language::Lua, ".lua", ); - let defn = first_node( - &root, - "DEFN", - "function()\n test_env.setup_specs(extra_rocks)\nend", - ); - let scope = child_node(defn, 1); - let body = child_node(scope, 2); - - assert_eq!(body.r#type, "BLOCK"); - assert_eq!( - child_types(body), - vec!["DOT_INDEX_EXPRESSION", "ARGUMENTS"], - "Ruby exposes a single Lua function-call body as a BLOCK of the call target and arguments: {body:#?}" - ); } #[test] - fn lua_single_assignment_function_body_matches_ruby_block_shape() { - let root = parse_language_source( + fn lua_single_assignment_function_body_matches_ruby_lasgn_shape() { + assert_ruby_parity( "lazy_setup(function()\n git = git_repo.start()\nend)\n", Language::Lua, ".lua", ); - let defn = first_node(&root, "DEFN", "function()\n git = git_repo.start()\nend"); - let scope = child_node(defn, 1); - let body = child_node(scope, 2); - - assert_eq!(body.r#type, "BLOCK"); - assert_eq!( - child_types(body), - vec!["VARIABLE_LIST", "EXPRESSION_LIST"], - "Ruby exposes a single Lua assignment body as a BLOCK of assignment children, without LASGN: {body:#?}" - ); } #[test] - fn lua_single_bare_assignment_function_body_matches_ruby_empty_lists() { + fn lua_single_bare_assignment_function_body_matches_ruby_lasgn_shape() { let root = parse_language_source("function()\n x = y\nend\n", Language::Lua, ".lua"); let defn = first_node(&root, "DEFN", "function()\n x = y\nend"); let scope = child_node(defn, 1); let body = child_node(scope, 2); - let variable_list = child_node(body, 0); - let expression_list = child_node(body, 1); + let right = child_node(body, 1); - assert_eq!(body.r#type, "BLOCK"); - assert_eq!(variable_list.r#type, "VARIABLE_LIST"); - assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); - assert!( - variable_list.children.is_empty(), - "Ruby exposes a bare Lua single-assignment variable_list with no children: {variable_list:#?}" - ); + 
assert_eq!(body.r#type, "LASGN"); + assert_eq!(body.children.first(), Some(&Child::String("x".to_string()))); + assert_eq!(right.r#type, "EXPRESSION_LIST"); assert!( - expression_list.children.is_empty(), - "Ruby exposes a bare identifier Lua single-assignment RHS with no children: {expression_list:#?}" + right.children.is_empty(), + "Ruby exposes a bare identifier Lua single-assignment RHS with no children: {right:#?}" ); } #[test] - fn lua_single_dotted_assignment_function_body_keeps_ruby_variable_list_children() { + fn lua_single_dotted_assignment_function_body_normalizes_as_attribute_assignment() { let root = parse_language_source( "function()\n package.path = oldpath\nend\n", Language::Lua, @@ -6310,98 +29212,54 @@ def test_can_handle_special_characters_in_docstrings(): let defn = first_node(&root, "DEFN", "function()\n package.path = oldpath\nend"); let scope = child_node(defn, 1); let body = child_node(scope, 2); - let variable_list = child_node(body, 0); - let expression_list = child_node(body, 1); + let assignment = body; + let receiver = child_node(assignment, 0); + let args = child_node(assignment, 2); - assert_eq!(body.r#type, "BLOCK"); - assert_eq!(variable_list.r#type, "VARIABLE_LIST"); + assert_eq!(body.r#type, "ATTRASGN"); + assert_eq!(receiver.r#type, "LVAR"); assert_eq!( - child_types(variable_list), - vec!["LVAR", "LVAR"], - "Ruby keeps Lua dotted assignment targets as variable_list children: {variable_list:#?}" + receiver.children, + vec![Child::String("package".to_string())] ); - assert!( - expression_list.children.is_empty(), - "Ruby exposes a bare identifier Lua dotted-assignment RHS with no children: {expression_list:#?}" + assert_eq!( + assignment.children.get(1), + Some(&Child::Symbol("path=".to_string())) ); + assert_eq!(args.r#type, "LIST"); } #[test] fn lua_single_local_assignment_function_body_matches_ruby_lasgn_shape() { - let root = parse_language_source( + assert_ruby_parity( "it(function()\n local output = run.luarocks(\"show 
--rock-tree luacov\")\nend)\n", Language::Lua, ".lua", ); - let defn = first_node( - &root, - "DEFN", - "function()\n local output = run.luarocks(\"show --rock-tree luacov\")\nend", - ); - let scope = child_node(defn, 1); - let body = child_node(scope, 2); - - assert_eq!(body.r#type, "LASGN"); - assert_eq!( - body.children.first(), - Some(&Child::String("output".to_string())), - "Ruby exposes a single Lua local assignment function body as the inner LASGN: {body:#?}" - ); } #[test] fn lua_assigned_function_expression_matches_ruby_expression_list_shape() { - let root = parse_language_source( + assert_ruby_parity( "local test_with_location = function(location)\n lfs.mkdir(location)\nend\n", Language::Lua, ".lua", ); - let assignment = first_node( - &root, - "LASGN", - "test_with_location = function(location)\n lfs.mkdir(location)\nend", - ); - let expression_list = child_node(assignment, 1); - - assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); - assert_eq!( - child_types(expression_list), - vec!["PARAMETERS", "BLOCK"], - "Ruby exposes a Lua assigned function expression as PARAMETERS and BLOCK inside the RHS expression_list: {expression_list:#?}" - ); } #[test] fn lua_assigned_function_if_else_matches_fixed_ruby_if_shape() { - let root = parse_language_source( + assert_ruby_parity( "local make_unreadable = function(path)\n if is_win then\n fs.execute(\"x\")\n else\n fs.execute(\"y\")\n end\nend\n", Language::Lua, ".lua", ); - let expression_list = first_node( - &root, - "EXPRESSION_LIST", - "function(path)\n if is_win then\n fs.execute(\"x\")\n else\n fs.execute(\"y\")\n end\nend", - ); - let if_node = child_node(expression_list, 1); - let mut iters = Vec::new(); - nodes_of_type(&root, "ITER", &mut iters); - - assert_eq!(if_node.r#type, "IF"); - assert_eq!(child_node(if_node, 2).r#type, "ELSE_STATEMENT"); - assert!( - iters.is_empty(), - "Ruby no longer misclassifies a Lua if/else in an assigned function expression as ITER: {iters:#?}" - ); } #[test] - fn 
lua_single_return_function_body_matches_ruby_expression_list_shape() { - let root = parse_language_source( - "function sum.sum(a, b)\n return a + b\nend\n", - Language::Lua, - ".lua", - ); + fn lua_single_return_function_body_matches_ruby_opcall_shape() { + let source = "function sum.sum(a, b)\n return a + b\nend\n"; + let root = parse_language_source(source, Language::Lua, ".lua"); let defn = first_node( &root, "DEFN", @@ -6409,13 +29267,16 @@ def test_can_handle_special_characters_in_docstrings(): ); let scope = child_node(defn, 1); let body = child_node(scope, 2); + let returned = child_node(body, 0); - assert_eq!(body.r#type, "EXPRESSION_LIST"); + assert_eq!(body.r#type, "RETURN"); + assert_eq!(returned.r#type, "OPCALL"); assert_eq!( - child_types(body), - vec!["LVAR", "LVAR"], - "Ruby exposes a single Lua return body as the returned expression_list, without RETURN: {body:#?}" + returned.children.get(1), + Some(&Child::Symbol("+".to_string())), + "Ruby exposes a single Lua return body as RETURN wrapping the returned operator call: {body:#?}" ); + assert_ruby_parity(source, Language::Lua, ".lua"); } #[test] @@ -6478,20 +29339,11 @@ def test_can_handle_special_characters_in_docstrings(): #[test] fn lua_long_string_assignment_matches_ruby_expression_list_content_shape() { - let root = parse_language_source( + assert_ruby_parity( "local c_module_source = [[\n #include \n]]\n", Language::Lua, ".lua", ); - let expression_list = first_node(&root, "EXPRESSION_LIST", "[[\n #include \n]]"); - let string = child_node(expression_list, 0); - - assert_eq!(child_types(expression_list), vec!["STR"]); - assert_eq!( - string.children, - vec![Child::String("\n #include \n".to_string())], - "Ruby normalizes a Lua long string assignment from string_content, without bracket delimiters: {string:#?}" - ); } #[test] diff --git a/gems/decomplex/test/ast_test.rb b/gems/decomplex/test/ast_test.rb index 8d90e35c6..295192024 100644 --- a/gems/decomplex/test/ast_test.rb +++ 
b/gems/decomplex/test/ast_test.rb @@ -48,10 +48,10 @@ def test_lua_assigned_function_if_else_normalizes_as_if_not_iter end LUA root, = parse_language(file, :lua) - expression_list = nodes_of_type(root, "EXPRESSION_LIST").find { |node| node.text.start_with?("function(path)") } + lambda_node = nodes_of_type(root, "LAMBDA").find { |node| node.text.start_with?("function(path)") } - refute_nil expression_list - if_node = expression_list.children.find { |child| Decomplex::Ast.node?(child) && child.type.to_s == "IF" } + refute_nil lambda_node + if_node = nodes_of_type(lambda_node, "IF").first refute_nil if_node assert_empty nodes_of_type(root, "ITER") assert_equal "ELSE_STATEMENT", if_node.children[2].type.to_s @@ -97,6 +97,66 @@ def gen(): end end + def test_wrapped_return_statement_normalizes_return_value_before_tail_elision + with_language_file(<<~RUBY, ".rb", :ruby) do |file| + def check + return value + end + RUBY + root, = parse_language(file, :ruby) + defn = nodes_of_type(root, "DEFN").find { |node| node.children.first == :check } + + refute_nil defn + body = defn.children[1].children[2] + assert_equal "VCALL", body.type.to_s + assert_equal :value, body.children.first + end + + with_language_file(<<~PY, ".py", :python) do |file| + def check(): + return value + PY + root, = parse_language(file, :python) + defn = nodes_of_type(root, "DEFN").find { |node| node.children.first == :check } + + refute_nil defn + body = defn.children[1].children[2] + assert_equal "RETURN", body.type.to_s + assert_equal "LVAR", body.children.first.type.to_s + end + + with_language_file(<<~LUA, ".lua", :lua) do |file| + function check() + return value + end + LUA + root, = parse_language(file, :lua) + defn = nodes_of_type(root, "DEFN").find { |node| node.children.first == :check } + + refute_nil defn + body = defn.children[1].children[2] + assert_equal "RETURN", body.type.to_s + assert_equal "EXPRESSION_LIST", body.children.first.type.to_s + end + end + + def 
test_ruby_singleton_method_receiver_ignores_method_body + with_language_file(<<~RUBY, ".rb", :ruby) do |file| + def object.hidden + value + end + RUBY + root, = parse_language(file, :ruby) + defs = nodes_of_type(root, "DEFS").find { |node| node.children[1] == :hidden } + + refute_nil defs + receiver = defs.children[0] + assert_equal "VCALL", receiver.type.to_s + assert_equal :object, receiver.children[0] + assert_equal "object", receiver.text + end + end + def test_ruby_super_statement_predicate_recognizes_bare_and_argument_forms with_language_file(<<~RUBY, ".rb", :ruby) do |file| class Child < Parent @@ -341,6 +401,20 @@ def test_tree_sitter_normalizer_selects_language_specific_normalization_adapters end end + def test_safe_navigation_call_recognizes_typescript_optional_chain + with_language_file("user?.name;\nuser?.name();\n", ".ts", :typescript) do |file| + document = parse_syntax(file, :typescript) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + member = ts_nodes(document.root).find { |candidate| candidate.kind == "member_expression" && candidate.text == "user?.name" } + call = ts_nodes(document.root).find { |candidate| candidate.kind == "call_expression" && candidate.text == "user?.name()" } + + refute_nil member + refute_nil call + assert normalizer.send(:safe_navigation_call?, member) + assert normalizer.send(:safe_navigation_call?, call) + end + end + def test_binary_operator ruby_source = "def calc\n left + right\n left && right\n value\nend\n" @@ -390,6 +464,1772 @@ def test_binary_operator end end + def test_operator_call_expression_predicate + { + ruby: ["def calc\n left + right\n left && right\nend\n", ".rb", "binary", "left + right", "binary", "left && right"], + typescript: ["const value = left + right && other;\n", ".ts", "binary_expression", "left + right", "binary_expression", "left + right && other"], + python: ["value = left + right and other\n", ".py", "binary_operator", "left + right", "boolean_operator", "left + right and 
other"], + lua: ["local value = left + right\nlocal other = left and right\n", ".lua", "expression_list", "left + right", "expression_list", "left and right"] + }.each do |language, (source, suffix, positive_kind, positive_text, negative_kind, negative_text)| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + positive = ts_nodes(document.root).find { |candidate| candidate.kind == positive_kind && candidate.text == positive_text } + negative = ts_nodes(document.root).find { |candidate| candidate.kind == negative_kind && candidate.text == negative_text } + + refute_nil positive + refute_nil negative + assert normalizer.send(:operator_call_expression?, positive) + refute normalizer.send(:operator_call_expression?, negative) + end + end + end + + def test_operator_call_normalizes_python_and_lua_arithmetic + { + python: ["value = left + right\n", ".py"], + lua: ["local value = left + right\n", ".lua"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + opcall = nodes_of_type(root, "OPCALL").find { |node| node.text == "left + right" } + + refute_nil opcall + assert_equal "+", opcall.children[1].to_s + end + end + end + + def test_lua_boolean_expression_normalizes_as_and + with_language_file("local value = left and right\n", ".lua", :lua) do |file| + root, = parse_language(file, :lua) + and_node = nodes_of_type(root, "AND").find { |node| node.text == "left and right" } + + refute_nil and_node + assert_equal %w[LVAR LVAR], and_node.children.map(&:type).map(&:to_s) + end + end + + def test_lua_comparison_expression_normalizes_as_opcall + with_language_file("local value = left == right\n", ".lua", :lua) do |file| + root, = parse_language(file, :lua) + opcall = nodes_of_type(root, "OPCALL").find { |node| node.text == "left == right" } + + refute_nil opcall + assert_equal 
"==", opcall.children[1].to_s + assert_equal %w[LVAR LVAR], [opcall.children[0].type, opcall.children[2].children.first.type].map(&:to_s) + end + end + + def test_lua_long_string_assignment_normalizes_as_literal_expression_list + with_language_file("local c_module_source = [[\n #include \n]]\n", ".lua", :lua) do |file| + root, = parse_language(file, :lua) + assignment = nodes_of_type(root, "LASGN").find { |node| node.children.first == "c_module_source" } + + refute_nil assignment + expression_list = assignment.children[1] + assert_equal "EXPRESSION_LIST", expression_list.type.to_s + assert_equal "[[\n #include \n]]", expression_list.text + assert_equal ["STR"], expression_list.children.map(&:type).map(&:to_s) + assert_equal "\n #include \n", expression_list.children.first.children.first + assert_empty nodes_of_type(root, "OPCALL").select { |node| node.text.include?("") } + end + end + + def test_comparison_operator + { + ruby: ["def calc\n left == right\nend\n", ".rb", "body_statement", "left == right", "identifier", "left"], + typescript: ["const value = left === right;\n", ".ts", "binary_expression", "left === right", "identifier", "left"], + python: ["value = left == right\n", ".py", "comparison_operator", "left == right", "identifier", "left"], + lua: ["local value = left == right\nlocal other = left + right\n", ".lua", "expression_list", "left == right", "expression_list", "left + right"] + }.each do |language, (source, suffix, positive_kind, positive_text, negative_kind, negative_text)| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + positive = ts_nodes(document.root).find { |candidate| candidate.kind == positive_kind && candidate.text == positive_text } + negative = ts_nodes(document.root).find { |candidate| candidate.kind == negative_kind && candidate.text == negative_text } + + refute_nil positive + refute_nil negative + refute_empty 
normalizer.send(:comparison_operator, positive).to_s + assert_empty normalizer.send(:comparison_operator, negative).to_s + end + end + end + + def test_spaced_text + { + ruby: ["def calc\n left + right\nend\n", ".rb", "body_statement", "left + right"], + typescript: ["const value = left + right;\n", ".ts", "binary_expression", "left + right"], + python: ["value = left + right\n", ".py", "binary_operator", "left + right"], + lua: ["local value = left + right\n", ".lua", "expression_list", "left + right"] + }.each do |language, (source, suffix, kind, text)| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal " #{text} ", normalizer.send(:spaced_text, node) + end + end + end + + def test_class_node_predicate + { + ruby: ["class Thing; end\n", ".rb", "class", "class Thing; end", true], + python: ["class Thing:\n pass\n", ".py", "class_definition", "class Thing:\n pass", true], + typescript: ["class Thing {}\n", ".ts", "class_declaration", "class Thing {}", true], + lua: ["local Thing = {}\n", ".lua", "variable_declaration", "local Thing = {}", false] + }.each do |language, (source, suffix, kind, text, expected)| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:class_node?, node) + end + end + end + + def test_empty_class_scope_uses_class_source + with_language_file("class Thing; end\n", ".rb", :ruby) do |file| + root, = parse_language(file, :ruby) + class_node = nodes_of_type(root, "CLASS").find { |node| node.text == "class Thing; end" } + + 
refute_nil class_node + scope = class_node.children[2] + assert_equal "SCOPE", scope.type.to_s + assert_equal "class Thing; end", scope.text + assert_equal [1, 0, 1, 16], [scope.first_lineno, scope.first_column, scope.last_lineno, scope.last_column] + end + end + + def test_unwrap_node_predicate + cases = [ + [:ruby, "def check\n (value)\n value\nend\n", ".rb", "parenthesized_statements", "(value)", true], + [:python, "value\n(value)\n", ".py", "expression_statement", "value", false], + [:python, "value\n(value)\n", ".py", "expression_statement", "(value)", true], + [:typescript, "const value = (other);\n", ".ts", "parenthesized_expression", "(other)", true], + [:lua, "local first = (other)\nlocal second = left + right\n", ".lua", "expression_list", "(other)", true], + [:lua, "local first = (other)\nlocal second = left + right\n", ".lua", "expression_list", "left + right", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:unwrap_node?, node) + end + end + end + + def test_statement_node_predicate + cases = [ + [:ruby, "def check\n return value\nend\n", ".rb", "body_statement", "return value", true], + [:ruby, "def check\n return value\nend\n", ".rb", "identifier", "check", false], + [:python, "value\n(value)\n", ".py", "expression_statement", "(value)", true], + [:python, "value\n(value)\n", ".py", "identifier", "value", false], + [:typescript, "function check() { return value + other; }\n", ".ts", "return_statement", "return value + other;", true], + [:typescript, "function check() { return value + other; }\n", ".ts", "binary_expression", "value + other", true], + [:typescript, "function check() { return value + 
other; }\n", ".ts", "identifier", "value", false], + [:lua, "return value\n", ".lua", "return_statement", "return value", true], + [:lua, "return value\n", ".lua", "expression_list", "value", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:statement_node?, node) + end + end + end + + def test_local_identifier_predicate + cases = [ + [:ruby, "def check\nend\nclass Thing; end\n", ".rb", "identifier", "check", true], + [:ruby, "def check\nend\nclass Thing; end\n", ".rb", "constant", "Thing", false], + [:python, "def check(value):\n pass\n", ".py", "identifier", "value", true], + [:python, "def check(value):\n pass\n", ".py", "parameters", "(value)", false], + [:typescript, "const value = object.field;\n", ".ts", "identifier", "value", true], + [:typescript, "const value = object.field;\n", ".ts", "property_identifier", "field", true], + [:typescript, "const value = object.field;\n", ".ts", "lexical_declaration", "const value = object.field;", false], + [:lua, "local value = other\nprint(value)\n", ".lua", "identifier", "value", true], + [:lua, "local value = other\n", ".lua", "expression_list", "other", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:local_identifier?, node) + end + end + end + + def test_ruby_local_name_predicate + normalizer = 
Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@local_stack, [ + Set.new(%w[outer shared]), + Set.new(%w[inner]) + ]) + + assert normalizer.send(:ruby_local_name?, "outer") + assert normalizer.send(:ruby_local_name?, "inner") + assert normalizer.send(:ruby_local_name?, "shared") + refute normalizer.send(:ruby_local_name?, "missing") + end + + def test_ruby_predicate + { + ruby: true, + python: false, + lua: false, + typescript: false + }.each do |language, expected| + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, fake_document(language)) + + assert_equal expected, normalizer.send(:ruby?) + end + end + + def test_interpolated_string_predicate + cases = [ + [:ruby, "name = \"hi \#{user}\"\nplain = \"hi\"\n", ".rb", "string", "\"hi \#{user}\"", true], + [:ruby, "name = \"hi \#{user}\"\nplain = \"hi\"\n", ".rb", "string", "\"hi\"", false], + [:python, "name = f\"hi {user}\"\nplain = \"hi\"\n", ".py", "string", "f\"hi {user}\"", true], + [:python, "name = f\"hi {user}\"\nplain = \"hi\"\n", ".py", "string", "\"hi\"", false], + [:typescript, "const name = `hi ${user}`;\nconst plain = `hi`;\n", ".ts", "template_string", "`hi ${user}`", true], + [:typescript, "const name = `hi ${user}`;\nconst plain = `hi`;\n", ".ts", "template_string", "`hi`", false], + [:lua, "local name = \"hi\"\n", ".lua", "expression_list", "\"hi\"", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:interpolated_string?, node) + end + end + end + + def test_const_node_predicate + cases = [ + [:ruby, "class Thing; end\ndef check; end\n", ".rb", "constant", 
"Thing", true], + [:ruby, "class Thing; end\ndef check; end\n", ".rb", "identifier", "check", false], + [:python, "class Thing:\n pass\n", ".py", "identifier", "Thing", false], + [:typescript, "type Thing = Other;\nconst value = Thing;\n", ".ts", "type_identifier", "Thing", true], + [:typescript, "type Thing = Other;\nconst value = Thing;\n", ".ts", "identifier", "value", false], + [:lua, "local Thing = {}\n", ".lua", "variable_list", "Thing", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:const_node?, node) + end + end + end + + def test_self_node_predicate + cases = [ + [:ruby, "self\nother\n", ".rb", "self", "self", true], + [:ruby, "self\nother\n", ".rb", "identifier", "other", false], + [:python, "self.value\nother.value\n", ".py", "identifier", "self", true], + [:python, "self.value\nother.value\n", ".py", "identifier", "other", false], + [:typescript, "this.value;\nother;\n", ".ts", "this", "this", true], + [:typescript, "this.value;\nother;\n", ".ts", "identifier", "other", false], + [:lua, "print(self.value)\nprint(other.value)\n", ".lua", "identifier", "self", true], + [:lua, "print(self.value)\nprint(other.value)\n", ".lua", "identifier", "other", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:self_node?, node) + end + end + end + + def 
test_instance_variable_predicate + cases = [ + [:ruby, "@value\nname\n", ".rb", "instance_variable", "@value", true], + [:ruby, "@value\nname\n", ".rb", "identifier", "name", false], + [:python, "@decorator\ndef call():\n pass\n", ".py", "decorator", "@decorator", false], + [:typescript, "@sealed\nclass Thing {}\n", ".ts", "decorator", "@sealed", false], + [:lua, "print(value)\n", ".lua", "identifier", "value", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:instance_variable?, node) + end + end + end + + def test_global_variable_predicate + cases = [ + [:ruby, "$value\nname\n", ".rb", "global_variable", "$value", true], + [:ruby, "$value\nname\n", ".rb", "identifier", "name", false], + [:python, "value = \"$name\"\n", ".py", "string_content", "$name", false], + [:typescript, "const $value = other;\n", ".ts", "identifier", "$value", false], + [:lua, "print(\"$name\")\n", ".lua", "string_content", "$name", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:global_variable?, node) + end + end + end + + def test_literal_fragment_assignment_context_predicate + cases = [ + [:ruby, "value = \"left = right\"\n", ".rb", "string_content", "left = right", true], + [:ruby, "value = 1\n", ".rb", "identifier", "value", false], + [:python, "value = \"left = right\"\n", ".py", 
"string_content", "left = right", true], + [:typescript, "const value = \"left = right\";\n", ".ts", "string_fragment", "left = right", true], + [:lua, "local value = \"left = right\"\n", ".lua", "string_content", "left = right", true], + [:lua, "local value = other\n", ".lua", "variable_list", "value", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:literal_fragment_assignment_context?, node) + end + end + end + + def test_collect_identifier_names + cases = [ + [:ruby, "left, *rest = values\n", ".rb", "left_assignment_list", "left, *rest", %w[left rest]], + [:typescript, "const value = { shorthand };\n", ".ts", "object", "{ shorthand }", %w[shorthand]], + [:lua, "local value = other\n", ".lua", "variable_declaration", "local value = other", %w[other value]] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + locals = Set.new + + refute_nil node + normalizer.send(:collect_identifier_names, node, locals) + assert_equal expected, locals.to_a.sort + end + end + end + + def test_assignment_operator_predicate + cases = [ + [:ruby, "=", true], + [:ruby, "**=", true], + [:ruby, "??=", false], + [:python, ":=", true], + [:python, "//=", true], + [:python, "&&=", false], + [:typescript, "??=", true], + [:typescript, ">>>=", true], + [:typescript, ":=", false], + [:lua, "=", true], + [:lua, "+=", false] + ] + + cases.each do |language, 
text, expected| + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, fake_document(language)) + + assert_equal expected, normalizer.send(:assignment_operator?, text) + end + end + + def test_operator_assignment_operator + cases = [ + [:ruby, "value **= other\nflag ||= fallback\n", ".rb", "operator_assignment", "value **= other", :"**"], + [:ruby, "value **= other\nflag ||= fallback\n", ".rb", "operator_assignment", "flag ||= fallback", :"||"], + [:python, "value //= other\n", ".py", "expression_statement", "value //= other", :"//"], + [:typescript, "value ??= other;\ncount >>>= 1;\n", ".ts", "augmented_assignment_expression", "value ??= other", :"??"], + [:typescript, "value ??= other;\ncount >>>= 1;\n", ".ts", "augmented_assignment_expression", "count >>>= 1", :">>>"] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:operator_assignment_operator, node) + end + end + end + + def test_ruby_global_augmented_assignment_uses_global_read_receiver + with_language_file("$value += 1\n", ".rb", :ruby) do |file| + root, = parse_language(file, :ruby) + assignment = nodes_of_type(root, "GASGN").find { |node| node.text == "$value += 1" } + + refute_nil assignment + call = assignment.children[1] + assert_equal "CALL", call.type.to_s + receiver = call.children[0] + assert_equal "GVAR", receiver.type.to_s + assert_equal ["$value"], receiver.children + end + end + + def test_lua_member_assignment_normalizes_as_attribute_assignment + with_language_file("user.name = value\n", ".lua", :lua) do |file| + root, = parse_language(file, :lua) + assignment = nodes_of_type(root, 
"ATTRASGN").find { |node| node.text == "user.name = value" } + + refute_nil assignment + receiver = assignment.children[0] + assert_equal "LVAR", receiver.type.to_s + assert_equal ["user"], receiver.children + assert_equal :name=, assignment.children[1] + assert_equal "LIST", assignment.children[2].type.to_s + end + end + + def test_first_named + cases = [ + [:ruby, "class Thing; end\nname\n", ".rb", "class", "class Thing; end", ["constant", "Thing"]], + [:ruby, "class Thing; end\nname\n", ".rb", "identifier", "name", nil], + [:python, "def check(value):\n return value\n", ".py", "function_definition", "def check(value):\n return value", ["identifier", "check"]], + [:typescript, "function check(value) { return value; }\n", ".ts", "function_declaration", "function check(value) { return value; }", ["identifier", "check"]], + [:lua, "print(value)\n", ".lua", "function_call", "print(value)", ["identifier", "print"]] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:first_named, node) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_block_child + cases = [ + [:ruby, "def check\n call\nend\n", ".rb", "method", "def check\n call\nend", ["body_statement", "call"]], + [:ruby, "items.each do\n call\nend\n", ".rb", "call", "items.each do\n call\nend", ["do_block", "do\n call\nend"]], + [:python, "def check():\n call()\n", ".py", "function_definition", "def check():\n call()", ["block", "call()"]], + [:typescript, "function check() { call(); }\n", ".ts", "function_declaration", "function check() { call(); }", ["statement_block", "{ call(); }"]], + [:lua, "function 
check()\n call()\nend\n", ".lua", "function_declaration", "function check()\n call()\nend", ["block", "call()"]], + [:ruby, "name\n", ".rb", "identifier", "name", nil] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:block_child, node) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_branch_child + cases = [ + [:ruby, "if ready\n call\nelse\n stop\nend\n", ".rb", "if", "if ready\n call\nelse\n stop\nend", "identifier", "ready", 0, ["then", "\n call"]], + [:ruby, "if ready\n call\nelse\n stop\nend\n", ".rb", "if", "if ready\n call\nelse\n stop\nend", "identifier", "ready", 1, nil], + [:ruby, "if ready\n # note\n call\nend\n", ".rb", "if", "if ready\n # note\n call\nend", "identifier", "ready", 0, ["then", "\n call"]], + [:python, "if ready:\n call()\nelse:\n stop()\n", ".py", "if_statement", "if ready:\n call()\nelse:\n stop()", "identifier", "ready", 1, ["else_clause", "else:\n stop()"]], + [:typescript, "if (ready) { call(); } else { stop(); }\n", ".ts", "if_statement", "if (ready) { call(); } else { stop(); }", "parenthesized_expression", "(ready)", 0, ["statement_block", "{ call(); }"]], + [:lua, "if ready then\n call()\nelse\n stop()\nend\n", ".lua", "if_statement", "if ready then\n call()\nelse\n stop()\nend", "identifier", "ready", 1, ["else_statement", "else\n stop()"]] + ] + + cases.each do |language, source, suffix, kind, text, cond_kind, cond_text, index, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = 
ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + condition = ts_nodes(document.root).find { |candidate| candidate.kind == cond_kind && candidate.text == cond_text } + + refute_nil node + refute_nil condition + found = normalizer.send(:branch_child, node, condition, index) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_explicit_alternative + cases = [ + [:ruby, "if ready\n call\nelsif other\n stop\nend\n", ".rb", "if", "if ready\n call\nelsif other\n stop\nend", ["elsif", "elsif other\n stop"]], + [:ruby, "if ready\n call\nend\n", ".rb", "if", "if ready\n call\nend", nil], + [:python, "if ready:\n call()\nelif other:\n stop()\n", ".py", "if_statement", "if ready:\n call()\nelif other:\n stop()", ["elif_clause", "elif other:\n stop()"]], + [:typescript, "if (ready) { call(); } else { stop(); }\n", ".ts", "if_statement", "if (ready) { call(); } else { stop(); }", ["else_clause", "else { stop(); }"]], + [:lua, "if ready then\n call()\nelseif other then\n stop()\nend\n", ".lua", "if_statement", "if ready then\n call()\nelseif other then\n stop()\nend", ["elseif_statement", "elseif other then\n stop()"]] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:explicit_alternative, node) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_wrap + cases = [ + [:ruby, "first\nsecond\n", ".rb", "identifier", "second"], + [:python, "first\nsecond\n", ".py", "expression_statement", "second"], + [:typescript, "first;\nsecond;\n", ".ts", "identifier", 
"second"], + [:lua, "print(first)\nprint(second)\n", ".lua", "identifier", "second"] + ] + + cases.each do |language, source, suffix, kind, text| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + wrapped = normalizer.send(:wrap, :WRAPPED, children: [:child], source: node) + assert_equal :WRAPPED, wrapped.type + assert_equal [:child], wrapped.children + assert_equal node.start_point.row + 1, wrapped.first_lineno + assert_equal node.start_point.column, wrapped.first_column + assert_equal node.end_point.row + 1, wrapped.last_lineno + assert_equal node.end_point.column, wrapped.last_column + assert_equal node.text, wrapped.text + + inner = normalizer.send(:wrap, :INNER, children: [], source: node) + outer = normalizer.send(:wrap, :OUTER, children: [:child], source: inner) + assert_equal :OUTER, outer.type + assert_equal [:child], outer.children + assert_equal inner.first_lineno, outer.first_lineno + assert_equal inner.first_column, outer.first_column + assert_equal inner.last_lineno, outer.last_lineno + assert_equal inner.last_column, outer.last_column + assert_equal inner.text, outer.text + end + end + end + + def test_source_before_child + cases = [ + [:ruby, "if ready\n call\nend\n", ".rb", "if", "if ready\n call\nend", "then", "\n call", "if ready"], + [:python, "if ready:\n call()\n", ".py", "if_statement", "if ready:\n call()", "block", "call()", "if ready:"], + [:typescript, "if (ready) { call(); }\n", ".ts", "if_statement", "if (ready) { call(); }", "statement_block", "{ call(); }", "if (ready)"], + [:lua, "if ready then\n call()\nend\n", ".lua", "if_statement", "if ready then\n call()\nend", "block", "call()", "if ready then"], + [:ruby, "puts value\n", ".rb", "call", "puts value", "identifier", "puts", "puts value"], + 
[:python, "call()\n", ".py", "expression_statement", "call()", "identifier", "call", "call()"], + [:typescript, "call();\n", ".ts", "expression_statement", "call();", "identifier", "call", "call();"], + [:lua, "call()\n", ".lua", "function_call", "call()", "identifier", "call", "call()"] + ] + + cases.each do |language, source, suffix, kind, text, child_kind, child_text, expected_text| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + child = ts_nodes(document.root).find { |candidate| candidate.kind == child_kind && candidate.text == child_text } + + refute_nil node + refute_nil child + source_node = normalizer.send(:source_before_child, node, child) + wrapped = normalizer.send(:wrap, :WRAPPED, children: [], source: source_node) + + assert_equal expected_text, wrapped.text + assert_equal node.start_point.row + 1, wrapped.first_lineno + assert_equal node.start_point.column, wrapped.first_column + end + end + end + + def test_source_from_normalized_nodes + cases = [ + [:ruby, "first\nsecond\n", ".rb", "identifier", "first", "identifier", "second", "first\nsecond"], + [:python, "first\nsecond\n", ".py", "expression_statement", "first", "expression_statement", "second", "first\nsecond"], + [:typescript, "first;\nsecond;\n", ".ts", "expression_statement", "first;", "expression_statement", "second;", "first;\nsecond;"], + [:lua, "print(first)\nprint(second)\n", ".lua", "function_call", "print(first)", "function_call", "print(second)", "print(first)\nprint(second)"], + [:ruby, "first + second\n", ".rb", "identifier", "first", "identifier", "second", "first + second"] + ] + + cases.each do |language, source, suffix, first_kind, first_text, last_kind, last_text, expected_text| + with_language_file(source, suffix, language) do |file| + document = 
parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + first_raw = ts_nodes(document.root).find { |candidate| candidate.kind == first_kind && candidate.text == first_text } + last_raw = ts_nodes(document.root).find { |candidate| candidate.kind == last_kind && candidate.text == last_text } + + refute_nil first_raw + refute_nil last_raw + first_node = normalizer.send(:wrap, :FIRST, children: [], source: first_raw) + last_node = normalizer.send(:wrap, :LAST, children: [], source: last_raw) + source_node = normalizer.send(:source_from_normalized_nodes, first_node, last_node) + + assert_equal :SOURCE, source_node.type + assert_equal [], source_node.children + assert_equal first_node.first_lineno, source_node.first_lineno + assert_equal first_node.first_column, source_node.first_column + assert_equal last_node.last_lineno, source_node.last_lineno + assert_equal last_node.last_column, source_node.last_column + assert_equal expected_text, source_node.text + end + end + end + + def test_named_field + cases = [ + [:ruby, "def check(value)\n value\nend\n", ".rb", "method", "def check(value)\n value\nend", "name", ["identifier", "check"]], + [:ruby, "def check(value)\n value\nend\n", ".rb", "method", "def check(value)\n value\nend", "missing", nil], + [:python, "if ready:\n call()\n", ".py", "if_statement", "if ready:\n call()", "body", ["block", "call()"]], + [:python, "if ready:\n call()\n", ".py", "if_statement", "if ready:\n call()", "condition", ["identifier", "ready"]], + [:typescript, "function check(value) { return value; }\n", ".ts", "function_declaration", "function check(value) { return value; }", "body", ["statement_block", "{ return value; }"]], + [:lua, "function check(value)\n return value\nend\n", ".lua", "function_declaration", "function check(value)\n return value\nend", "body", ["block", "return value"]] + ] + + cases.each do |language, source, suffix, kind, text, field, expected| + with_language_file(source, suffix, 
language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:named_field, node, field) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_parent_node + cases = [ + [:ruby, "def check\nend\n", ".rb", "identifier", "check", ["method", "def check\nend"]], + [:ruby, "value\n", ".rb", "program", "value\n", nil], + [:python, "if ready:\n call()\n", ".py", "identifier", "ready", ["if_statement", "if ready:\n call()"]], + [:typescript, "call(value);\n", ".ts", "identifier", "value", ["arguments", "(value)"]], + [:lua, "call(value)\n", ".lua", "identifier", "value", ["arguments", "(value)"]] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:parent_node, node) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_next_sibling + cases = [ + [:ruby, "a + b\n", ".rb", "identifier", "a", ["+", "+"]], + [:python, "a + b\n", ".py", "identifier", "a", ["+", "+"]], + [:typescript, "a + b;\n", ".ts", "identifier", "a", ["+", "+"]], + [:lua, "print(a, b)\n", ".lua", "identifier", "a", [",", ","]], + [:ruby, "a\n", ".rb", "identifier", "a", nil] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = 
ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:next_sibling, node) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_prev_sibling + cases = [ + [:ruby, "a + b\n", ".rb", "identifier", "b", ["+", "+"]], + [:python, "a + b\n", ".py", "identifier", "b", ["+", "+"]], + [:typescript, "a + b;\n", ".ts", "identifier", "b", ["+", "+"]], + [:lua, "print(a, b)\n", ".lua", "identifier", "b", [",", ","]], + [:ruby, "a\n", ".rb", "identifier", "a", nil] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:prev_sibling, node) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_next_named_sibling + cases = [ + [:ruby, "a + b\n", ".rb", "identifier", "a", ["identifier", "b"]], + [:python, "a + b\n", ".py", "identifier", "a", ["identifier", "b"]], + [:typescript, "a + b;\n", ".ts", "identifier", "a", ["identifier", "b"]], + [:lua, "print(a, b)\n", ".lua", "identifier", "a", ["identifier", "b"]], + [:ruby, "a\n", ".rb", "identifier", "a", nil] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:next_named_sibling, node) + if expected + assert_equal expected, [found&.kind, 
found&.text] + else + assert_nil found + end + end + end + end + + def test_ternary_statement_predicate + cases = [ + [:ruby, "def f(cond, a, b)\n cond ? a : b\nend\n", ".rb", "body_statement", "cond ? a : b", true], + [:python, "value = a if cond else b\n", ".py", "conditional_expression", "a if cond else b", true], + [:typescript, "const value = cond ? a : b;\n", ".ts", "ternary_expression", "cond ? a : b", true], + [:lua, "local value = cond and a or b\n", ".lua", "expression_list", "cond and a or b", false], + [:ruby, "def f(cond)\n cond\nend\n", ".rb", "body_statement", "cond", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:ternary_statement?, node) + end + end + end + + def test_ternary_statement_normalizes_to_if_across_languages + { + ruby: ["def f(cond, a, b)\n cond ? a : b\nend\n", ".rb"], + python: ["def f(cond, a, b):\n return a if cond else b\n", ".py"], + typescript: ["function f(cond: boolean, a: number, b: number) { return cond ? 
a : b; }\n", ".ts"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + if_node = nodes_of_type(root, "IF").find { |node| node.text.include?("cond") } + + refute_nil if_node + assert_equal %w[cond a b], if_node.children.map(&:text) + end + end + end + + def test_case_argument_list_predicate + cases = [ + [ + :ruby, + "def f(x)\n return case x\n when 1 then :one\n else :other\n end\nend\n", + ".rb", + "argument_list", + "case x\n when 1 then :one\n else :other\n end", + true + ], + [:ruby, "case x\nwhen 1 then :one\nelse :other\nend\n", ".rb", "case", "case x\nwhen 1 then :one\nelse :other\nend", false], + [:python, "match value:\n case 1:\n one()\n", ".py", "case_clause", "case 1:\n one()", false], + [:typescript, "switch (value) { case 1: one(); break; }\n", ".ts", "switch_case", "case 1: one(); break;", false], + [:lua, "if value == 1 then one() end\n", ".lua", "if_statement", "if value == 1 then one() end", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:case_argument_list?, node) + end + end + end + + def test_leading_function_statement_predicate + cases = [ + [:ruby, "def outer\n def inner\n x\n end\nend\n", ".rb", "body_statement", "def inner\n x\n end", true], + [:python, "def outer():\n def inner():\n x\n", ".py", "block", "def inner():\n x", true], + [:lua, "function outer()\n function inner()\n x()\n end\nend\n", ".lua", "block", "function inner()\n x()\n end", true], + [:typescript, "function outer() { function inner() { x; } }\n", ".ts", "function_declaration", "function inner() { x; }", false], + [:ruby, "def 
outer\n x\nend\n", ".rb", "body_statement", "x", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:leading_function_statement?, node) + end + end + end + + def test_leading_function_statement_normalizes_nested_functions + { + ruby: ["def outer\n def inner\n x\n end\nend\n", ".rb"], + python: ["def outer():\n def inner():\n x\n", ".py"], + lua: ["function outer()\n function inner()\n x()\n end\nend\n", ".lua"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + inner = nodes_of_type(root, "DEFN").find { |node| node.children.first == :inner } + + refute_nil inner + assert_empty nodes_of_type(root, "ITER").select { |node| node.text.include?("inner") } + end + end + end + + def test_lambda_expression_predicate + cases = [ + [:ruby, "fn = ->(x) { x + 1 }\n", ".rb", "lambda", "->(x) { x + 1 }", true], + [:python, "fn = lambda x: x + 1\n", ".py", "lambda", "lambda x: x + 1", true], + [:typescript, "const fn = (x) => x + 1;\n", ".ts", "arrow_function", "(x) => x + 1", true], + [:typescript, "const fn = function(x) { return x + 1; };\n", ".ts", "function_expression", "function(x) { return x + 1; }", true], + [:lua, "local fn = function(x) return x + 1 end\n", ".lua", "expression_list", "function(x) return x + 1 end", true], + [:lua, "function f(x) return x + 1 end\n", ".lua", "function_declaration", "function f(x) return x + 1 end", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = 
Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:lambda_expression?, node) + end + end + end + + def test_lambda_expressions_normalize_across_languages + { + ruby: ["fn = ->(x) { x + 1 }\n", ".rb"], + python: ["fn = lambda x: x + 1\n", ".py"], + typescript: ["const fn = (x) => x + 1;\n", ".ts"], + lua: ["local fn = function(x) return x + 1 end\n", ".lua"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + + refute_empty nodes_of_type(root, "LAMBDA"), "expected LAMBDA for #{language}" + end + end + end + + def test_leading_owner_statement_predicate + cases = [ + [:ruby, "def outer\n class Inner\n value\n end\nend\n", ".rb", "body_statement", "class Inner\n value\n end", true], + [:ruby, "def outer\n module Inner\n value\n end\nend\n", ".rb", "body_statement", "module Inner\n value\n end", true], + [:python, "def outer():\n class Inner:\n pass\n", ".py", "block", "class Inner:\n pass", true], + [:typescript, "function outer() { class Inner {} }\n", ".ts", "class_declaration", "class Inner {}", false], + [:lua, "function outer()\n Inner = {}\nend\n", ".lua", "block", "Inner = {}", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:leading_owner_statement?, node) + end + end + end + + def test_leading_owner_statement_normalizes_nested_classes + { + ruby: ["def outer\n class Inner\n value\n end\nend\n", ".rb"], + python: ["def outer():\n class Inner:\n pass\n", ".py"] 
+ }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + inner = nodes_of_type(root, "CLASS").find { |node| node.text.include?("Inner") } + + refute_nil inner + assert_empty nodes_of_type(root, "ITER").select { |node| node.text.include?("Inner") } + end + end + end + + def test_zero_child_identifier_call_predicate + cases = [ + [:ruby, "foo?\n", ".rb", "call", "foo?", true], + [:ruby, "foo!\n", ".rb", "call", "foo!", true], + [:ruby, "foo()\n", ".rb", "call", "foo()", false], + [:python, "foo()\n", ".py", "expression_statement", "foo()", false], + [:typescript, "foo();\n", ".ts", "call_expression", "foo()", false], + [:lua, "foo()\n", ".lua", "function_call", "foo()", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:zero_child_identifier_call?, node) + end + end + end + + def test_zero_child_identifier_call_normalizes_to_vcall + %w[foo? 
foo!].each do |call| + with_language_file("#{call}\n", ".rb", :ruby) do |file| + root, = parse_language(file, :ruby) + vcall = nodes_of_type(root, "VCALL").find { |node| node.text == call } + + refute_nil vcall + assert_equal call.to_sym, vcall.children.first + end + end + end + + def test_dotted_call_parts + cases = [ + [:ruby, "user.name\n", ".rb", "call", "user.name", "identifier", "user", "name"], + [:ruby, "user&.name\n", ".rb", "call", "user&.name", "identifier", "user", "name"], + [:python, "user.name()\n", ".py", "attribute", "user.name", "identifier", "user", "name"], + [:typescript, "user.name();\n", ".ts", "member_expression", "user.name", "identifier", "user", "name"], + [:typescript, "user.name;\n", ".ts", "expression_statement", "user.name;", "identifier", "user", "name"], + [:lua, "user.name()\n", ".lua", "dot_index_expression", "user.name", "identifier", "user", "name"] + ] + + cases.each do |language, source, suffix, kind, text, receiver_kind, receiver_text, method_name| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + receiver, method = normalizer.send(:dotted_call_parts, node) + assert_equal receiver_kind, receiver.kind + assert_equal receiver_text, receiver.text.to_s + assert_equal method_name, method + end + end + end + + def test_python_bare_dotted_expression_normalizes_as_call + with_language_file("user.name\n", ".py", :python) do |file| + root, = parse_language(file, :python) + call = nodes_of_type(root, "CALL").find { |node| node.text == "user.name" } + + refute_nil call + assert_equal "LVAR", call.children.first.type.to_s + assert_equal :name, call.children[1] + end + end + + def test_typescript_bare_dotted_expression_normalizes_as_call + with_language_file("user.name;\n", ".ts", :typescript) do |file| 
+ document = parse_syntax(file, :typescript) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find do |candidate| + candidate.kind == "expression_statement" && candidate.text == "user.name;" + end + call = normalizer.send(:normalize_dotted_expression, node) + + refute_nil call + assert_equal "CALL", call.type.to_s + assert_equal "LVAR", call.children.first.type.to_s + assert_equal :name, call.children[1] + end + end + + def test_leading_if_statement_predicate + cases = [ + [:ruby, "def f\n if x\n y\n end\nend\n", ".rb", "body_statement", "if x\n y\n end", true], + [:python, "def f():\n if x:\n y()\n", ".py", "block", "if x:\n y()", true], + [:lua, "function f()\n if x then\n y()\n end\nend\n", ".lua", "block", "if x then\n y()\n end", true], + [:typescript, "function f() { if (x) { y(); } }\n", ".ts", "if_statement", "if (x) { y(); }", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:leading_if_statement?, node) + end + end + end + + def test_leading_if_statement_normalizes_across_languages + { + ruby: ["def f\n if x\n y\n end\nend\n", ".rb"], + python: ["def f():\n if x:\n y()\n", ".py"], + lua: ["function f()\n if x then\n y()\n end\nend\n", ".lua"], + typescript: ["function f() { if (x) { y(); } }\n", ".ts"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + + refute_empty nodes_of_type(root, "IF") + end + end + end + + def test_leading_case_statement_predicate + cases = [ + [ + :ruby, + "def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", + ".rb", + 
"body_statement", + "case x\n when 1 then y\n else z\n end", + true + ], + [ + :python, + "def f(x):\n match x:\n case 1:\n y()\n", + ".py", + "block", + "match x:\n case 1:\n y()", + true + ], + [ + :typescript, + "function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", + ".ts", + "switch_statement", + "switch (x) { case 1: y(); break; default: z(); }", + false + ], + [ + :lua, + "function f(x)\n if x == 1 then y() end\nend\n", + ".lua", + "block", + "if x == 1 then y() end", + false + ] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:leading_case_statement?, node) + end + end + end + + def test_leading_case_statement_normalizes_across_languages + { + ruby: ["def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", ".rb"], + python: ["def f(x):\n match x:\n case 1:\n y()\n", ".py"], + typescript: ["function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", ".ts"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + + refute_empty nodes_of_type(root, "CASE") + end + end + end + + def test_case_default_branches_normalize_as_when_fallbacks + { + python: ["match x:\n case 1:\n one()\n case _:\n other()\n", ".py", "other()"], + typescript: ["switch (x) { case 1: one(); break; default: other(); }\n", ".ts", "other()"] + }.each do |language, (source, suffix, fallback_text)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + case_node = nodes_of_type(root, "CASE").first + + refute_nil case_node + whens = nodes_of_type(case_node, "WHEN") + assert_equal 1, 
whens.size + fallback = whens.first.children[2] + assert Decomplex::Ast.node?(fallback) + assert_equal "VCALL", fallback.type.to_s + assert_equal fallback_text, fallback.text + end + end + end + + def test_ruby_case_patterns_preserve_childless_tree_sitter_pattern_text + with_language_file("case value\nwhen Foo\n one\nend\ncase\nwhen ready\n two\nend\n", ".rb", :ruby) do |file| + root, = parse_language(file, :ruby) + whens = nodes_of_type(root, "WHEN") + + const_pattern = whens.find { |node| node.text == "when Foo\n one" }.children.first.children.first + assert_equal "CONST", const_pattern.type.to_s + assert_equal :Foo, const_pattern.children.first + + call_pattern = whens.find { |node| node.text == "when ready\n two" }.children.first.children.first + assert_equal "VCALL", call_pattern.type.to_s + assert_equal :ready, call_pattern.children.first + end + end + + def test_leading_loop_statement_predicate + cases = [ + [:ruby, "def f(x)\n while x\n y\n end\nend\n", ".rb", "body_statement", "while x\n y\n end", true], + [:python, "def f(x):\n while x:\n y()\n", ".py", "block", "while x:\n y()", true], + [:lua, "function f(x)\n while x do\n y()\n end\nend\n", ".lua", "block", "while x do\n y()\n end", true], + [:typescript, "function f(x) { while (x) { y(); } }\n", ".ts", "while_statement", "while (x) { y(); }", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:leading_loop_statement?, node) + end + end + end + + def test_leading_loop_statement_normalizes_across_languages + { + ruby: ["def f(x)\n while x\n y\n end\nend\n", ".rb"], + python: ["def f(x):\n while x:\n y()\n", ".py"], + lua: ["function f(x)\n while x do\n 
y()\n end\nend\n", ".lua"], + typescript: ["function f(x) { while (x) { y(); } }\n", ".ts"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + + refute_empty nodes_of_type(root, "WHILE") + end + end + end + + def test_rescue_body_statement_predicate + cases = [ + [ + :ruby, + "def f\n work\nrescue Error => e\n handle\nend\n", + ".rb", + "body_statement", + "work\nrescue Error => e\n handle", + true + ], + [ + :python, + "try:\n work()\nexcept Error as e:\n handle(e)\n", + ".py", + "try_statement", + "try:\n work()\nexcept Error as e:\n handle(e)", + true + ], + [ + :python, + "def f():\n try:\n work()\n except Error as e:\n handle(e)\n", + ".py", + "block", + "try:\n work()\n except Error as e:\n handle(e)", + true + ], + [ + :typescript, + "try { work(); } catch (e) { handle(e); }\n", + ".ts", + "try_statement", + "try { work(); } catch (e) { handle(e); }", + true + ], + [ + :lua, + "local ok, err = pcall(work)\n", + ".lua", + "variable_declaration", + "local ok, err = pcall(work)", + false + ] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:rescue_body_statement?, node) + end + end + end + + def test_python_flattened_bare_except_normalizes_as_rescue + with_python_file(<<~PY) do |file| + def get_exception(): + try: + pass + except: + foobarbaz + PY + root, = parse_python(file) + rescue_node = nodes_of_type(root, "RESCUE").first + resbody = nodes_of_type(root, "RESBODY").first + + refute_nil rescue_node + refute_nil resbody + assert_nil rescue_node.children.first + assert_nil resbody.children.first + assert_equal "VCALL", 
resbody.children[1].type.to_s + assert_equal :foobarbaz, resbody.children[1].children.first + end + end + + def test_python_flattened_try_except_preserves_try_body + with_python_file(<<~PY) do |file| + def f(): + try: + work() + except Error as e: + handle(e) + PY + root, = parse_python(file) + rescue_node = nodes_of_type(root, "RESCUE").first + resbody = nodes_of_type(root, "RESBODY").first + + refute_nil rescue_node + assert_equal "VCALL", rescue_node.children.first.type.to_s + assert_equal "work()", rescue_node.children.first.text + refute_nil resbody.children.first + end + end + + def test_rescue_body_statement_normalizes_across_languages + { + ruby: ["def f\n work\nrescue Error => e\n handle\nend\n", ".rb"], + python: ["try:\n work()\nexcept Error as e:\n handle(e)\n", ".py"], + typescript: ["try { work(); } catch (e) { handle(e); }\n", ".ts"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + + refute_empty nodes_of_type(root, "RESCUE") + resbodies = nodes_of_type(root, "RESBODY") + refute_empty resbodies + refute_nil resbodies.first.children.first if %i[ruby python].include?(language) + end + end + end + + def test_rescue_clause_preserves_qualified_exception_constant + with_language_file("begin\n work\nrescue Net::Error\n handle\nend\n", ".rb", :ruby) do |file| + root, = parse_language(file, :ruby) + resbody = nodes_of_type(root, "RESBODY").first + + refute_nil resbody + exceptions = resbody.children.first + assert_equal "LIST", exceptions.type.to_s + assert_equal ["Net::Error"], exceptions.children.map { |child| child.children.first.to_s } + end + end + + def test_ensure_body_statement_predicate + cases = [ + [ + :ruby, + "def f\n work\nensure\n cleanup\nend\n", + ".rb", + "body_statement", + "work\nensure\n cleanup", + true + ], + [ + :python, + "try:\n work()\nfinally:\n cleanup()\n", + ".py", + "try_statement", + "try:\n work()\nfinally:\n cleanup()", + true + ], + 
[ + :typescript, + "try { work(); } finally { cleanup(); }\n", + ".ts", + "try_statement", + "try { work(); } finally { cleanup(); }", + true + ], + [ + :lua, + "work()\ncleanup()\n", + ".lua", + "function_call", + "work()", + false + ] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:ensure_body_statement?, node) + end + end + end + + def test_ensure_body_statement_normalizes_across_languages + { + ruby: ["def f\n work\nensure\n cleanup\nend\n", ".rb"], + python: ["try:\n work()\nfinally:\n cleanup()\n", ".py"], + typescript: ["try { work(); } finally { cleanup(); }\n", ".ts"], + python_rescue: ["try:\n work()\nexcept Error as e:\n handle(e)\nfinally:\n cleanup()\n", ".py"] + }.each do |language, (source, suffix)| + parse_language_name = language == :python_rescue ? 
:python : language + with_language_file(source, suffix, parse_language_name) do |file| + root, = parse_language(file, parse_language_name) + + refute_empty nodes_of_type(root, "ENSURE") + refute_empty nodes_of_type(root, "RESCUE") if language == :python_rescue + end + end + end + + def test_array_literal_statement_predicate + cases = [ + [:ruby, "def f\n [a, b]\nend\n", ".rb", "body_statement", "[a, b]", true], + [:python, "def f():\n [a, b]\n", ".py", "block", "[a, b]", true], + [:typescript, "function f() { [a, b]; }\n", ".ts", "expression_statement", "[a, b];", true], + [:lua, "function f()\n {a, b}\nend\n", ".lua", "block", "\n {a, b}", true], + [:lua, "function f()\n {x = a, y = b}\nend\n", ".lua", "block", "\n {x = a, y = b}", false], + [ + :lua, + "local rocks_path = table.concat({rocks_tree, \"a_rock\"})\n", + ".lua", + "arguments", + "({rocks_tree, \"a_rock\"})", + false + ] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:array_literal_statement?, node) + end + end + end + + def test_array_literal_statement_normalizes_across_languages + { + ruby: ["def f\n [a, b]\nend\n", ".rb"], + python: ["def f():\n [a, b]\n", ".py"], + typescript: ["function f() { [a, b]; }\n", ".ts"], + lua: ["function f()\n {a, b}\nend\n", ".lua"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + lists = nodes_of_type(root, "LIST") + + refute_empty lists + assert lists.any? 
{ |node| node.text.include?("a") && node.text.include?("b") } + end + end + end + + def test_element_reference_statement_predicate + cases = [ + [:ruby, "def f\n items[0]\nend\n", ".rb", "body_statement", "items[0]", true], + [:ruby, "def f\n [0]\nend\n", ".rb", "body_statement", "[0]", false], + [:python, "def f():\n items[0]\n", ".py", "block", "items[0]", true], + [:python, "return items[0]\n", ".py", "subscript", "items[0]", true], + [:typescript, "function f() { items[0]; }\n", ".ts", "expression_statement", "items[0];", true], + [:typescript, "return items[0];\n", ".ts", "subscript_expression", "items[0]", true], + [:lua, "return items[1]\n", ".lua", "expression_list", "items[1]", true], + [:lua, "print(items[1])\n", ".lua", "bracket_index_expression", "items[1]", true] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:element_reference_statement?, node) + end + end + end + + def test_element_reference_statement_normalizes_across_languages + { + ruby: ["def f\n items[0]\nend\n", ".rb"], + python: ["def f():\n items[0]\n", ".py"], + typescript: ["function f() { items[0]; }\n", ".ts"], + lua: ["return items[1]\n", ".lua"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + calls = nodes_of_type(root, "CALL") + + assert calls.any? 
{ |node| node.children[1] == :[] && node.text.include?("items") }, + "expected element reference CALL for #{language}" + end + end + end + + def test_hash_literal_statement_predicate + cases = [ + [:ruby, "def f\n {a: b}\nend\n", ".rb", "body_statement", "{a: b}", true], + [:python, "def f():\n {\"a\": b}\n", ".py", "block", "{\"a\": b}", true], + [:typescript, "function f() { ({a: b}); }\n", ".ts", "expression_statement", "({a: b});", true], + [:typescript, "return {a: b};\n", ".ts", "object", "{a: b}", true], + [:lua, "function f()\n {a = b}\nend\n", ".lua", "block", "\n {a = b}", true], + [:lua, "function f()\n {a, b}\nend\n", ".lua", "block", "\n {a, b}", false], + [ + :lua, + "assert.same(install, { bin = { P\"bin/binfile\" } })\n", + ".lua", + "arguments", + "(install, { bin = { P\"bin/binfile\" } })", + false + ] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:hash_literal_statement?, node) + end + end + end + + def test_hash_literal_statement_normalizes_across_languages + { + ruby: ["def f\n {a: b}\nend\n", ".rb"], + python: ["def f():\n {\"a\": b}\n", ".py"], + typescript: ["function f() { ({a: b}); }\n", ".ts"], + lua: ["function f()\n {a = b}\nend\n", ".lua"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + hashes = nodes_of_type(root, "HASH") + + assert hashes.any? 
{ |node| node.text.include?("a") && node.text.include?("b") }, + "expected hash literal HASH for #{language}" + assert_empty nodes_of_type(root, "OBJECT") if language == :typescript + assert_empty nodes_of_type(root, "FCALL").select { |node| node.children.first == :"" } if language == :lua + end + end + end + + def test_lua_call_arguments_with_keyed_table_preserve_argument_list + with_language_file("assert.same(install, { bin = { P\"bin/binfile\" } })\n", ".lua", :lua) do |file| + root, = parse_language(file, :lua) + call = nodes_of_type(root, "FUNCTION_CALL").find { |node| node.text.start_with?("assert.same") } + + refute_nil call + arguments = call.children[1] + assert_equal "ARGUMENTS", arguments.type.to_s + assert_equal %w[LVAR HASH], arguments.children.map(&:type).map(&:to_s) + assert_equal "install", arguments.children.first.children.first + end + end + + def test_lua_call_arguments_with_positional_table_preserve_table_fields + with_language_file("local rocks_path = table.concat({rocks_tree, \"a_rock\"})\n", ".lua", :lua) do |file| + root, = parse_language(file, :lua) + arguments = nodes_of_type(root, "ARGUMENTS").find { |node| node.text == "({rocks_tree, \"a_rock\"})" } + + refute_nil arguments + table = arguments.children.first + assert_equal "ARGUMENTS", arguments.type.to_s + assert_equal "HASH", table.type.to_s + assert_equal %w[FIELD FIELD], table.children.map(&:type).map(&:to_s) + assert_empty table.children.first.children + assert_equal "STR", table.children[1].children.first.type.to_s + end + end + + def test_empty_body_statement_predicate + cases = [ + [:python, "def f():\n pass\n", ".py", "block", "pass", true], + [:typescript, "function f() {}\n", ".ts", "statement_block", "{}", true], + [:typescript, "function f() { work(); }\n", ".ts", "statement_block", "{ work(); }", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + 
normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:empty_body_statement?, node) + end + end + end + + def test_empty_body_statement_normalizes_across_languages + { + python: ["def f():\n pass\n", ".py"], + typescript: ["function f() {}\n", ".ts"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + defn = nodes_of_type(root, "DEFN").first + scope = defn.children[1] + + assert_nil scope.children[2] + assert_empty nodes_of_type(root, "VCALL").select { |node| node.text == "pass" } if language == :python + end + end + end + + def test_heredoc_body_statement_predicate + ruby_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n" + cases = [ + [:ruby, ruby_source, ".rb", "body_statement", "puts <<~TXT\n hi\n TXT", true], + [:ruby, ruby_source, ".rb", "call", "puts <<~TXT", false], + [:python, "def f():\n value = 1\n", ".py", "block", "value = 1", false], + [:typescript, "function f() { value = 1; }\n", ".ts", "statement_block", "{ value = 1; }", false], + [:lua, "function f()\n value = 1\nend\n", ".lua", "block", "value = 1", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:heredoc_body_statement?, node) + end + end + end + + def test_heredoc_call_for_body_predicate + ruby_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n" + cases = [ + [:ruby, ruby_source, ".rb", "body_statement", "puts <<~TXT\n hi\n TXT", true], + [:ruby, ruby_source, ".rb", "call", "puts <<~TXT", 
true], + [:ruby, ruby_source, ".rb", "argument_list", "<<~TXT", true], + [:ruby, ruby_source, ".rb", "method", ruby_source.chomp, false], + [:python, "def f():\n value = 1\n", ".py", "block", "value = 1", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:heredoc_call_for_body?, node) + end + end + end + + def test_ruby_heredoc_argument_normalizes_as_dynamic_string + with_language_file("def f\n puts <<~TXT\n hi\n TXT\nend\n", ".rb", :ruby) do |file| + root, = parse_language(file, :ruby) + call = nodes_of_type(root, "FCALL").find { |node| node.text == "puts <<~TXT" } + + refute_nil call + assert_equal :puts, call.children[0] + + args = call.children[1] + assert_equal "LIST", args.type.to_s + dstr = args.children.first + assert_equal "DSTR", dstr.type.to_s + assert_equal ["STR"], dstr.children.map { |child| child.type.to_s } + assert_equal "\n hi\n ", dstr.children.first.children.first + end + end + + def test_normalize_children_skips_heredoc_body + with_language_file("def f\n x = <<~TXT\n hi\n TXT\nend\n", ".rb", :ruby) do |file| + document = parse_syntax(file, :ruby) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + body = ts_nodes(document.root).find do |node| + node.kind == "body_statement" && node.text.include?("<<~TXT") + end + + refute_nil body + children = normalizer.send(:normalize_children, body) + assert_equal ["LASGN"], children.map { |child| child.type.to_s } + assert_equal ["STR"], children.first.children[1].children.map { |child| child.type.to_s } + end + end + + def test_with_current_heredoc_body_restores_previous_body + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + 
normalizer.instance_variable_set(:@current_heredoc_body, :outer) + + result = normalizer.send(:with_current_heredoc_body, :inner) do + assert_equal :inner, normalizer.instance_variable_get(:@current_heredoc_body) + :result + end + + assert_equal :result, result + assert_equal :outer, normalizer.instance_variable_get(:@current_heredoc_body) + end + + def test_interpolated_statement_predicate + cases = [ + [:ruby, "def f\n \"hi \#{name}\"\nend\n", ".rb", "body_statement", "\"hi \#{name}\"", true], + [:python, "def f():\n f\"hi {name}\"\n", ".py", "block", "f\"hi {name}\"", false], + [:typescript, "function f() { `hi ${name}`; }\n", ".ts", "expression_statement", "`hi ${name}`;", false], + [:lua, "function f()\n \"hi\"\nend\n", ".lua", "block", "\n \"hi\"", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:interpolated_statement?, node) + end + end + end + + def test_concatenated_string_statement_predicate + cases = [ + [:ruby, "def f\n \"a\" \"b\"\nend\n", ".rb", "body_statement", "\"a\" \"b\"", true], + [:python, "def f():\n \"a\" \"b\"\n", ".py", "block", "\"a\" \"b\"", true], + [:typescript, "function f() { \"a\"; }\n", ".ts", "expression_statement", "\"a\";", false], + [:lua, "function f()\n \"a\"\nend\n", ".lua", "block", "\n \"a\"", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal 
expected, normalizer.send(:concatenated_string_statement?, node) + end + end + end + + def test_concatenated_string_statement_normalizes_python_adjacent_strings + with_python_file(<<~PY) do |file| + def f(): + "a" "b" + PY + root, = parse_python(file) + dstr = nodes_of_type(root, "DSTR").find { |node| node.text == "\"a\"" } + + refute_nil dstr + assert_equal %w[STR STR], dstr.children.map(&:type).map(&:to_s) + end + end + private def ast_node(type, children: []) From fde8f51876205b49c57311c5608f44bba7524862 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Fri, 19 Jun 2026 11:27:57 +0000 Subject: [PATCH 21/52] Refactor syntax fact extraction profiles --- .../lib/decomplex/false_simplicity.rb | 8 +- .../decomplex/lib/decomplex/native/command.rb | 2 +- gems/decomplex/lib/decomplex/syntax.rb | 608 ++++++++++++------ gems/decomplex/rust/src/bin/dump_ast.rs | 2 + .../decomplex/syntax/tree_sitter_adapter.rs | 359 ++++++++--- gems/decomplex/test/syntax_test.rb | 14 + 6 files changed, 705 insertions(+), 288 deletions(-) diff --git a/gems/decomplex/lib/decomplex/false_simplicity.rb b/gems/decomplex/lib/decomplex/false_simplicity.rb index a4b8f21c8..fd3f8ff32 100644 --- a/gems/decomplex/lib/decomplex/false_simplicity.rb +++ b/gems/decomplex/lib/decomplex/false_simplicity.rb @@ -395,7 +395,8 @@ def classify_call(call, defs) end m = mid.to_s - if block_pass?(call) && callback?(m) && !@lexicon.meta_mids.include?(m) + if (block_pass?(call) || block_literal_call?(call)) && + callback?(m) && !@lexicon.meta_mids.include?(m) return emit(:callback_inversion, m, dn(defs), call) end return emit(:metaprogramming, m, dn(defs), call) if @lexicon.meta_mids.include?(m) @@ -469,6 +470,11 @@ def block_pass?(call) args.children.any? 
{ |c| Ast.node?(c) && c.type == :BLOCK_PASS } end + def block_literal_call?(call) + text = call.text.to_s + text.include?("{") || text.match?(/\bdo\b/) + end + def method_obj?(recv) Ast.node?(recv) && %i[CALL FCALL].include?(recv.type) && @lexicon.method_obj_mids.include?( diff --git a/gems/decomplex/lib/decomplex/native/command.rb b/gems/decomplex/lib/decomplex/native/command.rb index 64219a2fa..16829d427 100644 --- a/gems/decomplex/lib/decomplex/native/command.rb +++ b/gems/decomplex/lib/decomplex/native/command.rb @@ -63,7 +63,7 @@ def language_for(path) [binary_path, *args] else ["cargo", "run", "--quiet", "--release", "--manifest-path", - File.join(crate_root, "Cargo.toml"), "--", *args] + File.join(crate_root, "Cargo.toml"), "--bin", "decomplex-rust", "--", *args] end end diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 60960b709..8f8ce9d57 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -268,23 +268,341 @@ def call_name?(source, names) /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ ].freeze ).freeze - LANGUAGE_LEXICONS = { - ruby: RUBY_LEXICON, - python: PYTHON_LEXICON, - javascript: JAVASCRIPT_LEXICON, - typescript: JAVASCRIPT_LEXICON, - go: GO_LEXICON, - rust: RUST_LEXICON, - zig: ZIG_LEXICON, - lua: LUA_LEXICON, - c: C_LEXICON, - cpp: CPP_LEXICON, - csharp: CSHARP_LEXICON, - java: JAVA_LEXICON, - swift: SWIFT_LEXICON, - kotlin: KOTLIN_LEXICON + class TreeSitterLanguageAdapter + attr_reader :language, :extensions, :lexicon, :package, :grammar_names, + :tree_sitter_language_name + + def initialize(language:, extensions:, lexicon:, package:, grammar_names: nil, + tree_sitter_language_name: nil, first_argument_receiver: false) + @language = language.to_sym + @extensions = Array(extensions).freeze + @lexicon = lexicon + @package = package + @grammar_names = Array(grammar_names || language.to_s).freeze + @tree_sitter_language_name = tree_sitter_language_name || 
language.to_s + @first_argument_receiver = first_argument_receiver + end + + def first_argument_receiver? + @first_argument_receiver + end + + def function_name(syntax, node) + case node.kind + when "method", "function_definition", "function_declaration", + "method_definition", "function_item" + syntax.send(:named_field, node, "name")&.text || + syntax.send(:declarator_name, syntax.send(:named_field, node, "declarator")) || + syntax.send(:first_named_text, node, %w[identifier constant property_identifier]) + when "method_declaration" + syntax.send(:named_field, node, "name")&.text || + syntax.send(:first_named_text, node, %w[field_identifier identifier]) + end + end + + def function_kind(syntax, node, stack) + syntax.send(:owner_for_node, nil, node, stack: stack) ? :method : :function + end + + def visibility(syntax, _document, node) + syntax.send(:modifier_visibility, node) + end + + def owner_name_from_declaration(syntax, document, node) + case node.kind + when "class", "class_definition", "class_declaration", "class_specifier", "module" + syntax.send(:named_field, node, "name")&.text || + syntax.send(:first_named_text, node, %w[constant identifier type_identifier]) + when "impl_item", "impl_block" + syntax.send(:impl_owner_name, node) + when "struct_item", "struct_spec", "struct_specifier", "type_spec", "type_declaration" + syntax.send(:named_field, node, "name")&.text || + syntax.send(:first_named_text, node, %w[type_identifier identifier]) + when "struct_declaration", "union_declaration", "enum_declaration" + syntax.send(:bound_container_name, node) || + syntax.send(:returned_container_owner, document, node) || + syntax.send(:anonymous_owner_name, document, node) + end + end + + def owner_kind(syntax, node) + case node.kind + when "class", "class_definition", "class_declaration", "class_specifier" then :class + when "module" then :module + when "impl_item", "impl_block" then :impl + when "struct_declaration", "struct_item", "struct_spec", "struct_specifier" 
then :struct + when "union_declaration" then :union + when "enum_declaration" then :enum + else :owner + end + end + + def function_receiver_name(syntax, node, stack) + receiver_param = syntax.send(:method_receiver_param_node, node) + receiver_param&.text || + receiver_convention_param_name(syntax, node, language: syntax.send(:current_language, stack)) + end + + def receiver_convention_owner_name(syntax, node, language:) + return nil unless first_argument_receiver? + return nil unless node.kind == "function_definition" + + receiver = syntax.send(:first_argument_receiver_parameter, node) + return nil unless receiver + + type = syntax.send(:normalize_type_owner, receiver[:type]) + name = function_name(syntax, node).to_s + return nil if type.empty? || name.empty? + + prefix = syntax.send(:snake_case_type_name, type) + name.start_with?("#{prefix}_") ? type : nil + end + + def receiver_convention_param_name(syntax, node, language:) + return nil unless first_argument_receiver? + + syntax.send(:first_argument_receiver_parameter, node)&.fetch(:name, nil) + end + + def generated_prelude?(_syntax, _document, _node) + false + end + + def call_target(syntax, document, node) + case node.kind + when "call_expression", "method_invocation", "invocation_expression" + syntax.send(:generic_call_target, document, node) + when "attribute", "selector_expression", "field", "field_access", "member_expression", + "member_access_expression", "field_expression", "expression_list" + syntax.send(:adjacent_argument_call_target, node) + end + end + + def state_declaration(syntax, node) + syntax.send(:generic_state_declaration, node, language: language) + end + + def state_read_target(syntax, node) + syntax.send(:generic_state_read_target, node) + end + + def state_target(syntax, lhs) + syntax.send(:generic_state_target, lhs) + end + end + + class RubySyntaxAdapter < TreeSitterLanguageAdapter + def function_name(syntax, node) + case node.kind + when "body_statement" + 
syntax.send(:hidden_ruby_method_name, node) + when "singleton_method" + name = syntax.send(:named_field, node, "name")&.text || + node.named_children.reverse.find do |child| + %w[identifier field_identifier property_identifier].include?(child.kind) + end&.text + name && "self.#{name}" + when "argument_list" + syntax.send(:inline_def_name, node) + else + super + end + end + + def visibility(syntax, _document, node) + return syntax.send(:ruby_inline_def_visibility, node) if syntax.send(:inline_def_argument_list?, node) + + syntax.send(:ruby_method_visibility, node) + end + + def owner_name_from_declaration(syntax, document, node) + return syntax.send(:hidden_ruby_owner_name, node) if syntax.send(:hidden_ruby_owner_declaration?, node) + + super + end + + def owner_kind(syntax, node) + return syntax.send(:hidden_ruby_owner_kind, node) if syntax.send(:hidden_ruby_owner_declaration?, node) + + super + end + + def call_target(syntax, document, node) + case node.kind + when "call" + syntax.send(:ruby_call_target, node) + when "body_statement" + syntax.send(:ruby_bare_body_call_target, document, node) + when "identifier" + syntax.send(:ruby_bare_call_target, document, node) + else + super + end + end + end + + class PythonSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(syntax, _document, node) + name = function_name(syntax, node).to_s + return :private if name.start_with?("_") && !name.start_with?("__") + + :public + end + end + + class GoSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(syntax, _document, node) + syntax.send(:exported_name_visibility, function_name(syntax, node)) + end + end + + class RustSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(syntax, _document, node) + syntax.send(:modifier_visibility, node) || :private + end + end + + class JavaScriptSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(syntax, _document, node) + syntax.send(:modifier_visibility, node) || typescript_visibility(syntax, node) + end + + private + 
+ def typescript_visibility(syntax, node) + function_name(syntax, node).to_s.start_with?("#") ? :private : :public + end + end + + class CppSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(syntax, _document, node) + syntax.send(:modifier_visibility, node) || syntax.send(:cpp_visibility, node) + end + end + + class CSharpSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(syntax, _document, node) + syntax.send(:modifier_visibility, node) || :private + end + end + + class CSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(syntax, _document, node) + syntax.send(:c_visibility, node) + end + end + + class LuaSyntaxAdapter < TreeSitterLanguageAdapter + def generated_prelude?(syntax, document, node) + return false unless syntax.send(:line, node) == 1 + + first_line = document.lines.first.to_s + first_line.include?("_tl_compat") && first_line.include?("compat53.module") + end + end + + LanguageProfile = TreeSitterLanguageAdapter + + LANGUAGE_PROFILES = { + ruby: RubySyntaxAdapter.new( + language: :ruby, + extensions: %w[.rb], + lexicon: RUBY_LEXICON, + package: "tree-sitter-ruby" + ), + python: PythonSyntaxAdapter.new( + language: :python, + extensions: %w[.py .pyi], + lexicon: PYTHON_LEXICON, + package: "tree-sitter-python" + ), + javascript: JavaScriptSyntaxAdapter.new( + language: :javascript, + extensions: %w[.js .jsx .mjs .cjs], + lexicon: JAVASCRIPT_LEXICON, + package: "tree-sitter-javascript" + ), + typescript: JavaScriptSyntaxAdapter.new( + language: :typescript, + extensions: %w[.ts .tsx], + lexicon: JAVASCRIPT_LEXICON, + package: "tree-sitter-typescript" + ), + go: GoSyntaxAdapter.new( + language: :go, + extensions: %w[.go], + lexicon: GO_LEXICON, + package: "tree-sitter-go" + ), + rust: RustSyntaxAdapter.new( + language: :rust, + extensions: %w[.rs], + lexicon: RUST_LEXICON, + package: "tree-sitter-rust" + ), + zig: TreeSitterLanguageAdapter.new( + language: :zig, + extensions: %w[.zig], + lexicon: ZIG_LEXICON, + package: 
"@tree-sitter-grammars/tree-sitter-zig" + ), + lua: LuaSyntaxAdapter.new( + language: :lua, + extensions: %w[.lua], + lexicon: LUA_LEXICON, + package: "@tree-sitter-grammars/tree-sitter-lua" + ), + c: CSyntaxAdapter.new( + language: :c, + extensions: %w[.c .h], + lexicon: C_LEXICON, + package: "tree-sitter-c", + first_argument_receiver: true + ), + cpp: CppSyntaxAdapter.new( + language: :cpp, + extensions: %w[.cc .cpp .cxx .hh .hpp .hxx], + lexicon: CPP_LEXICON, + package: "tree-sitter-cpp" + ), + csharp: CSharpSyntaxAdapter.new( + language: :csharp, + extensions: %w[.cs], + lexicon: CSHARP_LEXICON, + package: "tree-sitter-c-sharp", + grammar_names: %w[c-sharp csharp], + tree_sitter_language_name: "c_sharp" + ), + java: TreeSitterLanguageAdapter.new( + language: :java, + extensions: %w[.java], + lexicon: JAVA_LEXICON, + package: "tree-sitter-java" + ), + swift: TreeSitterLanguageAdapter.new( + language: :swift, + extensions: %w[.swift], + lexicon: SWIFT_LEXICON, + package: "tree-sitter-swift" + ), + kotlin: TreeSitterLanguageAdapter.new( + language: :kotlin, + extensions: %w[.kt .kts], + lexicon: KOTLIN_LEXICON, + package: "tree-sitter-kotlin" + ) }.freeze + LANGUAGE_BY_EXTENSION = LANGUAGE_PROFILES.values.each_with_object({}) do |profile, index| + profile.extensions.each { |extension| index[extension] ||= profile.language } + end.freeze + GENERIC_LANGUAGE_PROFILE = TreeSitterLanguageAdapter.new( + language: :generic, + extensions: [], + lexicon: RUBY_LEXICON, + package: "" + ).freeze + module_function def parse(file, language: nil, parser: ENV.fetch("DECOMPLEX_PARSER", "tree_sitter")) @@ -332,29 +650,13 @@ def language_for(file) forced = ENV["DECOMPLEX_FORCE_LANGUAGE"].to_s.strip return forced.tr("-", "_").to_sym unless forced.empty? 
- case File.extname(file).downcase - when ".rb" then :ruby - when ".py", ".pyi" then :python - when ".js", ".jsx", ".mjs", ".cjs" then :javascript - when ".ts", ".tsx" then :typescript - when ".go" then :go - when ".rs" then :rust - when ".zig" then :zig - when ".lua" then :lua - when ".c", ".h" then :c - when ".cc", ".cpp", ".cxx", ".hh", ".hpp", ".hxx" then :cpp - when ".cs" then :csharp - when ".java" then :java - when ".swift" then :swift - when ".kt", ".kts" then :kotlin - else :ruby - end + LANGUAGE_BY_EXTENSION.fetch(File.extname(file).downcase, :ruby) end def supported_exts(parser: self.parser) case parser.to_s.tr("-", "_") when "", "tree_sitter", "treesitter" - %w[.rb .py .pyi .js .jsx .mjs .cjs .ts .tsx .go .rs .zig .lua .c .h .cc .cpp .cxx .hh .hpp .hxx .cs .java .swift .kt .kts] + LANGUAGE_PROFILES.values.flat_map(&:extensions).uniq else [] end @@ -366,7 +668,11 @@ def supported_source?(file, parser: self.parser) def language_lexicon(language) key = language.to_s.empty? ? nil : language.to_sym - LANGUAGE_LEXICONS.fetch(key) + language_profile(key).lexicon + end + + def language_profile(language) + LANGUAGE_PROFILES.fetch(language.to_sym) end class Document @@ -642,10 +948,18 @@ def children context.children(self) end + def child_count + children.length + end + def named_children context.named_children(self) end + def named_child_count + named_children.length + end + def child_by_field_name(name) context.child_by_field_name(self, name) end @@ -696,29 +1010,6 @@ class TreeSitterAdapter case switch_statement expression_switch_statement switch_expression match_statement match_expression when_expression].freeze NOISE_MESSAGES = %w[! 
!= == === < <= > >= [] []= to_s inspect class].freeze - LANGUAGE_PACKAGES = { - ruby: "tree-sitter-ruby", - python: "tree-sitter-python", - javascript: "tree-sitter-javascript", - typescript: "tree-sitter-typescript", - go: "tree-sitter-go", - rust: "tree-sitter-rust", - zig: "@tree-sitter-grammars/tree-sitter-zig", - lua: "@tree-sitter-grammars/tree-sitter-lua", - c: "tree-sitter-c", - cpp: "tree-sitter-cpp", - csharp: "tree-sitter-c-sharp", - java: "tree-sitter-java", - swift: "tree-sitter-swift", - kotlin: "tree-sitter-kotlin" - }.freeze - LANGUAGE_GRAMMAR_NAMES = { - csharp: ["c-sharp", "csharp"] - }.freeze - TREE_SITTER_LANGUAGE_NAMES = { - csharp: "c_sharp" - }.freeze - FIRST_ARGUMENT_RECEIVER_LANGUAGES = %i[c].freeze def parse(file, language: nil) lang = (language || Syntax.language_for(file)).to_sym @@ -845,9 +1136,13 @@ def type_aliases(lines) private + def syntax_profile(language) + language ? Syntax.language_profile(language) : Syntax::GENERIC_LANGUAGE_PROFILE + end + def parser_for(language) require_tree_sitter - lang_name = TREE_SITTER_LANGUAGE_NAMES.fetch(language, language.to_s) + lang_name = Syntax.language_profile(language).tree_sitter_language_name register_language(lang_name, grammar_path(language)) ::TreeSitter::Parser.new.tap { |parser| parser.language = lang_name } end @@ -881,8 +1176,9 @@ def grammar_path(language) end def grammar_candidates(language) - pkg = LANGUAGE_PACKAGES.fetch(language) - stems = LANGUAGE_GRAMMAR_NAMES.fetch(language, [language.to_s]) + profile = Syntax.language_profile(language) + pkg = profile.package + stems = profile.grammar_names names = stems.flat_map do |stem| ["#{stem}.so", "tree-sitter-#{stem}.so", "libtree-sitter-#{stem}.so", "#{stem}.node", @@ -954,14 +1250,14 @@ def walk(node, stack, &block) def push_context(stack, node) next_stack = push_owner_context(stack, node) - name = function_name(node) + name = function_name(node, language: current_language(next_stack)) next_stack = name ? 
next_stack + [function_context(node, next_stack)] : next_stack control = control_context(node) control ? next_stack + [{ control: control }] : next_stack end def push_owner_context(stack, node) - owner = owner_name_from_declaration(nil, node) + owner = owner_name_from_declaration(nil, node, language: current_language(stack)) return stack unless owner parent_owner = current_owner_from_stack(stack) @@ -970,7 +1266,7 @@ def push_owner_context(stack, node) else owner end - stack + [{ owner: full_owner, owner_declaration: true, owner_kind: owner_kind(node) }] + stack + [{ owner: full_owner, owner_declaration: true, owner_kind: owner_kind(node, language: current_language(stack)) }] end def current_function(stack) @@ -1003,7 +1299,7 @@ def current_control(stack) def function_context(node, stack) { - function: function_name(node), + function: function_name(node, language: current_language(stack)), owner: function_owner_name(node, stack), params: function_params(node), receiver: function_receiver_name(node, stack) @@ -1016,55 +1312,16 @@ def function_owner_name(node, stack) receiver_convention_owner_name(node, language: current_language(stack)) end - def function_name(node) - case node.kind - when "body_statement" - hidden_ruby_method_name(node) - when "method", "function_definition", "function_declaration", - "method_definition", "function_item" - named_field(node, "name")&.text || - declarator_name(named_field(node, "declarator")) || - first_named_text(node, %w[identifier constant property_identifier]) - when "singleton_method" - name = named_field(node, "name")&.text || - node.named_children.reverse.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) }&.text - name && "self.#{name}" - when "argument_list" - inline_def_name(node) - when "method_declaration" - named_field(node, "name")&.text || first_named_text(node, %w[field_identifier identifier]) - end + def function_name(node, language: nil) + 
syntax_profile(language).function_name(self, node) end def function_kind(node, stack) - return :method if owner_for_node(nil, node, stack: stack) - - :function + syntax_profile(current_language(stack)).function_kind(self, node, stack) end def visibility_for(document, node) - return ruby_inline_def_visibility(node) if inline_def_argument_list?(node) - - case document.language - when :ruby - ruby_method_visibility(node) - when :python - python_visibility(node) - when :go - exported_name_visibility(function_name(node)) - when :rust - modifier_visibility(node) || :private - when :typescript, :javascript - modifier_visibility(node) || typescript_visibility(node) - when :cpp - modifier_visibility(node) || cpp_visibility(node) - when :csharp - modifier_visibility(node) || :private - when :c - c_visibility(node) - else - modifier_visibility(node) - end + syntax_profile(document.language).visibility(self, document, node) end def ruby_method_visibility(node) @@ -1322,7 +1579,7 @@ def record_conjunction_decision(document, node, stack, out) end def record_function_def(document, node, stack, out) - name = function_name(node) + name = function_name(node, language: document.language) return unless name out << FunctionDef.new( @@ -1347,7 +1604,7 @@ def record_owner_def(document, node, stack, out) out << OwnerDef.new( file: document.file, name: full_owner, - kind: owner_kind(node), + kind: owner_kind(node, language: document.language), line: line(node), span: span(node) ) @@ -1374,7 +1631,7 @@ def record_call_site(document, node, stack, out) end def record_state_declaration(document, node, stack, out) - declaration = state_declaration(node) + declaration = state_declaration(node, language: document.language) return unless declaration out << StateDeclaration.new( @@ -1592,7 +1849,7 @@ def record_state_write(document, node, stack, out) end return unless lhs - target = state_target(lhs) + target = state_target(lhs, language: document.language) return unless target target = 
normalize_target_receiver(target, stack) return if target[:field] == "[]" @@ -1611,7 +1868,7 @@ def record_state_write(document, node, stack, out) end def record_state_read(document, node, stack, out) - target = state_read_target(node) + target = state_read_target(node, language: document.language) return unless target target = normalize_target_receiver(target, stack) @@ -1769,7 +2026,7 @@ def record_state_param_origin(document, node, stack, out) end return unless lhs && rhs - target = state_target(lhs) + target = state_target(lhs, language: document.language) return unless target && rhs target = normalize_target_receiver(target, stack) @@ -1806,6 +2063,7 @@ def record_branch_decision(document, node, stack, out, immutable_readers:, immut collect_state_refs( cond, refs, + language: document.language, defn: current_function(stack), immutable_readers: immutable_readers, immutable_reader_types: immutable_reader_types, @@ -1957,11 +2215,11 @@ def first_token_kind(node) node.children.first&.kind.to_s end - def collect_state_refs(node, refs, defn:, immutable_readers:, immutable_reader_types:, type_aliases:, + def collect_state_refs(node, refs, language:, defn:, immutable_readers:, immutable_reader_types:, type_aliases:, method_param_types:) if node.kind == "instance_variable" || node.kind == "global_variable" refs << node.text - elsif (target = state_read_target(node)) + elsif (target = state_read_target(node, language: language)) unless namespace_receiver?(target[:receiver]) unless immutable_state_read?(target, defn, immutable_readers, immutable_reader_types, type_aliases, method_param_types) refs << (target[:receiver] == "self" ? 
target[:field] : "#{target[:receiver]}.#{target[:field]}") @@ -1972,6 +2230,7 @@ def collect_state_refs(node, refs, defn:, immutable_readers:, immutable_reader_t collect_state_refs( child, refs, + language: language, defn: defn, immutable_readers: immutable_readers, immutable_reader_types: immutable_reader_types, @@ -2087,10 +2346,11 @@ def collect_identifiers(node, out) end - def owner_for_node(document, node, stack: nil) + def owner_for_node(document, node, stack: nil, language: nil) + language ||= document&.language || current_language(Array(stack)) receiver_owner = receiver_owner_name(node) return receiver_owner if receiver_owner - convention_owner = receiver_convention_owner_name(node, language: document&.language) + convention_owner = receiver_convention_owner_name(node, language: language) return convention_owner if convention_owner stacked_owner = current_owner_from_stack(Array(stack)) @@ -2122,35 +2382,12 @@ def owner_chain_for_node(document, node) chain.reverse end - def owner_name_from_declaration(document, node) - if hidden_ruby_owner_declaration?(node) - return hidden_ruby_owner_name(node) - end - - case node.kind - when "class", "class_definition", "class_declaration", "class_specifier", "module" - named_field(node, "name")&.text || first_named_text(node, %w[constant identifier type_identifier]) - when "impl_item", "impl_block" - impl_owner_name(node) - when "struct_item", "struct_spec", "struct_specifier", "type_spec", "type_declaration" - named_field(node, "name")&.text || first_named_text(node, %w[type_identifier identifier]) - when "struct_declaration", "union_declaration", "enum_declaration" - bound_container_name(node) || returned_container_owner(node) || anonymous_owner_name(document, node) - end + def owner_name_from_declaration(document, node, language: nil) + syntax_profile(language || document&.language).owner_name_from_declaration(self, document, node) end - def owner_kind(node) - return hidden_ruby_owner_kind(node) if 
hidden_ruby_owner_declaration?(node) - - case node.kind - when "class", "class_definition", "class_declaration", "class_specifier" then :class - when "module" then :module - when "impl_item", "impl_block" then :impl - when "struct_declaration", "struct_item", "struct_spec", "struct_specifier" then :struct - when "union_declaration" then :union - when "enum_declaration" then :enum - else :owner - end + def owner_kind(node, language: nil) + syntax_profile(language).owner_kind(self, node) end def impl_owner_name(node) @@ -2165,9 +2402,7 @@ def receiver_owner_name(node) end def function_receiver_name(node, stack) - receiver_param = method_receiver_param_node(node) - receiver_param&.text || - receiver_convention_param_name(node, language: current_language(stack)) + syntax_profile(current_language(stack)).function_receiver_name(self, node, stack) end def method_receiver_type_node(node) @@ -2194,24 +2429,11 @@ def method_receiver_declaration(node) end def receiver_convention_owner_name(node, language:) - return nil unless first_argument_receiver_language?(language) - return nil unless node.kind == "function_definition" - - receiver = first_argument_receiver_parameter(node) - return nil unless receiver - - type = normalize_type_owner(receiver[:type]) - name = function_name(node).to_s - return nil if type.empty? || name.empty? - - prefix = snake_case_type_name(type) - name.start_with?("#{prefix}_") ? 
type : nil + syntax_profile(language).receiver_convention_owner_name(self, node, language: language) end def receiver_convention_param_name(node, language:) - return nil unless first_argument_receiver_language?(language) - - first_argument_receiver_parameter(node)&.fetch(:name, nil) + syntax_profile(language).receiver_convention_param_name(self, node, language: language) end def first_argument_receiver_parameter(node) @@ -2236,7 +2458,9 @@ def first_argument_receiver_parameter(node) end def first_argument_receiver_language?(language) - FIRST_ARGUMENT_RECEIVER_LANGUAGES.include?(language&.to_sym) + return false unless language + + Syntax.language_profile(language).first_argument_receiver? end def snake_case_type_name(type) @@ -2266,12 +2490,15 @@ def bound_container_name(node) nil end - def returned_container_owner(node) + def returned_container_owner(document, node) parent = parent_node(node) seen_nodes = Set.new while parent && !seen_nodes.include?(node_key(parent)) seen_nodes << node_key(parent) - return function_name(parent) if function_name(parent) + if (name = function_name(parent, language: document&.language)) + return name + end + parent = parent_node(parent) end nil @@ -2295,19 +2522,7 @@ def file_owner(file) end def call_target(document, node) - case node.kind - when "call" - ruby_call_target(node) - when "body_statement" - ruby_bare_body_call_target(document, node) - when "identifier" - ruby_bare_call_target(document, node) - when "call_expression", "method_invocation", "invocation_expression" - generic_call_target(document, node) - when "attribute", "selector_expression", "field", "field_access", "member_expression", - "member_access_expression", "field_expression", "expression_list" - adjacent_argument_call_target(node) - end + syntax_profile(document.language).call_target(self, document, node) end def ruby_call_target(node) @@ -2386,7 +2601,7 @@ def first_argument_receiver_call_target(document, node, target) return nil unless target[:receiver] == 
"self" first_arg = call_argument_nodes(node).first - arg_target = state_read_target(first_arg) + arg_target = state_read_target(first_arg, language: document.language) return nil unless arg_target { @@ -2462,10 +2677,15 @@ def noise_call?(target) false end - def state_declaration(node) + def state_declaration(node, language: nil) + syntax_profile(language).state_declaration(self, node) + end + + def generic_state_declaration(node, language: nil) case node.kind when "assignment", "assignment_expression", "assignment_statement" - ruby_t_let_state_declaration(node) || assignment_state_declaration(node) + ruby_t_let_state_declaration(node, language: language) || + assignment_state_declaration(node, language: language) when "container_field" zig_container_field_declaration(node) when "property_declaration", "public_field_definition", "field_definition", "field_declaration" @@ -2554,10 +2774,10 @@ def declared_type_before_name(text, node, name_node) candidate end - def assignment_state_declaration(node) + def assignment_state_declaration(node, language: nil) lhs = named_field(node, "left") || node.named_children.first rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] - target = state_target(lhs) + target = state_target(lhs, language: language) return nil unless target return nil unless %w[self this].include?(target[:receiver].to_s) @@ -2580,14 +2800,14 @@ def inferred_assignment_type(node) end def generated_lua_compat_prelude?(document, node) - return false unless document.language == :lua - return false unless line(node) == 1 + syntax_profile(document.language).generated_prelude?(self, document, node) + end - first_line = document.lines.first.to_s - first_line.include?("_tl_compat") && first_line.include?("compat53.module") + def state_read_target(node, language: nil) + syntax_profile(language).state_read_target(self, node) end - def state_read_target(node) + def generic_state_read_target(node) case node.kind when "call" receiver = 
named_field(node, "receiver") @@ -2626,7 +2846,11 @@ def state_read_target(node) end end - def state_target(lhs) + def state_target(lhs, language: nil) + syntax_profile(language).state_target(self, lhs) + end + + def generic_state_target(lhs) return nil unless ts_node?(lhs) return nil if prev_sibling(lhs)&.text == ":" @@ -2640,7 +2864,7 @@ def state_target(lhs) when "field", "field_access", "selector_expression", "member_expression", "member_access_expression", "attribute", "field_expression", "navigation_expression", "directly_assignable_expression", "expression_list" if lhs.kind == "expression_list" && !(named_field(lhs, "operand") && named_field(lhs, "field")) - return state_target(lhs.named_children.first) + return generic_state_target(lhs.named_children.first) end object = named_field(lhs, "object") || named_field(lhs, "receiver") || @@ -2888,10 +3112,10 @@ def current_receiver_name(stack) entry && entry[:receiver] end - def ruby_t_let_state_declaration(node) + def ruby_t_let_state_declaration(node, language: nil) lhs = named_field(node, "left") || node.named_children.first rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] - target = state_target(lhs) + target = state_target(lhs, language: language) return nil unless target && target[:receiver] == "self" && target[:field].to_s.start_with?("@") return nil unless rhs&.kind == "call" diff --git a/gems/decomplex/rust/src/bin/dump_ast.rs b/gems/decomplex/rust/src/bin/dump_ast.rs index c1e077865..c798831fd 100644 --- a/gems/decomplex/rust/src/bin/dump_ast.rs +++ b/gems/decomplex/rust/src/bin/dump_ast.rs @@ -63,6 +63,8 @@ fn child_value(child: &Child) -> Value { match child { Child::Node(node) => node_value(node), Child::Symbol(value) | Child::String(value) => Value::String(value.clone()), + Child::Integer(value) => Value::Number((*value).into()), + Child::Bool(value) => Value::Bool(*value), Child::Nil => Value::Null, } } diff --git 
a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 3bbb894d7..765732f39 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -8,6 +8,217 @@ use std::fs; use std::path::{Path, PathBuf}; use tree_sitter::{Language as TreeSitterLanguage, Node, Parser}; +trait LanguageProfile { + fn language(&self) -> Language; + fn grammar(&self) -> TreeSitterLanguage; + + fn first_argument_receiver(&self) -> bool { + false + } + + fn function_name(&self, node: Node<'_>, source: &str) -> Option { + generic_function_name(node, source) + } + + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + generic_owner_name_from_declaration(node, source) + } + + fn generated_prelude(&self, _node: Node<'_>, _source: &str) -> bool { + false + } + + fn receiver_convention_owner_name(&self, node: Node<'_>, source: &str) -> Option { + if !self.first_argument_receiver() || node.kind() != "function_definition" { + return None; + } + + let (type_name, _) = first_argument_receiver_parameter(node, source)?; + let type_name = normalize_type_owner(&type_name); + let name = self.function_name(node, source)?; + + if name.starts_with(&snake_case_type_name(&type_name)) { + Some(type_name) + } else if type_name.ends_with("_t") + && name.starts_with(type_name.strip_suffix("_t").unwrap()) + { + Some(type_name) + } else { + None + } + } + + fn function_receiver_name(&self, node: Node<'_>, source: &str) -> Option { + if self.first_argument_receiver() && node.kind() == "function_definition" { + if let Some((_, name)) = first_argument_receiver_parameter(node, source) { + return Some(name); + } + } + None + } + + fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { + generic_state_target(lhs, source) + } +} + +macro_rules! 
default_profile { + ($name:ident, $language:ident, $grammar:expr) => { + struct $name; + + impl LanguageProfile for $name { + fn language(&self) -> Language { + Language::$language + } + + fn grammar(&self) -> TreeSitterLanguage { + $grammar.into() + } + } + }; +} + +struct RubyProfile; + +impl LanguageProfile for RubyProfile { + fn language(&self) -> Language { + Language::Ruby + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_ruby::LANGUAGE.into() + } + + fn function_name(&self, node: Node<'_>, source: &str) -> Option { + match node.kind() { + "singleton_method" => { + let name = node + .child_by_field_name("name") + .map(|name| node_text(name, source).to_string()) + .or_else(|| { + named_children(node) + .into_iter() + .rev() + .find(|child| { + matches!( + child.kind(), + "identifier" | "field_identifier" | "property_identifier" + ) + }) + .map(|child| node_text(child, source).to_string()) + })?; + Some(format!("self.{name}")) + } + "body_statement" if first_child_kind(node) == Some("def") => { + hidden_ruby_method_name(node, source) + } + "argument_list" if first_child_kind(node) == Some("def") => { + inline_def_name(node, source) + } + _ => generic_function_name(node, source), + } + } + + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + if node.kind() == "body_statement" + && matches!(first_child_kind(node), Some("class" | "module")) + { + return first_named_text(node, source, &["constant", "identifier", "type_identifier"]); + } + generic_owner_name_from_declaration(node, source) + } +} + +struct CProfile; + +impl LanguageProfile for CProfile { + fn language(&self) -> Language { + Language::C + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_c::LANGUAGE.into() + } + + fn first_argument_receiver(&self) -> bool { + true + } +} + +struct LuaProfile; + +impl LanguageProfile for LuaProfile { + fn language(&self) -> Language { + Language::Lua + } + + fn grammar(&self) -> TreeSitterLanguage { + 
tree_sitter_lua::LANGUAGE.into() + } + + fn generated_prelude(&self, node: Node<'_>, source: &str) -> bool { + if line(node) != 1 { + return false; + } + let first_line = source.lines().next().unwrap_or(""); + first_line.contains("_tl_compat") && first_line.contains("compat53.module") + } +} + +default_profile!(PythonProfile, Python, tree_sitter_python::LANGUAGE); +default_profile!( + JavaScriptProfile, + JavaScript, + tree_sitter_javascript::LANGUAGE +); +default_profile!(JavaProfile, Java, tree_sitter_java::LANGUAGE); +default_profile!( + TypeScriptProfile, + TypeScript, + tree_sitter_typescript::LANGUAGE_TYPESCRIPT +); +default_profile!(SwiftProfile, Swift, tree_sitter_swift::LANGUAGE); +default_profile!(KotlinProfile, Kotlin, tree_sitter_kotlin_ng::LANGUAGE); +default_profile!(GoProfile, Go, tree_sitter_go::LANGUAGE); +default_profile!(RustProfile, Rust, tree_sitter_rust::LANGUAGE); +default_profile!(ZigProfile, Zig, tree_sitter_zig::LANGUAGE); +default_profile!(CppProfile, Cpp, tree_sitter_cpp::LANGUAGE); +default_profile!(CSharpProfile, CSharp, tree_sitter_c_sharp::LANGUAGE); + +static RUBY_PROFILE: RubyProfile = RubyProfile; +static PYTHON_PROFILE: PythonProfile = PythonProfile; +static JAVASCRIPT_PROFILE: JavaScriptProfile = JavaScriptProfile; +static JAVA_PROFILE: JavaProfile = JavaProfile; +static TYPESCRIPT_PROFILE: TypeScriptProfile = TypeScriptProfile; +static SWIFT_PROFILE: SwiftProfile = SwiftProfile; +static KOTLIN_PROFILE: KotlinProfile = KotlinProfile; +static GO_PROFILE: GoProfile = GoProfile; +static RUST_PROFILE: RustProfile = RustProfile; +static ZIG_PROFILE: ZigProfile = ZigProfile; +static LUA_PROFILE: LuaProfile = LuaProfile; +static C_PROFILE: CProfile = CProfile; +static CPP_PROFILE: CppProfile = CppProfile; +static CSHARP_PROFILE: CSharpProfile = CSharpProfile; + +fn language_profile(language: Language) -> &'static dyn LanguageProfile { + match language { + Language::Ruby => &RUBY_PROFILE, + Language::Python => &PYTHON_PROFILE, + 
Language::JavaScript => &JAVASCRIPT_PROFILE, + Language::Java => &JAVA_PROFILE, + Language::TypeScript => &TYPESCRIPT_PROFILE, + Language::Swift => &SWIFT_PROFILE, + Language::Kotlin => &KOTLIN_PROFILE, + Language::Go => &GO_PROFILE, + Language::Rust => &RUST_PROFILE, + Language::Zig => &ZIG_PROFILE, + Language::Lua => &LUA_PROFILE, + Language::C => &C_PROFILE, + Language::Cpp => &CPP_PROFILE, + Language::CSharp => &CSHARP_PROFILE, + } +} + pub fn parse_file(file: PathBuf, language: Language) -> Result { let parsed = ParsedDocument::parse(file, language)?; let mut function_defs = Vec::new(); @@ -48,25 +259,6 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { }) } -fn language_grammar(language: Language) -> TreeSitterLanguage { - match language { - Language::Ruby => tree_sitter_ruby::LANGUAGE.into(), - Language::Python => tree_sitter_python::LANGUAGE.into(), - Language::JavaScript => tree_sitter_javascript::LANGUAGE.into(), - Language::Java => tree_sitter_java::LANGUAGE.into(), - Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), - Language::Swift => tree_sitter_swift::LANGUAGE.into(), - Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(), - Language::Go => tree_sitter_go::LANGUAGE.into(), - Language::Rust => tree_sitter_rust::LANGUAGE.into(), - Language::Zig => tree_sitter_zig::LANGUAGE.into(), - Language::Lua => tree_sitter_lua::LANGUAGE.into(), - Language::C => tree_sitter_c::LANGUAGE.into(), - Language::Cpp => tree_sitter_cpp::LANGUAGE.into(), - Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(), - } -} - struct ParsedDocument { file: PathBuf, source: String, @@ -79,7 +271,7 @@ impl ParsedDocument { .with_context(|| format!("failed to read {}", file.display()))?; let mut parser = Parser::new(); parser - .set_language(&language_grammar(language)) + .set_language(&language_profile(language).grammar()) .with_context(|| "failed to initialize tree-sitter parser")?; let tree = parser .parse(&source, None) @@ 
-184,11 +376,11 @@ fn record_function_def( node: Node<'_>, source: &str, file: &Path, - _language: Language, + language: Language, context: &ContextState, out: &mut Vec, ) { - let Some(name) = function_name(node, source) else { + let Some(name) = language_profile(language).function_name(node, source) else { return; }; let function = FunctionDef { @@ -222,13 +414,13 @@ fn record_predicate_alias( node: Node<'_>, source: &str, file: &Path, - _language: Language, + language: Language, out: &mut Vec, ) { if !matches!(node.kind(), "method" | "function_definition") { return; } - let Some(name) = function_name(node, source) else { + let Some(name) = language_profile(language).function_name(node, source) else { return; }; let Some(body) = method_single_expression_body(node) else { @@ -295,7 +487,7 @@ fn record_decision_site( out: &mut Vec, seen: &mut HashSet, ) { - if generated_lua_compat_prelude(node, source, language) { + if language_profile(language).generated_prelude(node, source) { return; } @@ -329,17 +521,6 @@ fn record_decision_site( } } -fn generated_lua_compat_prelude(node: Node<'_>, source: &str, language: Language) -> bool { - if language != Language::Lua { - return false; - } - if line(node) != 1 { - return false; - } - let first_line = source.lines().next().unwrap_or(""); - first_line.contains("_tl_compat") && first_line.contains("compat53.module") -} - fn record_conjunction_decision( mut node: Node<'_>, source: &str, @@ -446,8 +627,10 @@ fn push_owner_context( context: &ContextState, language: Language, ) -> ContextState { - let Some(owner) = owner_name_from_declaration(node, source) - .or_else(|| receiver_convention_owner_name(node, source, language)) + let profile = language_profile(language); + let Some(owner) = profile + .owner_name_from_declaration(node, source) + .or_else(|| profile.receiver_convention_owner_name(node, source)) else { return context.clone(); }; @@ -472,13 +655,14 @@ fn push_function_context( source: &str, language: Language, ) -> 
ContextState { - let Some(function) = function_name(node, source) else { + let profile = language_profile(language); + let Some(function) = profile.function_name(node, source) else { return context; }; let owner = context.current_owner(); context.function = Some(function); context.owner = Some(owner); - context.receiver = function_receiver_name(node, source, language); + context.receiver = profile.function_receiver_name(node, source); context } @@ -486,7 +670,7 @@ fn record_state_write( node: Node<'_>, source: &str, file: &Path, - _language: Language, + language: Language, context: &ContextState, out: &mut Vec, seen: &mut HashSet, @@ -498,7 +682,7 @@ fn record_state_write( let Some(assignment) = assignment_target(node) else { return; }; - let Some(target) = state_target(assignment.lhs, source) else { + let Some(target) = language_profile(language).state_target(assignment.lhs, source) else { return; }; let target = normalize_target_receiver(target, context); @@ -569,7 +753,7 @@ fn assignment_lhs_node(node: Node<'_>) -> bool { ) } -fn state_target(lhs: Node<'_>, source: &str) -> Option { +fn generic_state_target(lhs: Node<'_>, source: &str) -> Option { if previous_sibling_text(lhs, source).as_deref() == Some(":") { return None; } @@ -622,7 +806,7 @@ fn state_target(lhs: Node<'_>, source: &str) -> Option { } } -fn function_name(node: Node<'_>, source: &str) -> Option { +fn generic_function_name(node: Node<'_>, source: &str) -> Option { match node.kind() { "method" | "function_definition" @@ -690,12 +874,7 @@ fn declarator_name(node: Option>, source: &str) -> Option { None } -fn owner_name_from_declaration(node: Node<'_>, source: &str) -> Option { - if node.kind() == "body_statement" && matches!(first_child_kind(node), Some("class" | "module")) - { - return first_named_text(node, source, &["constant", "identifier", "type_identifier"]); - } - +fn generic_owner_name_from_declaration(node: Node<'_>, source: &str) -> Option { match node.kind() { "class" | "module" | 
"class_definition" | "class_declaration" | "class_specifier" => node .child_by_field_name("name") @@ -1276,6 +1455,33 @@ end assert_eq!(doc.state_writes[0].function, "set"); assert_eq!(doc.state_writes[0].field, "state"); } + + #[test] + fn language_profiles_own_parser_and_receiver_metadata() { + assert_eq!(language_profile(Language::Ruby).language(), Language::Ruby); + assert_eq!(language_profile(Language::C).language(), Language::C); + assert!(language_profile(Language::C).first_argument_receiver()); + assert!(!language_profile(Language::Lua).first_argument_receiver()); + + let mut parser = Parser::new(); + parser + .set_language(&language_profile(Language::Lua).grammar()) + .expect("lua grammar"); + } + + #[test] + fn lua_profile_owns_generated_prelude_filter() { + let source = "local _tl_compat; local ok, compat53 = pcall(require, \"compat53.module\")\nfunction real() end\n"; + let mut parser = Parser::new(); + parser + .set_language(&language_profile(Language::Lua).grammar()) + .expect("lua grammar"); + let tree = parser.parse(source, None).expect("parse lua"); + let node = tree.root_node().named_child(0).expect("first lua node"); + + assert!(language_profile(Language::Lua).generated_prelude(node, source)); + assert!(!language_profile(Language::Ruby).generated_prelude(node, source)); + } } #[cfg(test)] @@ -1287,16 +1493,15 @@ mod c_tests { #[test] fn test_c_assignment() { let mut file = NamedTempFile::new().unwrap(); - file.write_all(b"void foo() { handle->loop = 1; }").unwrap(); + file.write_all(b"typedef struct Node { int storage; } Node; void node_set(Node* node) { node->storage = 1; }") + .unwrap(); let doc = parse_file(file.path().to_path_buf(), Language::C).unwrap(); - assert!(!doc.state_writes.is_empty()); + assert_eq!(doc.function_defs[0].owner, "Node"); + assert_eq!(doc.state_writes[0].receiver, "self"); + assert_eq!(doc.state_writes[0].field, "storage"); } } -fn first_argument_receiver_language(language: Language) -> bool { - matches!(language, 
Language::C) -} - fn first_argument_receiver_parameter(node: Node<'_>, source: &str) -> Option<(String, String)> { let params = node .child_by_field_name("declarator") @@ -1320,16 +1525,14 @@ fn first_argument_receiver_parameter(node: Node<'_>, source: &str) -> Option<(St ) })?; - let name_node = named_children(first) + let name = named_children(first) .into_iter() .rev() .find(|child| matches!(child.kind(), "identifier" | "field_identifier")) - .or_else(|| first_named_child(first))?; + .map(|child| node_text(child, source).to_string()) + .or_else(|| declarator_name(Some(first), source))?; - Some(( - node_text(type_node, source).to_string(), - node_text(name_node, source).to_string(), - )) + Some((node_text(type_node, source).to_string(), name)) } fn snake_case_type_name(type_str: &str) -> String { @@ -1340,38 +1543,6 @@ fn snake_case_type_name(type_str: &str) -> String { last } -fn receiver_convention_owner_name( - node: Node<'_>, - source: &str, - language: Language, -) -> Option { - if !first_argument_receiver_language(language) || node.kind() != "function_definition" { - return None; - } - - let (type_name, _) = first_argument_receiver_parameter(node, source)?; - let type_name = normalize_type_owner(&type_name); - let name = function_name(node, source)?; - - if name.starts_with(&snake_case_type_name(&type_name)) { - Some(type_name) - } else if type_name.ends_with("_t") && name.starts_with(type_name.strip_suffix("_t").unwrap()) { - Some(type_name) - } else { - None - } -} - -fn function_receiver_name(node: Node<'_>, source: &str, language: Language) -> Option { - // Only handling C convention for now - if first_argument_receiver_language(language) && node.kind() == "function_definition" { - if let Some((_, name)) = first_argument_receiver_parameter(node, source) { - return Some(name); - } - } - None -} - fn normalize_target_receiver(mut target: Target, context: &ContextState) -> Target { if let Some(current_receiver) = &context.receiver { if &target.receiver == 
current_receiver { diff --git a/gems/decomplex/test/syntax_test.rb b/gems/decomplex/test/syntax_test.rb index 383ad10a9..e309aabf6 100644 --- a/gems/decomplex/test/syntax_test.rb +++ b/gems/decomplex/test/syntax_test.rb @@ -111,6 +111,20 @@ def test_language_profiles_have_language_specific_lexicons end end + def test_tree_sitter_language_profile_owns_parser_metadata + c = Decomplex::Syntax.language_profile(:c) + assert_equal %w[.c .h], c.extensions + assert_equal "tree-sitter-c", c.package + assert_equal %w[c], c.grammar_names + assert c.first_argument_receiver? + + csharp = Decomplex::Syntax.language_profile(:csharp) + assert_equal "tree-sitter-c-sharp", csharp.package + assert_equal %w[c-sharp csharp], csharp.grammar_names + assert_equal "c_sharp", csharp.tree_sitter_language_name + refute csharp.first_argument_receiver? + end + def test_force_language_override_handles_ambiguous_headers assert_equal :c, Decomplex::Syntax.language_for("include/demo.h") From 606fd0d5085766b1406ae63779399c416ef35f25 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Fri, 19 Jun 2026 11:32:13 +0000 Subject: [PATCH 22/52] Seed syntax walks with language context --- gems/decomplex/lib/decomplex/syntax.rb | 162 +++++++++++++++---------- gems/decomplex/test/syntax_test.rb | 19 +++ 2 files changed, 119 insertions(+), 62 deletions(-) diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 8f8ce9d57..5a3ca9584 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -291,37 +291,37 @@ def function_name(syntax, node) case node.kind when "method", "function_definition", "function_declaration", "method_definition", "function_item" - syntax.send(:named_field, node, "name")&.text || - syntax.send(:declarator_name, syntax.send(:named_field, node, "declarator")) || - syntax.send(:first_named_text, node, %w[identifier constant property_identifier]) + syntax.named_field(node, "name")&.text || + 
syntax.declarator_name(syntax.named_field(node, "declarator")) || + syntax.first_named_text(node, %w[identifier constant property_identifier]) when "method_declaration" - syntax.send(:named_field, node, "name")&.text || - syntax.send(:first_named_text, node, %w[field_identifier identifier]) + syntax.named_field(node, "name")&.text || + syntax.first_named_text(node, %w[field_identifier identifier]) end end def function_kind(syntax, node, stack) - syntax.send(:owner_for_node, nil, node, stack: stack) ? :method : :function + syntax.owner_for_node(nil, node, stack: stack) ? :method : :function end def visibility(syntax, _document, node) - syntax.send(:modifier_visibility, node) + syntax.modifier_visibility(node) end def owner_name_from_declaration(syntax, document, node) case node.kind when "class", "class_definition", "class_declaration", "class_specifier", "module" - syntax.send(:named_field, node, "name")&.text || - syntax.send(:first_named_text, node, %w[constant identifier type_identifier]) + syntax.named_field(node, "name")&.text || + syntax.first_named_text(node, %w[constant identifier type_identifier]) when "impl_item", "impl_block" - syntax.send(:impl_owner_name, node) + syntax.impl_owner_name(node) when "struct_item", "struct_spec", "struct_specifier", "type_spec", "type_declaration" - syntax.send(:named_field, node, "name")&.text || - syntax.send(:first_named_text, node, %w[type_identifier identifier]) + syntax.named_field(node, "name")&.text || + syntax.first_named_text(node, %w[type_identifier identifier]) when "struct_declaration", "union_declaration", "enum_declaration" - syntax.send(:bound_container_name, node) || - syntax.send(:returned_container_owner, document, node) || - syntax.send(:anonymous_owner_name, document, node) + syntax.bound_container_name(node) || + syntax.returned_container_owner(document, node) || + syntax.anonymous_owner_name(document, node) end end @@ -338,30 +338,30 @@ def owner_kind(syntax, node) end def 
function_receiver_name(syntax, node, stack) - receiver_param = syntax.send(:method_receiver_param_node, node) + receiver_param = syntax.method_receiver_param_node(node) receiver_param&.text || - receiver_convention_param_name(syntax, node, language: syntax.send(:current_language, stack)) + receiver_convention_param_name(syntax, node, language: syntax.current_language(stack)) end def receiver_convention_owner_name(syntax, node, language:) return nil unless first_argument_receiver? return nil unless node.kind == "function_definition" - receiver = syntax.send(:first_argument_receiver_parameter, node) + receiver = syntax.first_argument_receiver_parameter(node) return nil unless receiver - type = syntax.send(:normalize_type_owner, receiver[:type]) + type = syntax.normalize_type_owner(receiver[:type]) name = function_name(syntax, node).to_s return nil if type.empty? || name.empty? - prefix = syntax.send(:snake_case_type_name, type) + prefix = syntax.snake_case_type_name(type) name.start_with?("#{prefix}_") ? type : nil end def receiver_convention_param_name(syntax, node, language:) return nil unless first_argument_receiver? 
- syntax.send(:first_argument_receiver_parameter, node)&.fetch(:name, nil) + syntax.first_argument_receiver_parameter(node)&.fetch(:name, nil) end def generated_prelude?(_syntax, _document, _node) @@ -371,23 +371,23 @@ def generated_prelude?(_syntax, _document, _node) def call_target(syntax, document, node) case node.kind when "call_expression", "method_invocation", "invocation_expression" - syntax.send(:generic_call_target, document, node) + syntax.generic_call_target(document, node) when "attribute", "selector_expression", "field", "field_access", "member_expression", "member_access_expression", "field_expression", "expression_list" - syntax.send(:adjacent_argument_call_target, node) + syntax.adjacent_argument_call_target(node) end end def state_declaration(syntax, node) - syntax.send(:generic_state_declaration, node, language: language) + syntax.generic_state_declaration(node, language: language) end def state_read_target(syntax, node) - syntax.send(:generic_state_read_target, node) + syntax.generic_state_read_target(node) end def state_target(syntax, lhs) - syntax.send(:generic_state_target, lhs) + syntax.generic_state_target(lhs) end end @@ -395,34 +395,34 @@ class RubySyntaxAdapter < TreeSitterLanguageAdapter def function_name(syntax, node) case node.kind when "body_statement" - syntax.send(:hidden_ruby_method_name, node) + syntax.hidden_ruby_method_name(node) when "singleton_method" - name = syntax.send(:named_field, node, "name")&.text || + name = syntax.named_field(node, "name")&.text || node.named_children.reverse.find do |child| %w[identifier field_identifier property_identifier].include?(child.kind) end&.text name && "self.#{name}" when "argument_list" - syntax.send(:inline_def_name, node) + syntax.inline_def_name(node) else super end end def visibility(syntax, _document, node) - return syntax.send(:ruby_inline_def_visibility, node) if syntax.send(:inline_def_argument_list?, node) + return syntax.ruby_inline_def_visibility(node) if 
syntax.inline_def_argument_list?(node) - syntax.send(:ruby_method_visibility, node) + syntax.ruby_method_visibility(node) end def owner_name_from_declaration(syntax, document, node) - return syntax.send(:hidden_ruby_owner_name, node) if syntax.send(:hidden_ruby_owner_declaration?, node) + return syntax.hidden_ruby_owner_name(node) if syntax.hidden_ruby_owner_declaration?(node) super end def owner_kind(syntax, node) - return syntax.send(:hidden_ruby_owner_kind, node) if syntax.send(:hidden_ruby_owner_declaration?, node) + return syntax.hidden_ruby_owner_kind(node) if syntax.hidden_ruby_owner_declaration?(node) super end @@ -430,11 +430,11 @@ def owner_kind(syntax, node) def call_target(syntax, document, node) case node.kind when "call" - syntax.send(:ruby_call_target, node) + syntax.ruby_call_target(node) when "body_statement" - syntax.send(:ruby_bare_body_call_target, document, node) + syntax.ruby_bare_body_call_target(document, node) when "identifier" - syntax.send(:ruby_bare_call_target, document, node) + syntax.ruby_bare_call_target(document, node) else super end @@ -452,19 +452,19 @@ def visibility(syntax, _document, node) class GoSyntaxAdapter < TreeSitterLanguageAdapter def visibility(syntax, _document, node) - syntax.send(:exported_name_visibility, function_name(syntax, node)) + syntax.exported_name_visibility(function_name(syntax, node)) end end class RustSyntaxAdapter < TreeSitterLanguageAdapter def visibility(syntax, _document, node) - syntax.send(:modifier_visibility, node) || :private + syntax.modifier_visibility(node) || :private end end class JavaScriptSyntaxAdapter < TreeSitterLanguageAdapter def visibility(syntax, _document, node) - syntax.send(:modifier_visibility, node) || typescript_visibility(syntax, node) + syntax.modifier_visibility(node) || typescript_visibility(syntax, node) end private @@ -476,31 +476,61 @@ def typescript_visibility(syntax, node) class CppSyntaxAdapter < TreeSitterLanguageAdapter def visibility(syntax, _document, node) - 
syntax.send(:modifier_visibility, node) || syntax.send(:cpp_visibility, node) + syntax.modifier_visibility(node) || syntax.cpp_visibility(node) end end class CSharpSyntaxAdapter < TreeSitterLanguageAdapter def visibility(syntax, _document, node) - syntax.send(:modifier_visibility, node) || :private + syntax.modifier_visibility(node) || :private end end class CSyntaxAdapter < TreeSitterLanguageAdapter def visibility(syntax, _document, node) - syntax.send(:c_visibility, node) + syntax.c_visibility(node) end end class LuaSyntaxAdapter < TreeSitterLanguageAdapter def generated_prelude?(syntax, document, node) - return false unless syntax.send(:line, node) == 1 + return false unless syntax.line(node) == 1 first_line = document.lines.first.to_s first_line.include?("_tl_compat") && first_line.include?("compat53.module") end end + class TreeSitterLanguageAdapterHelpers + HELPER_METHODS = %i[ + adjacent_argument_call_target anonymous_owner_name bound_container_name + c_visibility cpp_visibility current_language declarator_name + exported_name_visibility first_argument_receiver_parameter first_named_text + generic_call_target generic_state_declaration generic_state_read_target + generic_state_target hidden_ruby_method_name hidden_ruby_owner_declaration? + hidden_ruby_owner_kind hidden_ruby_owner_name impl_owner_name + inline_def_argument_list? inline_def_name line method_receiver_param_node + modifier_visibility named_field normalize_type_owner owner_for_node + returned_container_owner ruby_bare_body_call_target ruby_bare_call_target + ruby_call_target ruby_inline_def_visibility ruby_method_visibility + snake_case_type_name + ].freeze + + def initialize(adapter) + @adapter = adapter + end + + HELPER_METHODS.each do |helper| + define_method(helper) do |*args, **kwargs, &block| + if kwargs.empty? 
+ @adapter.__send__(helper, *args, &block) + else + @adapter.__send__(helper, *args, **kwargs, &block) + end + end + end + end + LanguageProfile = TreeSitterLanguageAdapter LANGUAGE_PROFILES = { @@ -1030,7 +1060,7 @@ def parse(file, language: nil) def decision_sites(document) out = [] - walk(document.root, []) do |node, stack| + walk(document.root, initial_stack(document)) do |node, stack| record_decision_site(document, node, stack, out) end out @@ -1046,7 +1076,7 @@ def state_reads(document) def branch_decisions(document, immutable_readers:, immutable_reader_types:, type_aliases:) out = [] - walk(document.root, []) do |node, stack| + walk(document.root, initial_stack(document)) do |node, stack| record_branch_decision( document, node, @@ -1093,7 +1123,7 @@ def structural_facts(document) state_reads: [], state_writes: [] } - walk(document.root, [{ file_owner: file_owner(document.file), language: document.language }]) do |node, stack| + walk(document.root, initial_stack(document)) do |node, stack| record_function_def(document, node, stack, out[:function_defs]) record_owner_def(document, node, stack, out[:owner_defs]) record_call_site(document, node, stack, out[:call_sites]) @@ -1116,7 +1146,7 @@ def structural_facts(document) def branch_arms(document) out = [] - walk(document.root, []) do |node, stack| + walk(document.root, initial_stack(document)) do |node, stack| record_branch_arm(document, node, stack, out) end out @@ -1136,6 +1166,14 @@ def type_aliases(lines) private + def initial_stack(document) + [{ file_owner: file_owner(document.file), language: document.language }] + end + + def adapter_helpers + @adapter_helpers ||= TreeSitterLanguageAdapterHelpers.new(self) + end + def syntax_profile(language) language ? 
Syntax.language_profile(language) : Syntax::GENERIC_LANGUAGE_PROFILE end @@ -1313,15 +1351,15 @@ def function_owner_name(node, stack) end def function_name(node, language: nil) - syntax_profile(language).function_name(self, node) + syntax_profile(language).function_name(adapter_helpers, node) end def function_kind(node, stack) - syntax_profile(current_language(stack)).function_kind(self, node, stack) + syntax_profile(current_language(stack)).function_kind(adapter_helpers, node, stack) end def visibility_for(document, node) - syntax_profile(document.language).visibility(self, document, node) + syntax_profile(document.language).visibility(adapter_helpers, document, node) end def ruby_method_visibility(node) @@ -1891,7 +1929,7 @@ def record_implicit_state_accesses(document, out) locals = local_declaration_index(document) params = function_param_index(out[:function_defs]) - walk(document.root, [{ file_owner: file_owner(document.file), language: document.language }]) do |node, stack| + walk(document.root, initial_stack(document)) do |node, stack| next unless implicit_state_identifier?(node) owner = current_owner(document, stack) @@ -1943,7 +1981,7 @@ def function_param_index(functions) def local_declaration_index(document) index = Hash.new { |h, k| h[k] = Set.new } - walk(document.root, [{ file_owner: file_owner(document.file), language: document.language }]) do |node, stack| + walk(document.root, initial_stack(document)) do |node, stack| next unless local_variable_declarator?(node) owner = current_owner(document, stack) @@ -2383,11 +2421,11 @@ def owner_chain_for_node(document, node) end def owner_name_from_declaration(document, node, language: nil) - syntax_profile(language || document&.language).owner_name_from_declaration(self, document, node) + syntax_profile(language || document&.language).owner_name_from_declaration(adapter_helpers, document, node) end def owner_kind(node, language: nil) - syntax_profile(language).owner_kind(self, node) + 
syntax_profile(language).owner_kind(adapter_helpers, node) end def impl_owner_name(node) @@ -2402,7 +2440,7 @@ def receiver_owner_name(node) end def function_receiver_name(node, stack) - syntax_profile(current_language(stack)).function_receiver_name(self, node, stack) + syntax_profile(current_language(stack)).function_receiver_name(adapter_helpers, node, stack) end def method_receiver_type_node(node) @@ -2429,11 +2467,11 @@ def method_receiver_declaration(node) end def receiver_convention_owner_name(node, language:) - syntax_profile(language).receiver_convention_owner_name(self, node, language: language) + syntax_profile(language).receiver_convention_owner_name(adapter_helpers, node, language: language) end def receiver_convention_param_name(node, language:) - syntax_profile(language).receiver_convention_param_name(self, node, language: language) + syntax_profile(language).receiver_convention_param_name(adapter_helpers, node, language: language) end def first_argument_receiver_parameter(node) @@ -2522,7 +2560,7 @@ def file_owner(file) end def call_target(document, node) - syntax_profile(document.language).call_target(self, document, node) + syntax_profile(document.language).call_target(adapter_helpers, document, node) end def ruby_call_target(node) @@ -2678,7 +2716,7 @@ def noise_call?(target) end def state_declaration(node, language: nil) - syntax_profile(language).state_declaration(self, node) + syntax_profile(language).state_declaration(adapter_helpers, node) end def generic_state_declaration(node, language: nil) @@ -2800,11 +2838,11 @@ def inferred_assignment_type(node) end def generated_lua_compat_prelude?(document, node) - syntax_profile(document.language).generated_prelude?(self, document, node) + syntax_profile(document.language).generated_prelude?(adapter_helpers, document, node) end def state_read_target(node, language: nil) - syntax_profile(language).state_read_target(self, node) + syntax_profile(language).state_read_target(adapter_helpers, node) end def 
generic_state_read_target(node) @@ -2847,7 +2885,7 @@ def generic_state_read_target(node) end def state_target(lhs, language: nil) - syntax_profile(language).state_target(self, lhs) + syntax_profile(language).state_target(adapter_helpers, lhs) end def generic_state_target(lhs) diff --git a/gems/decomplex/test/syntax_test.rb b/gems/decomplex/test/syntax_test.rb index e309aabf6..ce144b5fc 100644 --- a/gems/decomplex/test/syntax_test.rb +++ b/gems/decomplex/test/syntax_test.rb @@ -125,6 +125,25 @@ def test_tree_sitter_language_profile_owns_parser_metadata refute csharp.first_argument_receiver? end + def test_tree_sitter_document_walks_seed_language_context + adapter = Decomplex::Syntax::TreeSitterAdapter.new + document = Struct.new(:root, :file, :language, :lines) + .new(Object.new, "/tmp/demo.py", :python, []) + captured = [] + + adapter.define_singleton_method(:walk) do |_root, stack, &_block| + captured << stack + end + + adapter.decision_sites(document) + adapter.branch_decisions(document, immutable_readers: {}, immutable_reader_types: {}, type_aliases: {}) + adapter.branch_arms(document) + adapter.structural_facts(document) + + expected = [{ file_owner: "demo", language: :python }] + assert_equal [expected, expected, expected, expected], captured + end + def test_force_language_override_handles_ambiguous_headers assert_equal :c, Decomplex::Syntax.language_for("include/demo.h") From b32e7fa7605d9d0abbb7c9e19776af17745351ad Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Fri, 19 Jun 2026 11:43:24 +0000 Subject: [PATCH 23/52] Remove generic syntax profile fallback --- gems/decomplex/lib/decomplex/syntax.rb | 64 ++++++++++++-------------- gems/decomplex/test/syntax_test.rb | 23 +++++++++ 2 files changed, 53 insertions(+), 34 deletions(-) diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 5a3ca9584..571fc6356 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -464,12 
+464,12 @@ def visibility(syntax, _document, node) class JavaScriptSyntaxAdapter < TreeSitterLanguageAdapter def visibility(syntax, _document, node) - syntax.modifier_visibility(node) || typescript_visibility(syntax, node) + syntax.modifier_visibility(node) || private_name_visibility(syntax, node) end private - def typescript_visibility(syntax, node) + def private_name_visibility(syntax, node) function_name(syntax, node).to_s.start_with?("#") ? :private : :public end end @@ -517,16 +517,25 @@ class TreeSitterLanguageAdapterHelpers ].freeze def initialize(adapter) - @adapter = adapter + @helpers = HELPER_METHODS.each_with_object({}) do |helper, helpers| + helpers[helper] = adapter.method(helper) + end.freeze end HELPER_METHODS.each do |helper| define_method(helper) do |*args, **kwargs, &block| - if kwargs.empty? - @adapter.__send__(helper, *args, &block) - else - @adapter.__send__(helper, *args, **kwargs, &block) - end + call_helper(helper, *args, **kwargs, &block) + end + end + + private + + def call_helper(helper, *args, **kwargs, &block) + method = @helpers.fetch(helper) + if kwargs.empty? + method.call(*args, &block) + else + method.call(*args, **kwargs, &block) end end end @@ -626,12 +635,6 @@ def initialize(adapter) LANGUAGE_BY_EXTENSION = LANGUAGE_PROFILES.values.each_with_object({}) do |profile, index| profile.extensions.each { |extension| index[extension] ||= profile.language } end.freeze - GENERIC_LANGUAGE_PROFILE = TreeSitterLanguageAdapter.new( - language: :generic, - extensions: [], - lexicon: RUBY_LEXICON, - package: "" - ).freeze module_function @@ -697,12 +700,16 @@ def supported_source?(file, parser: self.parser) end def language_lexicon(language) - key = language.to_s.empty? ? nil : language.to_sym - language_profile(key).lexicon + language_profile(language).lexicon end def language_profile(language) - LANGUAGE_PROFILES.fetch(language.to_sym) + key = language.to_s.empty? ? 
nil : language.to_sym + raise ArgumentError, "missing Syntax language profile" unless key + + LANGUAGE_PROFILES.fetch(key) + rescue KeyError + raise ArgumentError, "unsupported Syntax language profile: #{language.inspect}" end class Document @@ -1175,7 +1182,9 @@ def adapter_helpers end def syntax_profile(language) - language ? Syntax.language_profile(language) : Syntax::GENERIC_LANGUAGE_PROFILE + raise ArgumentError, "missing Syntax language profile context" if language.nil? + + Syntax.language_profile(language) end def parser_for(language) @@ -1366,13 +1375,6 @@ def ruby_method_visibility(node) modifier_visibility(node) end - def python_visibility(node) - name = function_name(node).to_s - return :private if name.start_with?("_") && !name.start_with?("__") - - :public - end - def exported_name_visibility(name) text = name.to_s return nil if text.empty? @@ -1380,12 +1382,6 @@ def exported_name_visibility(name) text.match?(/\A[A-Z]/) ? :public : :private end - def typescript_visibility(node) - return :private if function_name(node).to_s.start_with?("#") - - :public - end - def modifier_visibility(node) return :private if node.children.any? { |child| child.text == "private" } return :protected if node.children.any? { |child| child.text == "protected" } @@ -2394,7 +2390,7 @@ def owner_for_node(document, node, stack: nil, language: nil) stacked_owner = current_owner_from_stack(Array(stack)) return stacked_owner if stacked_owner - chain = owner_chain_for_node(document, node) + chain = owner_chain_for_node(document, node, language: language) return chain.join("::") unless chain.empty? 
return file_owner(document.file) if document @@ -2402,14 +2398,14 @@ def owner_for_node(document, node, stack: nil, language: nil) nil end - def owner_chain_for_node(document, node) + def owner_chain_for_node(document, node, language: nil) chain = [] seen = Set.new seen_nodes = Set.new parent = parent_node(node) while parent && !seen_nodes.include?(node_key(parent)) seen_nodes << node_key(parent) - if (owner = owner_name_from_declaration(document, parent)) + if (owner = owner_name_from_declaration(document, parent, language: language)) unless seen.include?(owner) chain << owner seen << owner diff --git a/gems/decomplex/test/syntax_test.rb b/gems/decomplex/test/syntax_test.rb index ce144b5fc..e13878572 100644 --- a/gems/decomplex/test/syntax_test.rb +++ b/gems/decomplex/test/syntax_test.rb @@ -125,6 +125,29 @@ def test_tree_sitter_language_profile_owns_parser_metadata refute csharp.first_argument_receiver? end + def test_language_profile_fails_loudly_without_supported_language + refute Decomplex::Syntax.const_defined?(:GENERIC_LANGUAGE_PROFILE, false) + + missing = assert_raises(ArgumentError) do + Decomplex::Syntax.language_profile(nil) + end + assert_match(/missing Syntax language profile/, missing.message) + + unsupported = assert_raises(ArgumentError) do + Decomplex::Syntax.language_profile(:wat) + end + assert_match(/unsupported Syntax language profile/, unsupported.message) + end + + def test_tree_sitter_adapter_requires_language_profile_context + adapter = Decomplex::Syntax::TreeSitterAdapter.new + + error = assert_raises(ArgumentError) do + adapter.send(:syntax_profile, nil) + end + assert_match(/missing Syntax language profile context/, error.message) + end + def test_tree_sitter_document_walks_seed_language_context adapter = Decomplex::Syntax::TreeSitterAdapter.new document = Struct.new(:root, :file, :language, :lines) From a408e47488719acf032dbe8c14553e5e221cc744 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Fri, 19 Jun 2026 12:09:03 +0000 Subject: 
[PATCH 24/52] Move Tree-sitter quirks into language profiles --- gems/decomplex/lib/decomplex/syntax.rb | 3810 +++++++++-------- .../decomplex/syntax/tree_sitter_adapter.rs | 168 +- gems/decomplex/test/syntax_test.rb | 20 +- 3 files changed, 2069 insertions(+), 1929 deletions(-) diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 571fc6356..cb557320a 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -287,45 +287,45 @@ def first_argument_receiver? @first_argument_receiver end - def function_name(syntax, node) + def function_name(node) case node.kind when "method", "function_definition", "function_declaration", "method_definition", "function_item" - syntax.named_field(node, "name")&.text || - syntax.declarator_name(syntax.named_field(node, "declarator")) || - syntax.first_named_text(node, %w[identifier constant property_identifier]) + named_field(node, "name")&.text || + declarator_name(named_field(node, "declarator")) || + first_named_text(node, %w[identifier constant property_identifier]) when "method_declaration" - syntax.named_field(node, "name")&.text || - syntax.first_named_text(node, %w[field_identifier identifier]) + named_field(node, "name")&.text || + first_named_text(node, %w[field_identifier identifier]) end end - def function_kind(syntax, node, stack) - syntax.owner_for_node(nil, node, stack: stack) ? :method : :function + def function_kind(_document, node, stack) + owner_for_node(nil, node, stack: stack) ? 
:method : :function end - def visibility(syntax, _document, node) - syntax.modifier_visibility(node) + def visibility(_document, node) + modifier_visibility(node) end - def owner_name_from_declaration(syntax, document, node) + def owner_name_from_declaration(document, node) case node.kind when "class", "class_definition", "class_declaration", "class_specifier", "module" - syntax.named_field(node, "name")&.text || - syntax.first_named_text(node, %w[constant identifier type_identifier]) + named_field(node, "name")&.text || + first_named_text(node, %w[constant identifier type_identifier]) when "impl_item", "impl_block" - syntax.impl_owner_name(node) + impl_owner_name(node) when "struct_item", "struct_spec", "struct_specifier", "type_spec", "type_declaration" - syntax.named_field(node, "name")&.text || - syntax.first_named_text(node, %w[type_identifier identifier]) + named_field(node, "name")&.text || + first_named_text(node, %w[type_identifier identifier]) when "struct_declaration", "union_declaration", "enum_declaration" - syntax.bound_container_name(node) || - syntax.returned_container_owner(document, node) || - syntax.anonymous_owner_name(document, node) + bound_container_name(node) || + returned_container_owner(document, node) || + anonymous_owner_name(document, node) end end - def owner_kind(syntax, node) + def owner_kind(node) case node.kind when "class", "class_definition", "class_declaration", "class_specifier" then :class when "module" then :module @@ -337,104 +337,104 @@ def owner_kind(syntax, node) end end - def function_receiver_name(syntax, node, stack) - receiver_param = syntax.method_receiver_param_node(node) + def function_receiver_name(node, stack) + receiver_param = method_receiver_param_node(node) receiver_param&.text || - receiver_convention_param_name(syntax, node, language: syntax.current_language(stack)) + receiver_convention_param_name(node, stack: stack) end - def receiver_convention_owner_name(syntax, node, language:) + def 
receiver_convention_owner_name(node, **_context) return nil unless first_argument_receiver? return nil unless node.kind == "function_definition" - receiver = syntax.first_argument_receiver_parameter(node) + receiver = first_argument_receiver_parameter(node) return nil unless receiver - type = syntax.normalize_type_owner(receiver[:type]) - name = function_name(syntax, node).to_s + type = normalize_type_owner(receiver[:type]) + name = function_name(node).to_s return nil if type.empty? || name.empty? - prefix = syntax.snake_case_type_name(type) + prefix = snake_case_type_name(type) name.start_with?("#{prefix}_") ? type : nil end - def receiver_convention_param_name(syntax, node, language:) + def receiver_convention_param_name(node, **_context) return nil unless first_argument_receiver? - syntax.first_argument_receiver_parameter(node)&.fetch(:name, nil) + first_argument_receiver_parameter(node)&.fetch(:name, nil) end - def generated_prelude?(_syntax, _document, _node) + def generated_prelude?(_document, _node) false end - def call_target(syntax, document, node) + def call_target(document, node) case node.kind when "call_expression", "method_invocation", "invocation_expression" - syntax.generic_call_target(document, node) + generic_call_target(document, node) when "attribute", "selector_expression", "field", "field_access", "member_expression", "member_access_expression", "field_expression", "expression_list" - syntax.adjacent_argument_call_target(node) + adjacent_argument_call_target(node) end end - def state_declaration(syntax, node) - syntax.generic_state_declaration(node, language: language) + def state_declaration(node) + generic_state_declaration(node) end - def state_read_target(syntax, node) - syntax.generic_state_read_target(node) + def state_read_target(node) + generic_state_read_target(node) end - def state_target(syntax, lhs) - syntax.generic_state_target(lhs) + def state_target(lhs) + generic_state_target(lhs) end end class RubySyntaxAdapter < 
TreeSitterLanguageAdapter - def function_name(syntax, node) + def function_name(node) case node.kind when "body_statement" - syntax.hidden_ruby_method_name(node) + hidden_ruby_method_name(node) when "singleton_method" - name = syntax.named_field(node, "name")&.text || + name = named_field(node, "name")&.text || node.named_children.reverse.find do |child| %w[identifier field_identifier property_identifier].include?(child.kind) end&.text name && "self.#{name}" when "argument_list" - syntax.inline_def_name(node) + inline_def_name(node) else super end end - def visibility(syntax, _document, node) - return syntax.ruby_inline_def_visibility(node) if syntax.inline_def_argument_list?(node) + def visibility(_document, node) + return ruby_inline_def_visibility(node) if inline_def_argument_list?(node) - syntax.ruby_method_visibility(node) + ruby_method_visibility(node) end - def owner_name_from_declaration(syntax, document, node) - return syntax.hidden_ruby_owner_name(node) if syntax.hidden_ruby_owner_declaration?(node) + def owner_name_from_declaration(document, node) + return hidden_ruby_owner_name(node) if hidden_ruby_owner_declaration?(node) super end - def owner_kind(syntax, node) - return syntax.hidden_ruby_owner_kind(node) if syntax.hidden_ruby_owner_declaration?(node) + def owner_kind(node) + return hidden_ruby_owner_kind(node) if hidden_ruby_owner_declaration?(node) super end - def call_target(syntax, document, node) + def call_target(document, node) case node.kind when "call" - syntax.ruby_call_target(node) + ruby_call_target(node) when "body_statement" - syntax.ruby_bare_body_call_target(document, node) + ruby_bare_body_call_target(node) when "identifier" - syntax.ruby_bare_call_target(document, node) + ruby_bare_call_target(node) else super end @@ -442,8 +442,8 @@ def call_target(syntax, document, node) end class PythonSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(syntax, _document, node) - name = function_name(syntax, node).to_s + def 
visibility(_document, node) + name = function_name(node).to_s return :private if name.start_with?("_") && !name.start_with?("__") :public @@ -451,874 +451,620 @@ def visibility(syntax, _document, node) end class GoSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(syntax, _document, node) - syntax.exported_name_visibility(function_name(syntax, node)) + def visibility(_document, node) + exported_name_visibility(function_name(node)) end end class RustSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(syntax, _document, node) - syntax.modifier_visibility(node) || :private + def visibility(_document, node) + modifier_visibility(node) || :private end end class JavaScriptSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(syntax, _document, node) - syntax.modifier_visibility(node) || private_name_visibility(syntax, node) + def visibility(_document, node) + modifier_visibility(node) || private_name_visibility(node) end private - def private_name_visibility(syntax, node) - function_name(syntax, node).to_s.start_with?("#") ? :private : :public + def private_name_visibility(node) + function_name(node).to_s.start_with?("#") ? 
:private : :public end end class CppSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(syntax, _document, node) - syntax.modifier_visibility(node) || syntax.cpp_visibility(node) + def visibility(_document, node) + modifier_visibility(node) || cpp_visibility(node) end end class CSharpSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(syntax, _document, node) - syntax.modifier_visibility(node) || :private + def visibility(_document, node) + modifier_visibility(node) || :private end end class CSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(syntax, _document, node) - syntax.c_visibility(node) + def visibility(_document, node) + c_visibility(node) end end class LuaSyntaxAdapter < TreeSitterLanguageAdapter - def generated_prelude?(syntax, document, node) - return false unless syntax.line(node) == 1 + def generated_prelude?(document, node) + return false unless line(node) == 1 first_line = document.lines.first.to_s first_line.include?("_tl_compat") && first_line.include?("compat53.module") end end - class TreeSitterLanguageAdapterHelpers - HELPER_METHODS = %i[ - adjacent_argument_call_target anonymous_owner_name bound_container_name - c_visibility cpp_visibility current_language declarator_name - exported_name_visibility first_argument_receiver_parameter first_named_text - generic_call_target generic_state_declaration generic_state_read_target - generic_state_target hidden_ruby_method_name hidden_ruby_owner_declaration? - hidden_ruby_owner_kind hidden_ruby_owner_name impl_owner_name - inline_def_argument_list? 
inline_def_name line method_receiver_param_node - modifier_visibility named_field normalize_type_owner owner_for_node - returned_container_owner ruby_bare_body_call_target ruby_bare_call_target - ruby_call_target ruby_inline_def_visibility ruby_method_visibility - snake_case_type_name - ].freeze - - def initialize(adapter) - @helpers = HELPER_METHODS.each_with_object({}) do |helper, helpers| - helpers[helper] = adapter.method(helper) - end.freeze - end + class ZigSyntaxAdapter < TreeSitterLanguageAdapter + def state_declaration(node) + return zig_container_field_declaration(node) if node.kind == "container_field" - HELPER_METHODS.each do |helper| - define_method(helper) do |*args, **kwargs, &block| - call_helper(helper, *args, **kwargs, &block) - end + super end private - def call_helper(helper, *args, **kwargs, &block) - method = @helpers.fetch(helper) - if kwargs.empty? - method.call(*args, &block) - else - method.call(*args, **kwargs, &block) - end + def zig_container_field_declaration(node) + name = node.named_children.find { |child| child.kind == "identifier" } + return nil unless name + + { field: name.text, type: declared_type_text(node, name) } end end - LanguageProfile = TreeSitterLanguageAdapter + class CppSyntaxAdapter + def implicit_state_accesses? 
+ true + end - LANGUAGE_PROFILES = { - ruby: RubySyntaxAdapter.new( - language: :ruby, - extensions: %w[.rb], - lexicon: RUBY_LEXICON, - package: "tree-sitter-ruby" - ), - python: PythonSyntaxAdapter.new( - language: :python, - extensions: %w[.py .pyi], - lexicon: PYTHON_LEXICON, - package: "tree-sitter-python" - ), - javascript: JavaScriptSyntaxAdapter.new( - language: :javascript, - extensions: %w[.js .jsx .mjs .cjs], - lexicon: JAVASCRIPT_LEXICON, - package: "tree-sitter-javascript" - ), - typescript: JavaScriptSyntaxAdapter.new( - language: :typescript, - extensions: %w[.ts .tsx], - lexicon: JAVASCRIPT_LEXICON, - package: "tree-sitter-typescript" - ), - go: GoSyntaxAdapter.new( - language: :go, - extensions: %w[.go], - lexicon: GO_LEXICON, - package: "tree-sitter-go" - ), - rust: RustSyntaxAdapter.new( - language: :rust, - extensions: %w[.rs], - lexicon: RUST_LEXICON, - package: "tree-sitter-rust" - ), - zig: TreeSitterLanguageAdapter.new( - language: :zig, - extensions: %w[.zig], - lexicon: ZIG_LEXICON, - package: "@tree-sitter-grammars/tree-sitter-zig" - ), - lua: LuaSyntaxAdapter.new( - language: :lua, - extensions: %w[.lua], - lexicon: LUA_LEXICON, - package: "@tree-sitter-grammars/tree-sitter-lua" - ), - c: CSyntaxAdapter.new( - language: :c, - extensions: %w[.c .h], - lexicon: C_LEXICON, - package: "tree-sitter-c", - first_argument_receiver: true - ), - cpp: CppSyntaxAdapter.new( - language: :cpp, - extensions: %w[.cc .cpp .cxx .hh .hpp .hxx], - lexicon: CPP_LEXICON, - package: "tree-sitter-cpp" - ), - csharp: CSharpSyntaxAdapter.new( - language: :csharp, - extensions: %w[.cs], - lexicon: CSHARP_LEXICON, - package: "tree-sitter-c-sharp", - grammar_names: %w[c-sharp csharp], - tree_sitter_language_name: "c_sharp" - ), - java: TreeSitterLanguageAdapter.new( - language: :java, - extensions: %w[.java], - lexicon: JAVA_LEXICON, - package: "tree-sitter-java" - ), - swift: TreeSitterLanguageAdapter.new( - language: :swift, - extensions: %w[.swift], - lexicon: 
SWIFT_LEXICON, - package: "tree-sitter-swift" - ), - kotlin: TreeSitterLanguageAdapter.new( - language: :kotlin, - extensions: %w[.kt .kts], - lexicon: KOTLIN_LEXICON, - package: "tree-sitter-kotlin" - ) - }.freeze + private - LANGUAGE_BY_EXTENSION = LANGUAGE_PROFILES.values.each_with_object({}) do |profile, index| - profile.extensions.each { |extension| index[extension] ||= profile.language } - end.freeze + def cpp_visibility(node) + visibility = previous_cpp_access_specifier(node) + return visibility if visibility - module_function + owner = nearest_owner_declaration(node) + return :public if owner&.kind == "struct_specifier" - def parse(file, language: nil, parser: ENV.fetch("DECOMPLEX_PARSER", "tree_sitter")) - normalized_parser = parser.to_s.tr("-", "_") - lang = (language || language_for(file)).to_sym - key = document_cache_key(file, lang, normalized_parser) - document_cache.fetch(key) do - document_cache[key] = - case normalized_parser - when "", "tree_sitter", "treesitter" - TreeSitterAdapter.new.parse(file, language: lang) - else - raise ArgumentError, "unknown decomplex parser #{parser.inspect}" - end + :private end - end - - def document_cache - @document_cache ||= {} - end - def document_cache_key(file, language, parser) - stat = File.stat(file) - [File.expand_path(file), language, parser, stat.size, stat.mtime.to_f] - end + def previous_cpp_access_specifier(node) + sibling = prev_sibling(node) + while sibling + return sibling.text.to_sym if sibling.kind == "access_specifier" && + %w[public private protected].include?(sibling.text) - def parse_uncached(file, language: nil, parser: ENV.fetch("DECOMPLEX_PARSER", "tree_sitter")) - case parser.to_s.tr("-", "_") - when "", "tree_sitter", "treesitter" - TreeSitterAdapter.new.parse(file, language: language) - else - raise ArgumentError, "unknown decomplex parser #{parser.inspect}" + sibling = prev_sibling(sibling) + end + nil end - end - def parser - ENV.fetch("DECOMPLEX_PARSER", "tree_sitter").to_s.tr("-", 
"_") - end + def nearest_owner_declaration(node) + parent = parent_node(node) + seen = Set.new + while parent && !seen.include?(node_key(parent)) + seen << node_key(parent) + return parent if %w[class_specifier struct_specifier class class_definition class_declaration].include?(parent.kind) - def tree_sitter? - %w[tree_sitter treesitter].include?(parser) + parent = parent_node(parent) + end + nil + end end - def language_for(file) - forced = ENV["DECOMPLEX_FORCE_LANGUAGE"].to_s.strip - return forced.tr("-", "_").to_sym unless forced.empty? - - LANGUAGE_BY_EXTENSION.fetch(File.extname(file).downcase, :ruby) - end - - def supported_exts(parser: self.parser) - case parser.to_s.tr("-", "_") - when "", "tree_sitter", "treesitter" - LANGUAGE_PROFILES.values.flat_map(&:extensions).uniq - else - [] - end + class CSharpSyntaxAdapter + def implicit_state_accesses? + true + end end - def supported_source?(file, parser: self.parser) - supported_exts(parser: parser).include?(File.extname(file).downcase) - end + class CSyntaxAdapter + private - def language_lexicon(language) - language_profile(language).lexicon + def c_visibility(node) + node.children.any? { |child| child.text == "static" } ? :private : :public + end end - def language_profile(language) - key = language.to_s.empty? ? 
nil : language.to_sym - raise ArgumentError, "missing Syntax language profile" unless key + class RubySyntaxAdapter + def function_params(node) + return hidden_ruby_method_params(node) if hidden_ruby_method_definition?(node) - LANGUAGE_PROFILES.fetch(key) - rescue KeyError - raise ArgumentError, "unsupported Syntax language profile: #{language.inspect}" - end + params = super + if inline_def_argument_list?(node) + params = node.named_children.find { |child| child.kind == "method_parameters" } + &.named_children + &.filter_map { |param| parameter_name(param) } + &.uniq || params + end + params + end - class Document - attr_reader :file, :language, :source, :lines, :root, :adapter + def function_signature(document, node) + if hidden_ruby_method_definition?(node) + return normalize_text(hidden_ruby_method_signature(document, node)) + end - def initialize(file:, language:, source:, lines:, root:, adapter:) - @file = file - @language = language - @source = source - @lines = lines - @tree_sitter_facade = TreeSitterFacadeContext.new(root) - @root = @tree_sitter_facade.root - @adapter = adapter - end + signature = preceding_ruby_signature(document, node) + return signature unless signature.empty? 
- def decision_sites - @decision_sites ||= adapter.decision_sites(self) + super end - def state_writes - @state_writes ||= adapter.state_writes(self) + def state_declaration(node) + ruby_t_let_state_declaration(node) || super end - def state_reads - @state_reads ||= adapter.state_reads(self) + def state_read_target(node) + ruby_state_variable_target(node) || super end - def branch_decisions(immutable_readers:, immutable_reader_types:, type_aliases:) - adapter.branch_decisions( - self, - immutable_readers: immutable_readers, - immutable_reader_types: immutable_reader_types, - type_aliases: type_aliases - ) + def state_target(lhs) + ruby_state_variable_target(lhs) || super end - def function_defs - @function_defs ||= adapter.function_defs(self) - end + private - def owner_defs - @owner_defs ||= adapter.owner_defs(self) + def inline_def_argument_list?(node) + ts_node?(node) && node.kind == "argument_list" && node.children.first&.kind.to_s == "def" end - def call_sites - @call_sites ||= adapter.call_sites(self) - end + def inline_def_name(node) + return nil unless inline_def_argument_list?(node) - def state_declarations - @state_declarations ||= adapter.state_declarations(self) + receiver_index = node.named_children.index { |child| child.kind == "self" || child.kind == "constant" } + search = receiver_index ? node.named_children[(receiver_index + 1)..] : node.named_children + name = search&.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) }&.text + receiver_index ? 
"self.#{name}" : name end - def state_param_origins - @state_param_origins ||= adapter.state_param_origins(self) + def hidden_ruby_method_definition?(node) + ts_node?(node) && node.kind == "body_statement" && node.children.first&.kind.to_s == "def" end - def branch_arms - @branch_arms ||= adapter.branch_arms(self) - end + def hidden_ruby_method_name(node) + return nil unless hidden_ruby_method_definition?(node) - def immutable_struct_readers - adapter.immutable_struct_readers(lines) + receiver_index = node.named_children.index { |child| child.kind == "self" || child.kind == "constant" } + search = receiver_index ? node.named_children[(receiver_index + 1)..] : node.named_children + name = search&.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) }&.text + receiver_index ? "self.#{name}" : name end - def immutable_struct_reader_types - adapter.immutable_struct_reader_types(lines) + def hidden_ruby_method_params(node) + params = node.named_children.find { |child| child.kind == "method_parameters" } + return [] unless params + + params.named_children.filter_map { |param| parameter_name(param) }.uniq end - def type_aliases - adapter.type_aliases(lines) + def hidden_ruby_method_signature(document, node) + body = node.named_children.find { |child| child.kind == "body_statement" } + end_byte = body ? 
body.start_byte : node.end_byte + document.source.byteslice(node.start_byte, end_byte - node.start_byte).to_s.strip.sub(/;+\z/, "") + rescue StandardError + line_text(document, node).strip end - end - module SourceTextHelpers - module_function + def hidden_ruby_owner_declaration?(node) + return false unless ts_node?(node) + return false unless node.kind == "body_statement" - def immutable_struct_readers(lines) - readers = Hash.new { |h, k| h[k] = Set.new } - class_stack = [] - lines.each do |line| - if (match = line.match(/\A\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b/)) - class_stack << match[1] - next - end - if class_stack.any? && (match = line.match(/\A\s*const\s+:([A-Za-z_]\w*)\b/)) - readers[class_stack.last].add(match[1].to_sym) - next - end - class_stack.pop if class_stack.any? && line.match?(/\A\s*end\s*(?:#.*)?\z/) - end - readers + %w[class module].include?(node.children.first&.kind.to_s) end - def immutable_struct_reader_types(lines) - reader_types = Hash.new { |h, k| h[k] = {} } - class_stack = [] - lines.each do |line| - if (match = line.match(/\A\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b/)) - class_stack << match[1] - next - end - if class_stack.any? && (match = line.match(/\A\s*const\s+:([A-Za-z_]\w*)\s*,\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\b/)) - reader_types[class_stack.last][match[1].to_sym] = match[2] - next - end - class_stack.pop if class_stack.any? 
&& line.match?(/\A\s*end\s*(?:#.*)?\z/) - end - reader_types + def hidden_ruby_owner_name(node) + node.named_children.find { |child| %w[constant identifier type_identifier].include?(child.kind) }&.text end - def type_aliases(lines) - aliases = {} - lines.each do |line| - if (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*([A-Z]\w*(?:::[A-Z]\w*)*)\s*\}/)) - aliases[match[1]] = match[2] - elsif (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*([A-Z]\w*(?:::[A-Z]\w*)*)\b/)) - aliases[match[1]] = match[2] - end - end - aliases + def hidden_ruby_owner_kind(node) + node.children.first&.kind.to_s == "module" ? :module : :class end - end - class TreeSitterFacadeContext - attr_reader :root - - def initialize(raw_root) - @wrappers = {} - @children_cache = {} - @named_children_cache = {} - @named_field_cache = {} - @parent_cache = {} - @prev_sibling_cache = {} - @next_sibling_cache = {} - @prev_named_sibling_cache = {} - @next_named_sibling_cache = {} - @root = wrap(raw_root) - index_tree(raw_root) + def ruby_method_visibility(node) + modifier_visibility(node) end - def wrap(raw) - return nil unless raw - return raw if raw.is_a?(TreeSitterNodeFacade) + def ruby_inline_def_visibility(node) + parent = parent_node(node) + return nil unless parent&.kind == "call" - key = node_key(raw) - @wrappers[key] ||= TreeSitterNodeFacade.new(self, raw, key) + target = ruby_call_target(parent) + visibility = target && target[:receiver] == "self" && target[:message]&.to_sym + %i[private protected public].include?(visibility) ? visibility : nil end - def children(raw) - node = unwrap(raw) - @children_cache.fetch(node_key(node)) { [] } - end + def ruby_call_target(node) + receiver = named_field(node, "receiver") + method = named_field(node, "method") + message = method&.text || first_named_text(node, %w[identifier constant]) + message ||= normalize_text(node.text) if receiver.nil? 
&& ruby_simple_call_text?(node.text) + return nil unless message - def named_children(raw) - node = unwrap(raw) - @named_children_cache.fetch(node_key(node)) { [] } + { + receiver: receiver ? normalize_text(receiver.text) : "self", + message: message, + arguments: ruby_argument_texts(node) + } end - def child_by_field_name(raw, name) - node = unwrap(raw) - key = [node_key(node), name.to_s] - return @named_field_cache[key] if @named_field_cache.key?(key) + def ruby_bare_call_target(node) + return nil unless ruby_bare_call_identifier?(node) - @named_field_cache[key] = wrap(node.child_by_field_name(name)) - rescue StandardError - nil + { + receiver: "self", + message: node.text, + arguments: [] + } end - def parent(raw) - @parent_cache[node_key(unwrap(raw))] - end + def ruby_bare_body_call_target(node) + return nil if hidden_ruby_method_definition?(node) || hidden_ruby_owner_declaration?(node) - def prev_sibling(raw) - @prev_sibling_cache[node_key(unwrap(raw))] + explicit = ruby_explicit_receiver_body_call_target(node) + return explicit if explicit + + message = node.text.to_s.strip + return nil unless ruby_simple_call_text?(message) + return nil if %w[true false nil self].include?(message) + + { + receiver: "self", + message: message, + arguments: [] + } end - def next_sibling(raw) - @next_sibling_cache[node_key(unwrap(raw))] + def ruby_explicit_receiver_body_call_target(node) + receiver, message = node.named_children + return nil unless receiver && message + return nil unless %w[self constant identifier].include?(receiver.kind) + return nil unless %w[identifier constant].include?(message.kind) + + { + receiver: normalize_text(receiver.text), + message: message.text, + arguments: [] + } end - def prev_named_sibling(raw) - @prev_named_sibling_cache[node_key(unwrap(raw))] + def ruby_simple_call_text?(text) + text.to_s.strip.match?(/\A[a-z_]\w*[!?=]?\z/) end - def next_named_sibling(raw) - @next_named_sibling_cache[node_key(unwrap(raw))] + def 
ruby_bare_call_identifier?(node) + parent = parent_node(node) + return false unless parent + return false if ruby_declaration_name?(node, parent) + return false if %w[method_parameters block_parameters argument_list assignment].include?(parent.kind) + if parent.kind == "call" + return false if named_field(parent, "receiver") + + first = parent.named_children.first + return first == node && next_sibling(node)&.kind == "argument_list" + end + return false if next_sibling(node)&.text == "=" || prev_sibling(node)&.text == "=" + return false if next_sibling(node)&.text == "." || prev_sibling(node)&.text == "." + + %w[body_statement then else elsif ensure rescue].include?(parent.kind) || + next_sibling(node)&.kind == "argument_list" end - def node_key(raw) - node = unwrap(raw) - [node.kind, node.start_byte, node.end_byte, node.named?] + def ruby_declaration_name?(node, parent) + return true if hidden_ruby_method_definition?(parent) + return true if hidden_ruby_owner_declaration?(parent) + return true if %w[method singleton_method class module].include?(parent.kind) + + false end - private + def ruby_argument_texts(node) + args = named_field(node, "arguments") || node.named_children.find { |child| child.kind == "argument_list" } + return [] unless args - def unwrap(raw) - raw.is_a?(TreeSitterNodeFacade) ? raw.raw : raw + values = args.named_children.map { |child| normalize_text(child.text) } + return values unless values.empty? + + text = args.text.to_s.strip + text = text[1...-1] if text.start_with?("(") && text.end_with?(")") + text.split(/\s*,\s*/).map { |arg| normalize_text(arg) }.reject(&:empty?) end - def index_tree(raw_root) - pending = [raw_root] - until pending.empty? - raw = pending.pop - key = node_key(raw) - raw_children = Array(raw.children) - wrapped_children = raw_children.map { |child| wrap(child) } - @children_cache[key] = wrapped_children - @named_children_cache[key] = wrapped_children.select(&:named?) 
+ def ruby_t_let_state_declaration(node) + lhs = named_field(node, "left") || node.named_children.first + rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] + target = state_target(lhs) + return nil unless target && target[:receiver] == "self" && target[:field].to_s.start_with?("@") + return nil unless rhs&.kind == "call" - raw_children.each do |child| - child_key = node_key(child) - @parent_cache[child_key] = wrap(raw) - end + receiver = named_field(rhs, "receiver") || rhs.named_children.first + method = named_field(rhs, "method") || rhs.named_children.find { |child| child.kind == "identifier" } + return nil unless receiver&.text == "T" && method&.text == "let" - index_siblings(raw_children, @prev_sibling_cache, @next_sibling_cache) - index_siblings(raw_children.select(&:named?), @prev_named_sibling_cache, @next_named_sibling_cache) + args = named_field(rhs, "arguments") || rhs.named_children.find { |child| child.kind == "argument_list" } + type = args&.named_children&.[](1)&.text + return nil if type.to_s.empty? - pending.concat(raw_children.reverse) - end + { field: target[:field], type: normalize_text(type) } end - def index_siblings(raw_children, prev_cache, next_cache) - raw_children.each_with_index do |child, index| - key = node_key(child) - prev_cache[key] = wrap(raw_children[index - 1]) if index.positive? - next_cache[key] = wrap(raw_children[index + 1]) if index + 1 < raw_children.length - end + def skip_state_write_node?(node) + node.kind == "operator_assignment" || + (assignment_lhs?(node) && next_sibling(node)&.text.to_s != "=" && !ruby_instance_variable_node?(node)) end - end - class TreeSitterNodeFacade - attr_reader :context, :raw - - def initialize(context, raw, key) - @context = context - @raw = raw - @key = key + def skip_state_write_target?(target) + super || target[:field].to_s.start_with?("$") end - def kind - @kind ||= raw.kind + def state_write_source_node(node) + assignment_lhs?(node) ? 
(parent_node(node) || node) : super end - def text - @text ||= raw.text.to_s + def direct_state_ref(node) + node.text if ruby_state_variable_node?(node) end - def start_byte - raw.start_byte - end + def hidden_if?(node) + return false unless ts_node?(node) + return false unless %w[expression_statement block body_statement].include?(node.kind) - def end_byte - raw.end_byte + %w[if unless].include?(first_token_kind(node)) end - def start_point - raw.start_point - end + def hidden_modifier_if?(node) + return false unless ts_node?(node) + return false unless node.kind == "body_statement" - def end_point - raw.end_point + seen_named = false + node.children.any? do |child| + seen_named ||= child.named? + seen_named && !child.named? && %w[if unless].include?(child.kind) + end end - def named? - raw.named? + def modifier_condition(node) + node.named_children.last end - def has_error? - raw.respond_to?(:has_error?) && raw.has_error? - end + def hidden_case?(node) + return false unless ts_node?(node) + return false unless %w[body_statement block_body argument_list].include?(node.kind) - def children - context.children(self) + first_token_kind(node) == "case" end - def child_count - children.length + def hidden_match?(node) + node.kind == "expression_statement" && + first_token_kind(node) == "match" && + node.named_children.any? { |child| child.kind == "match_block" } end - def named_children - context.named_children(self) - end + def case_pattern_texts(patterns) + texts = super + return texts unless texts.any? { |text| text.start_with?("*") } - def named_child_count - named_children.length + out = [] + pending_plain = [] + texts.each_with_index do |text, index| + if text.start_with?("*") + out << pending_plain.join(", ") unless pending_plain.empty? + pending_plain = [] + out << if texts.size == 1 || index.positive? + text.delete_prefix("*") + else + text + end + else + pending_plain << text + end + end + out << pending_plain.join(", ") unless pending_plain.empty? 
+ out end - def child_by_field_name(name) - context.child_by_field_name(self, name) - end + def ruby_state_variable_target(node) + return nil unless ruby_state_variable_node?(node) - def parent - context.parent(self) + { receiver: "self", field: node.text } end - def prev_sibling - context.prev_sibling(self) + def ruby_state_variable_node?(node) + ts_node?(node) && %w[instance_variable global_variable].include?(node.kind) end - def next_sibling - context.next_sibling(self) + def ruby_instance_variable_node?(node) + ts_node?(node) && node.kind == "instance_variable" end - def prev_named_sibling - context.prev_named_sibling(self) - end + def preceding_ruby_signature(document, node) + cursor = line(node) - 2 + lines = document.lines + cursor -= 1 while cursor >= 0 && lines[cursor].to_s.strip.empty? + return "" if cursor.negative? - def next_named_sibling - context.next_named_sibling(self) - end + stripped = lines[cursor].to_s.strip + if stripped == "end" + start = cursor + while start >= 0 + text = lines[start].to_s.strip + return normalize_text(lines[start..cursor].join("\n")) if text == "sig do" + return "" if start != cursor && text.match?(/\A(?:def|class|module)\b/) - def ==(other) - other = other.raw if other.is_a?(TreeSitterNodeFacade) - other.respond_to?(:kind) && - kind == other.kind && - start_byte == other.start_byte && - end_byte == other.end_byte && - named? == other.named? + start -= 1 + end + return "" if start.negative? + end + + return normalize_text(stripped) if stripped.start_with?("sig ") + return "" unless stripped == "}" || stripped.end_with?("}") + + start = cursor + while start >= 0 + text = lines[start].to_s.strip + return normalize_text(lines[start..cursor].join("\n")) if text.start_with?("sig ") + return "" if text.match?(/\A(?:def|class|module)\b/) + + start -= 1 + end + "" end - alias eql? 
== + def method_param_types(document) + types_by_method = {} + pending_sig = +"" + document.lines.each do |line| + pending_sig << line if pending_sig_active?(line, pending_sig) + if (match = line.match(/\A\s*def\s+([A-Za-z_]\w*[!?=]?)(?:\s|\(|$)/)) + types_by_method[match[1]] = sig_param_types(pending_sig) + pending_sig = +"" + end + end + types_by_method + end - def hash - @key.hash + def pending_sig_active?(line, pending_sig) + !pending_sig.empty? || line.match?(/\A\s*sig\b/) end - def inspect - "#<#{self.class} kind=#{kind.inspect} start_byte=#{start_byte} end_byte=#{end_byte}>" + def sig_param_types(sig_source) + match = sig_source.match(/params\s*\((.*?)\)/m) + return {} unless match + + match[1].scan(/([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)/).to_h end end - class TreeSitterAdapter + class TreeSitterLanguageAdapter BRANCH_KINDS = %w[if unless if_statement if_modifier unless_modifier if_expression while until while_statement for for_statement case switch_statement expression_switch_statement switch_expression match_statement match_expression when_expression].freeze NOISE_MESSAGES = %w[! != == === < <= > >= [] []= to_s inspect class].freeze - def parse(file, language: nil) - lang = (language || Syntax.language_for(file)).to_sym - source = File.read(file) - parser = parser_for(lang) - tree = parser.parse(source) - raise "tree-sitter parse timed out for #{file}" unless tree + def initial_stack(document) + [{ file_owner: file_owner(document.file), language: document.language }] + end - Document.new( - file: file, - language: lang, - source: source, - lines: source.lines, - root: tree.root_node, - adapter: self - ) + def push_context(document, stack, node) + next_stack = push_owner_context(document, stack, node) + name = function_name(node) + next_stack = name ? next_stack + [function_context(node, next_stack)] : next_stack + control = control_context(node) + control ? 
next_stack + [{ control: control }] : next_stack end - def decision_sites(document) - out = [] - walk(document.root, initial_stack(document)) do |node, stack| - record_decision_site(document, node, stack, out) - end + def structural_facts_for_node(document, node, stack) + out = { + function_defs: [], + owner_defs: [], + call_sites: [], + state_declarations: [], + state_param_origins: [], + state_reads: [], + state_writes: [] + } + record_function_def(document, node, stack, out[:function_defs]) + record_owner_def(document, node, stack, out[:owner_defs]) + record_call_site(document, node, stack, out[:call_sites]) + record_state_declaration(document, node, stack, out[:state_declarations]) + record_state_param_origin(document, node, stack, out[:state_param_origins]) + record_state_read(document, node, stack, out[:state_reads]) + record_state_write(document, node, stack, out[:state_writes]) out end - def state_writes(document) - structural_facts(document).fetch(:state_writes) + def after_structural_facts(document, out) + record_implicit_state_accesses(document, out) if implicit_state_accesses? 
end - def state_reads(document) - structural_facts(document).fetch(:state_reads) + def decision_site_facts(document, node, stack) + out = [] + record_decision_site(document, node, stack, out) + out end - def branch_decisions(document, immutable_readers:, immutable_reader_types:, type_aliases:) + def branch_decision_facts(document, node, stack, immutable_readers:, immutable_reader_types:, type_aliases:) out = [] - walk(document.root, initial_stack(document)) do |node, stack| - record_branch_decision( - document, - node, - stack, + record_branch_decision( + document, + node, + stack, out, immutable_readers: immutable_readers, immutable_reader_types: immutable_reader_types, type_aliases: type_aliases, - method_param_types: method_param_types(document.lines) + method_param_types: method_param_types(document) ) - end out end - def function_defs(document) - structural_facts(document).fetch(:function_defs) - end - - def owner_defs(document) - structural_facts(document).fetch(:owner_defs) + def branch_arm_facts(document, node, stack) + out = [] + record_branch_arm(document, node, stack, out) + out end - def call_sites(document) - structural_facts(document).fetch(:call_sites) + def implicit_state_accesses? 
+ false end - def state_declarations(document) - structural_facts(document).fetch(:state_declarations) - end + def function_params(node) + params = if node.kind == "method_declaration" + node.named_children.select { |child| child.kind == "parameter_list" }[1] + else + named_field(node, "parameters") || + node.named_children.find { |child| %w[parameters formal_parameters parameter_list].include?(child.kind) } + end + return [] unless params - def state_param_origins(document) - structural_facts(document).fetch(:state_param_origins) + params.named_children.filter_map do |param| + parameter_name(param) + end.uniq end - def structural_facts(document) - @structural_fact_cache ||= {} - @structural_fact_cache[document.object_id] ||= begin - out = { - function_defs: [], - owner_defs: [], - call_sites: [], - state_declarations: [], - state_param_origins: [], - state_reads: [], - state_writes: [] - } - walk(document.root, initial_stack(document)) do |node, stack| - record_function_def(document, node, stack, out[:function_defs]) - record_owner_def(document, node, stack, out[:owner_defs]) - record_call_site(document, node, stack, out[:call_sites]) - record_state_declaration(document, node, stack, out[:state_declarations]) - record_state_param_origin(document, node, stack, out[:state_param_origins]) - record_state_read(document, node, stack, out[:state_reads]) - record_state_write(document, node, stack, out[:state_writes]) + def function_signature(document, node) + body = named_field(node, "body") + text = + if body + document.source.byteslice(node.start_byte, body.start_byte - node.start_byte).to_s.strip + else + line_text(document, node).strip end - record_implicit_state_accesses(document, out) - out[:function_defs].uniq! { |fn| [fn.file, fn.owner, fn.name, fn.line] } - out[:owner_defs].uniq! { |owner| [owner.file, owner.name, owner.kind] } - out[:call_sites].uniq! 
{ |call| [call.file, call.owner, call.function, call.line, call.receiver, call.message] } - out[:state_declarations].uniq! { |decl| [decl.file, decl.owner, decl.field] } - out[:state_param_origins].uniq! { |origin| [origin.file, origin.owner, origin.function, origin.field, origin.param] } - out[:state_reads].uniq! { |read| [read.file, read.owner, read.function, read.line, read.receiver, read.field] } - out[:state_writes].uniq! { |write| [write.file, write.owner, write.function, write.line, write.receiver, write.field] } - out - end + normalize_text(text.empty? ? line_text(document, node) : text) + rescue StandardError + normalize_text(line_text(document, node)) end - def branch_arms(document) - out = [] - walk(document.root, initial_stack(document)) do |node, stack| - record_branch_arm(document, node, stack, out) - end - out + def method_param_types(_document) + {} end - def immutable_struct_readers(lines) - SourceTextHelpers.immutable_struct_readers(lines) - end + private - def immutable_struct_reader_types(lines) - SourceTextHelpers.immutable_struct_reader_types(lines) + def push_owner_context(document, stack, node) + owner = owner_name_from_declaration(document, node) + return stack unless owner + + parent_owner = current_owner_from_stack(stack) + full_owner = if parent_owner && parent_owner != owner && !owner.include?("::") + "#{parent_owner}::#{owner}" + else + owner + end + stack + [{ owner: full_owner, owner_declaration: true, owner_kind: owner_kind(node) }] end - def type_aliases(lines) - SourceTextHelpers.type_aliases(lines) - end - - private - - def initial_stack(document) - [{ file_owner: file_owner(document.file), language: document.language }] - end - - def adapter_helpers - @adapter_helpers ||= TreeSitterLanguageAdapterHelpers.new(self) - end - - def syntax_profile(language) - raise ArgumentError, "missing Syntax language profile context" if language.nil? 
- - Syntax.language_profile(language) - end - - def parser_for(language) - require_tree_sitter - lang_name = Syntax.language_profile(language).tree_sitter_language_name - register_language(lang_name, grammar_path(language)) - ::TreeSitter::Parser.new.tap { |parser| parser.language = lang_name } - end - - def require_tree_sitter - gem "tree_sitter", "~> 0.1" - require "tree_sitter" - rescue Gem::LoadError, LoadError => e - raise LoadError, "DECOMPLEX_PARSER=tree_sitter requires the tree_sitter gem: #{e.message}" - end - - def register_language(name, path) - @registered ||= {} - return if @registered[name] - - ::TreeSitter.register_language(name, path) - @registered[name] = true - end - - def grammar_path(language) - env_name = "DECOMPLEX_TS_#{language.to_s.upcase}_PATH" - return ENV.fetch(env_name) if ENV[env_name] && File.file?(ENV[env_name]) - - candidates = grammar_candidates(language) - found = candidates.find { |path| File.file?(path) } - return found if found - - raise LoadError, - "missing Tree-sitter grammar for #{language}. Set #{env_name} " \ - "to a parser shared library (.so/.dylib/.node). 
Checked: #{candidates.join(', ')}" - end - - def grammar_candidates(language) - profile = Syntax.language_profile(language) - pkg = profile.package - stems = profile.grammar_names - names = stems.flat_map do |stem| - ["#{stem}.so", "tree-sitter-#{stem}.so", - "libtree-sitter-#{stem}.so", "#{stem}.node", - "tree-sitter-#{stem}.node", - "#{stem}_binding.node", - "tree_sitter_#{stem.tr('-', '_')}_binding.node", - "@tree-sitter-grammars+tree-sitter-#{stem}.node"] - end - roots = [ - File.expand_path("../../vendor/tree-sitter", __dir__), - File.expand_path("../../vendor/tree-sitter/#{language}", __dir__), - File.expand_path("../../node_modules/#{pkg}", __dir__), - File.expand_path("../../node_modules/#{pkg}/build/Release", __dir__), - File.expand_path("../../../../node_modules/#{pkg}", __dir__), - File.expand_path("../../../../node_modules/#{pkg}/build/Release", __dir__), - File.expand_path("../../../../../node_modules/#{pkg}", __dir__), - File.expand_path("../../../../../node_modules/#{pkg}/build/Release", __dir__) - ] - all_prebuilds = roots.flat_map do |root| - stems.flat_map do |stem| - Dir.glob(File.join(root, "prebuilds", "*", "*tree-sitter-#{stem}.node")) - end - end - prebuilds = platform_prebuilds(all_prebuilds) - roots.product(names).map { |root, name| File.join(root, name) } + prebuilds - end - - def platform_prebuilds(paths) - os = host_os - arch = host_arch - return paths if os.nil? || arch.nil? - - paths.select { |path| path.include?("/#{os}-#{arch}/") } - end - - def host_os - case RbConfig::CONFIG["host_os"] - when /linux/i then "linux" - when /darwin/i then "darwin" - when /mswin|mingw|cygwin/i then "win32" - end - end - - def host_arch - case RbConfig::CONFIG["host_cpu"] - when /x86_64|amd64/i then "x64" - when /aarch64|arm64/i then "arm64" - end - end - - def walk(node, stack, &block) - return unless ts_node?(node) - - pending = [[node, stack]] - seen = Set.new - until pending.empty? 
- current, current_stack = pending.pop - next unless ts_node?(current) - key = node_key(current) - next if seen.include?(key) - - seen << key - - next_stack = push_context(current_stack, current) - yield current, next_stack - current.children.reverse_each { |child| pending << [child, next_stack] } - end - end - - def push_context(stack, node) - next_stack = push_owner_context(stack, node) - name = function_name(node, language: current_language(next_stack)) - next_stack = name ? next_stack + [function_context(node, next_stack)] : next_stack - control = control_context(node) - control ? next_stack + [{ control: control }] : next_stack - end - - def push_owner_context(stack, node) - owner = owner_name_from_declaration(nil, node, language: current_language(stack)) - return stack unless owner - - parent_owner = current_owner_from_stack(stack) - full_owner = if parent_owner && parent_owner != owner && !owner.include?("::") - "#{parent_owner}::#{owner}" - else - owner - end - stack + [{ owner: full_owner, owner_declaration: true, owner_kind: owner_kind(node, language: current_language(stack)) }] - end - - def current_function(stack) - entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:function] } - entry ? entry[:function] : "(top-level)" + def current_function(stack) + entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:function] } + entry ? 
entry[:function] : "(top-level)" end def current_owner(document, stack) @@ -1346,7 +1092,7 @@ def current_control(stack) def function_context(node, stack) { - function: function_name(node, language: current_language(stack)), + function: function_name(node), owner: function_owner_name(node, stack), params: function_params(node), receiver: function_receiver_name(node, stack) @@ -1356,265 +1102,112 @@ def function_context(node, stack) def function_owner_name(node, stack) receiver_owner_name(node) || current_owner_from_stack(stack) || - receiver_convention_owner_name(node, language: current_language(stack)) + receiver_convention_owner_name(node, stack: stack) end - def function_name(node, language: nil) - syntax_profile(language).function_name(adapter_helpers, node) + def line_text(document, node) + document.lines[line(node) - 1].to_s end - def function_kind(node, stack) - syntax_profile(current_language(stack)).function_kind(adapter_helpers, node, stack) - end + def control_context(node) + return :iterates if %w[while until while_statement for for_statement for_in_statement + loop_expression do_block].include?(node.kind) + return :conditional if branch_node?(node) - def visibility_for(document, node) - syntax_profile(document.language).visibility(adapter_helpers, document, node) + nil end - def ruby_method_visibility(node) - modifier_visibility(node) - end + def record_decision_site(document, node, stack, out) + return if generated_prelude?(document, node) - def exported_name_visibility(name) - text = name.to_s - return nil if text.empty? + if boolean_container?(node) && boolean_and?(node) + record_conjunction_decision(document, node, stack, out) + return + end - text.match?(/\A[A-Z]/) ? 
:public : :private - end + case node.kind + when "case", "switch_statement", "expression_switch_statement", "switch_expression", + "match_statement", "match_expression", "when_expression" + return if predicate_less_case?(node) - def modifier_visibility(node) - return :private if node.children.any? { |child| child.text == "private" } - return :protected if node.children.any? { |child| child.text == "protected" } - return :public if node.children.any? { |child| %w[public pub].include?(child.text) } + patterns = case_patterns(node) + return if patterns.size < 2 - nil - end + out << DecisionSite.new( + kind: :case_dispatch, + members: patterns, + file: document.file, + function: current_function(stack), + line: line(node), + span: span(node), + predicate: decision_predicate(node) + ) + when "body_statement", "block_body", "argument_list" + return unless hidden_case?(node) + return if node.named_children.any? { |child| child.kind == "case" } + return if predicate_less_case?(node) - def cpp_visibility(node) - visibility = previous_cpp_access_specifier(node) - return visibility if visibility + patterns = case_patterns(node) + return if patterns.size < 2 - owner = nearest_owner_declaration(node) - return :public if owner&.kind == "struct_specifier" + out << DecisionSite.new( + kind: :case_dispatch, + members: patterns, + file: document.file, + function: current_function(stack), + line: line(node), + span: span(node), + predicate: decision_predicate(node) + ) + when "expression_statement" + return unless hidden_match?(node) - :private - end + patterns = case_patterns(node) + return if patterns.size < 2 - def c_visibility(node) - node.children.any? { |child| child.text == "static" } ? 
:private : :public + out << DecisionSite.new( + kind: :case_dispatch, + members: patterns, + file: document.file, + function: current_function(stack), + line: line(node), + span: span(node), + predicate: decision_predicate(node) + ) + end end - def previous_cpp_access_specifier(node) - sibling = prev_sibling(node) - while sibling - return sibling.text.to_sym if sibling.kind == "access_specifier" && - %w[public private protected].include?(sibling.text) + def record_conjunction_decision(document, node, stack, out) + from_wrapper = parenthesized_wrapper?(node) + return if from_wrapper && + ts_node?(node.parent) && + boolean_container?(node.parent) && + boolean_and?(node.parent) - sibling = prev_sibling(sibling) - end - nil - end + node = node.named_children.first if from_wrapper + return if !from_wrapper && + ts_node?(node.parent) && + boolean_container?(node.parent) && + boolean_and?(node.parent) && + !same_span?(node.parent, node) - def nearest_owner_declaration(node) - parent = parent_node(node) - seen = Set.new - while parent && !seen.include?(node_key(parent)) - seen << node_key(parent) - return parent if %w[class_specifier struct_specifier class class_definition class_declaration].include?(parent.kind) + members = flatten_boolean_and(node).map { |child| decision_member_text(child) }.uniq.sort + return if members.size < 2 - parent = parent_node(parent) - end - nil + out << DecisionSite.new( + kind: :conjunction, + members: members, + file: document.file, + function: current_function(stack), + line: conjunction_span(node)[0], + span: conjunction_span(node), + predicate: normalize_text(node.text) + ) end - def function_params(node) - return hidden_ruby_method_params(node) if hidden_ruby_method_definition?(node) - - params = if node.kind == "method_declaration" - node.named_children.select { |child| child.kind == "parameter_list" }[1] - else - named_field(node, "parameters") || - node.named_children.find { |child| %w[parameters formal_parameters 
parameter_list].include?(child.kind) } - end - params ||= node.named_children.find { |child| child.kind == "method_parameters" } if inline_def_argument_list?(node) - return [] unless params - - params.named_children.filter_map do |param| - parameter_name(param) - end.uniq - end - - def parameter_name(param) - return nil unless ts_node?(param) - return param.text if %w[identifier simple_identifier shorthand_property_identifier_pattern].include?(param.kind) - - name = named_field(param, "name") || - param.named_children.find do |child| - %w[identifier simple_identifier field_identifier property_identifier].include?(child.kind) - end - text = name&.text.to_s - return nil if text.empty? || text == "_" - - text - end - - def function_signature(document, node) - if hidden_ruby_method_definition?(node) - return normalize_text(hidden_ruby_method_signature(document, node)) - end - if document.language == :ruby - signature = preceding_ruby_signature(document, node) - return signature unless signature.empty? - end - - body = named_field(node, "body") - text = - if body - document.source.byteslice(node.start_byte, body.start_byte - node.start_byte).to_s.strip - else - line_text(document, node).strip - end - normalize_text(text.empty? ? line_text(document, node) : text) - rescue StandardError - normalize_text(line_text(document, node)) - end - - def preceding_ruby_signature(document, node) - cursor = line(node) - 2 - lines = document.lines - cursor -= 1 while cursor >= 0 && lines[cursor].to_s.strip.empty? - return "" if cursor.negative? - - stripped = lines[cursor].to_s.strip - if stripped == "end" - start = cursor - while start >= 0 - text = lines[start].to_s.strip - return normalize_text(lines[start..cursor].join("\n")) if text == "sig do" - return "" if start != cursor && text.match?(/\A(?:def|class|module)\b/) - - start -= 1 - end - return "" if start.negative? 
- end - - return normalize_text(stripped) if stripped.start_with?("sig ") - return "" unless stripped == "}" || stripped.end_with?("}") - - start = cursor - while start >= 0 - text = lines[start].to_s.strip - return normalize_text(lines[start..cursor].join("\n")) if text.start_with?("sig ") - return "" if text.match?(/\A(?:def|class|module)\b/) - - start -= 1 - end - "" - end - - def line_text(document, node) - document.lines[line(node) - 1].to_s - end - - def control_context(node) - return :iterates if %w[while until while_statement for for_statement for_in_statement - loop_expression do_block].include?(node.kind) - return :conditional if branch_node?(node) - - nil - end - - def record_decision_site(document, node, stack, out) - return if generated_lua_compat_prelude?(document, node) - - if boolean_container?(node) && boolean_and?(node) - record_conjunction_decision(document, node, stack, out) - return - end - - case node.kind - when "case", "switch_statement", "expression_switch_statement", "switch_expression", - "match_statement", "match_expression", "when_expression" - return if ruby_predicate_less_case?(node) - - patterns = case_patterns(node) - return if patterns.size < 2 - - out << DecisionSite.new( - kind: :case_dispatch, - members: patterns, - file: document.file, - function: current_function(stack), - line: line(node), - span: span(node), - predicate: decision_predicate(node) - ) - when "body_statement", "block_body", "argument_list" - return unless hidden_case?(node) - return if node.named_children.any? 
{ |child| child.kind == "case" } - return if ruby_predicate_less_case?(node) - - patterns = case_patterns(node) - return if patterns.size < 2 - - out << DecisionSite.new( - kind: :case_dispatch, - members: patterns, - file: document.file, - function: current_function(stack), - line: line(node), - span: span(node), - predicate: decision_predicate(node) - ) - when "expression_statement" - return unless hidden_match?(node) - - patterns = case_patterns(node) - return if patterns.size < 2 - - out << DecisionSite.new( - kind: :case_dispatch, - members: patterns, - file: document.file, - function: current_function(stack), - line: line(node), - span: span(node), - predicate: decision_predicate(node) - ) - end - end - - def record_conjunction_decision(document, node, stack, out) - from_wrapper = parenthesized_wrapper?(node) - return if from_wrapper && - ts_node?(node.parent) && - boolean_container?(node.parent) && - boolean_and?(node.parent) - - node = node.named_children.first if from_wrapper - return if !from_wrapper && - ts_node?(node.parent) && - boolean_container?(node.parent) && - boolean_and?(node.parent) && - !same_span?(node.parent, node) - - members = flatten_boolean_and(node).map { |child| decision_member_text(child) }.uniq.sort - return if members.size < 2 - - out << DecisionSite.new( - kind: :conjunction, - members: members, - file: document.file, - function: current_function(stack), - line: conjunction_span(node)[0], - span: conjunction_span(node), - predicate: normalize_text(node.text) - ) - end - - def record_function_def(document, node, stack, out) - name = function_name(node, language: document.language) - return unless name + def record_function_def(document, node, stack, out) + name = function_name(node) + return unless name out << FunctionDef.new( file: document.file, @@ -1623,10 +1216,10 @@ def record_function_def(document, node, stack, out) line: line(node), span: span(node), body: node, - visibility: visibility_for(document, node), + visibility: 
visibility(document, node), params: function_params(node), signature: function_signature(document, node), - kind: function_kind(node, stack) + kind: function_kind(document, node, stack) ) end @@ -1638,7 +1231,7 @@ def record_owner_def(document, node, stack, out) out << OwnerDef.new( file: document.file, name: full_owner, - kind: owner_kind(node, language: document.language), + kind: owner_kind(node), line: line(node), span: span(node) ) @@ -1665,7 +1258,7 @@ def record_call_site(document, node, stack, out) end def record_state_declaration(document, node, stack, out) - declaration = state_declaration(node, language: document.language) + declaration = state_declaration(node) return unless declaration out << StateDeclaration.new( @@ -1678,254 +1271,226 @@ def record_state_declaration(document, node, stack, out) ) end - def case_patterns(node) - case_arms(node).flat_map do |child| - case_arm_patterns(child).reject { |normalized| default_case_pattern?(normalized) } - end.uniq.sort - end - - def case_arm_patterns(child) - case child.kind - when "when", "match_arm" - patterns = child.named_children.select { |node| %w[pattern case_pattern match_pattern].include?(node.kind) } - patterns = [named_field(child, "pattern") || child.named_children.first].compact if patterns.empty? - ruby_when_pattern_texts(patterns) - when "switch_case", "case_clause", "expression_case", "case_statement", "switch_section", - "switch_block_statement_group", "switch_entry", "when_entry" - return [] if child.text.to_s.lstrip.start_with?("else") + def record_state_write(document, node, stack, out) + return if skip_state_write_node?(node) - value = named_field(child, "value") || named_field(child, "pattern") || - child.named_children.find { |candidate| candidate.kind == "when_condition" } || - child.named_children.find { |candidate| candidate.kind == "switch_pattern" } || - child.named_children.first - value && value.kind !~ /statement|block/ ? 
[normalize_text(value.text)] : [] - else - [] - end - end + lhs = + if %w[assignment assignment_expression augmented_assignment assignment_statement operator_assignment].include?(node.kind) + named_field(node, "left") || node.named_children.first + elsif assignment_lhs?(node) + node + end + return unless lhs - def case_arm_pattern(child) - patterns = case_arm_patterns(child) - return nil if patterns.empty? + target = state_target(lhs) + return unless target + target = normalize_target_receiver(target, stack) + return if skip_state_write_target?(target) - patterns.join(", ") + source_node = state_write_source_node(node) + out << StateWrite.new( + field: target[:field], + receiver: target[:receiver], + file: document.file, + function: current_function(stack), + line: line(source_node), + span: span(source_node), + owner: current_owner(document, stack) + ) end - def ruby_when_pattern_texts(patterns) - return [] if patterns.empty? - - texts = patterns.map { |pattern| normalize_text(pattern.text) } - return texts unless texts.any? { |text| text.start_with?("*") } + def skip_state_write_node?(_node) + false + end - out = [] - pending_plain = [] - texts.each_with_index do |text, index| - splat = text.start_with?("*") - if splat - out << pending_plain.join(", ") unless pending_plain.empty? - pending_plain = [] - out << if texts.size == 1 || index.positive? - text.delete_prefix("*") - else - text - end - else - pending_plain << text - end - end - out << pending_plain.join(", ") unless pending_plain.empty? - out + def skip_state_write_target?(target) + target[:field] == "[]" end - def case_arm_body(child) - pattern = named_field(child, "pattern") || named_field(child, "value") || child.named_children.first - members = child.named_children - body = members.drop_while { |node| node == pattern }.drop(1) - body = members[1..] if body.empty? 
- Array(body).map(&:text).join(" ") + def state_write_source_node(node) + node end - def case_arms(node) - arms = [] - stack = node.named_children.dup - until stack.empty? - child = stack.shift - next unless ts_node?(child) + def record_state_read(document, node, stack, out) + target = state_read_target(node) + return unless target + target = normalize_target_receiver(target, stack) - if %w[when switch_case case_clause expression_case case_statement switch_section switch_block_statement_group switch_entry when_entry match_arm].include?(child.kind) - arms << child - elsif !%w[method function_definition function_declaration method_definition - method_declaration function_item class class_definition - class_declaration].include?(child.kind) - stack.concat(child.named_children) - end - end - arms - end - - def decision_predicate(node) - return normalize_text(modifier_condition(node).text) if hidden_modifier_if?(node) && modifier_condition(node) - - target = decision_subject(node) - normalize_text(target ? 
target.text : node.text) - end - - def decision_subject(node) - named_field(node, "value") || named_field(node, "subject") || - node.named_children.find { |child| child.kind == "when_subject" } || - named_field(node, "condition") || - node.named_children.find do |child| - !%w[when switch_case case_clause expression_case case_statement switch_section switch_block_statement_group switch_entry when_entry match_arm else then comment].include?(child.kind) - end + out << StateRead.new( + field: target[:field], + receiver: target[:receiver], + file: document.file, + function: current_function(stack), + line: line(node), + span: span(node), + owner: current_owner(document, stack) + ) end - def ruby_predicate_less_case?(node) - return false unless node.kind == "case" || hidden_case?(node) + def record_state_param_origin(document, node, stack, out) + lhs = nil + rhs = nil + if %w[assignment assignment_expression augmented_assignment assignment_statement].include?(node.kind) + lhs = named_field(node, "left") || node.named_children.first + rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] + elsif assignment_lhs?(node) + lhs = node + rhs = next_sibling(next_sibling(node)) + end + return unless lhs && rhs - !decision_subject(node) - end + target = state_target(lhs) + return unless target && rhs + target = normalize_target_receiver(target, stack) - def default_case_pattern?(text) - text.nil? || %w[_ default].include?(text) - end + params = current_params(stack) + return if params.empty? 
- def boolean_and?(node) - if parenthesized_wrapper?(node) - child = node.named_children.first - return boolean_and?(child) + rhs_param_names(rhs, params).each do |param| + out << StateParamOrigin.new( + field: target[:field], + receiver: target[:receiver], + owner: current_owner(document, stack), + param: param, + file: document.file, + function: current_function(stack), + line: line(node), + span: span(node) + ) end - - %w[&& and].include?(direct_operator(node)) end - def flatten_boolean_and(node) - return [node] unless ts_node?(node) && - boolean_container?(node) && - boolean_and?(node) - return flatten_boolean_and(node.named_children.first) if parenthesized_wrapper?(node) + def record_branch_decision(document, node, stack, out, immutable_readers:, immutable_reader_types:, type_aliases:, + method_param_types:) + return unless branch_node?(node) - node.named_children.flat_map { |child| flatten_boolean_and(child) } - end + cond = if hidden_modifier_if?(node) + modifier_condition(node) + else + named_field(node, "condition") || named_field(node, "value") || + named_field(node, "subject") || node.named_children.first + end + return unless cond - def boolean_container?(node) - return false unless ts_node?(node) - return true if %w[binary binary_expression boolean_operator].include?(node.kind) - return boolean_container?(node.named_children.first) if parenthesized_wrapper?(node) - return false unless %w[body_statement block_body statement pattern argument_list].include?(node.kind) - return false unless %w[&& and].include?(direct_operator(node)) - return false if node.named_children.size < 2 + refs = [] + collect_state_refs( + cond, + refs, + defn: current_function(stack), + immutable_readers: immutable_readers, + immutable_reader_types: immutable_reader_types, + type_aliases: type_aliases, + method_param_types: method_param_types + ) + refs.uniq! + refs.sort! + return if refs.empty? - node.children.all? do |child| - child.named? 
|| %w[&& and ( )].include?(child.text.to_s) - end + out << BranchDecision.new( + file: document.file, + function: current_function(stack), + line: line(node), + span: span(node), + predicate: normalize_text(cond.text), + state_refs: refs + ) end - def same_span?(left, right) - span(left) == span(right) - end + def record_branch_arm(document, node, stack, out) + return if generated_prelude?(document, node) - def conjunction_span(node) - base = span(node) - if node.kind == "pattern" && node.text.to_s.lstrip.start_with?("(") - base = base.dup - base[1] += 1 + if if_node?(node) + record_if_arms(document, node, stack, out) + return end - base - end - def parenthesized_wrapper?(node) - ts_node?(node) && %w[parenthesized_statements parenthesized_expression].include?(node.kind) && - node.named_children.size == 1 - end + case node.kind + when "while", "until", "while_statement", "for", "for_statement" + record_loop_arm(document, node, stack, out) + when "case", "body_statement", "switch_statement", "expression_switch_statement", "switch_expression", + "match_statement", "match_expression", "when_expression" + return if node.kind == "body_statement" && !hidden_case?(node) - def decision_member_text(node) - normalize_text(strip_enclosing_parentheses(node.text)) + record_case_arms(document, node, stack, out) + end end - def strip_enclosing_parentheses(text) - value = text.to_s.strip - loop do - break value unless value.start_with?("(") && value.end_with?(")") - break value unless enclosing_parentheses_wrap_all?(value) + def record_if_arms(document, node, stack, out) + predicate = decision_predicate(node) + dspan = span(node) + dline = line(node) + consequence = named_field(node, "consequence") || named_field(node, "body") || + node.named_children[1] + alternative = named_field(node, "alternative") || + node.named_children.find { |child| child.kind.match?(/else|elsif|alternative/) } + alternative ||= node.named_children[2] if node.named_children[2] != consequence - value = 
value[1...-1].strip - end - value - end + [[consequence, "then"], [alternative, "else"]].each do |arm_node, member| + next unless ts_node?(arm_node) - def enclosing_parentheses_wrap_all?(text) - depth = 0 - text.each_char.with_index do |char, index| - depth += 1 if char == "(" - depth -= 1 if char == ")" - return false if depth.zero? && index < text.length - 1 - return false if depth.negative? + out << BranchArm.new( + file: document.file, + function: current_function(stack), + kind: :if, + line: line(arm_node), + span: span(arm_node), + decision_line: dline, + decision_span: dspan, + predicate: predicate, + member: member, + body: normalize_text(arm_node.text) + ) end - depth.zero? - end - - def direct_operator(node) - node.children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) }&.text.to_s - rescue StandardError - "" end - def record_state_write(document, node, stack, out) - return if document.language == :ruby && node.kind == "operator_assignment" - return if document.language == :ruby && assignment_lhs?(node) && next_sibling(node)&.text.to_s != "=" && - !instance_variable_node?(node) - - lhs = - if %w[assignment assignment_expression augmented_assignment assignment_statement operator_assignment].include?(node.kind) - named_field(node, "left") || node.named_children.first - elsif assignment_lhs?(node) - node - end - return unless lhs - - target = state_target(lhs, language: document.language) - return unless target - target = normalize_target_receiver(target, stack) - return if target[:field] == "[]" - return if document.language == :ruby && target[:field].to_s.start_with?("$") + def record_loop_arm(document, node, stack, out) + body = named_field(node, "body") || node.named_children[1] + return unless ts_node?(body) - source_node = document.language == :ruby && assignment_lhs?(node) ? 
(parent_node(node) || node) : node - out << StateWrite.new( - field: target[:field], - receiver: target[:receiver], + out << BranchArm.new( file: document.file, function: current_function(stack), - line: line(source_node), - span: span(source_node), - owner: current_owner(document, stack) + kind: :loop, + line: line(body), + span: span(body), + decision_line: line(node), + decision_span: span(node), + predicate: decision_predicate(node), + member: "body", + body: normalize_text(body.text) ) end - def record_state_read(document, node, stack, out) - target = state_read_target(node, language: document.language) - return unless target - target = normalize_target_receiver(target, stack) + def record_case_arms(document, node, stack, out) + predicate = decision_predicate(node) + dspan = span(node) + dline = line(node) + case_arms(node).each do |arm| + pattern = case_arm_pattern(arm) + next if default_case_pattern?(pattern) - out << StateRead.new( - field: target[:field], - receiver: target[:receiver], - file: document.file, - function: current_function(stack), - line: line(node), - span: span(node), - owner: current_owner(document, stack) - ) + out << BranchArm.new( + file: document.file, + function: current_function(stack), + kind: :case, + line: line(arm), + span: span(arm), + decision_line: dline, + decision_span: dspan, + predicate: predicate, + member: pattern, + body: normalize_text(case_arm_body(arm)) + ) + end end def record_implicit_state_accesses(document, out) - return unless %i[cpp csharp].include?(document.language) - declared = declared_state_index(out[:state_declarations]) return if declared.empty? 
locals = local_declaration_index(document) params = function_param_index(out[:function_defs]) - walk(document.root, initial_stack(document)) do |node, stack| + TreeSitterAdapter.walk_document(document, initial_stack(document), self) do |node, stack| next unless implicit_state_identifier?(node) owner = current_owner(document, stack) @@ -1963,248 +1528,175 @@ def record_implicit_state_accesses(document, out) end end - def declared_state_index(declarations) - declarations.each_with_object(Hash.new { |h, k| h[k] = Set.new }) do |decl, index| - index[decl.owner.to_s].add(decl.field.to_s) - end + def case_patterns(node) + case_arms(node).flat_map do |child| + case_arm_patterns(child).reject { |normalized| default_case_pattern?(normalized) } + end.uniq.sort end - def function_param_index(functions) - functions.each_with_object(Hash.new { |h, k| h[k] = Set.new }) do |fn, index| - index[[fn.owner.to_s, fn.name.to_s]].merge(Array(fn.params).map(&:to_s)) + def case_arm_patterns(child) + case child.kind + when "when", "match_arm" + patterns = child.named_children.select { |node| %w[pattern case_pattern match_pattern].include?(node.kind) } + patterns = [named_field(child, "pattern") || child.named_children.first].compact if patterns.empty? + case_pattern_texts(patterns) + when "switch_case", "case_clause", "expression_case", "case_statement", "switch_section", + "switch_block_statement_group", "switch_entry", "when_entry" + return [] if child.text.to_s.lstrip.start_with?("else") + + value = named_field(child, "value") || named_field(child, "pattern") || + child.named_children.find { |candidate| candidate.kind == "when_condition" } || + child.named_children.find { |candidate| candidate.kind == "switch_pattern" } || + child.named_children.first + value && value.kind !~ /statement|block/ ? 
[normalize_text(value.text)] : [] + else + [] end end - def local_declaration_index(document) - index = Hash.new { |h, k| h[k] = Set.new } - walk(document.root, initial_stack(document)) do |node, stack| - next unless local_variable_declarator?(node) - - owner = current_owner(document, stack) - function = current_function(stack) - next if function == "(top-level)" + def case_arm_pattern(child) + patterns = case_arm_patterns(child) + return nil if patterns.empty? - local_name_node(node)&.then { |name| index[[owner, function]].add(name.text.to_s) } - end - index + patterns.join(", ") end - def local_variable_declarator?(node) - return false unless ts_node?(node) - return false unless %w[variable_declarator init_declarator].include?(node.kind) + def case_pattern_texts(patterns) + return [] if patterns.empty? - !inside_kind?(node, %w[field_declaration property_declaration public_field_definition]) + patterns.map { |pattern| normalize_text(pattern.text) } end - def local_name_node(node) - named_field(node, "name") || - node.named_children.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) } + def case_arm_body(child) + pattern = named_field(child, "pattern") || named_field(child, "value") || child.named_children.first + members = child.named_children + body = members.drop_while { |node| node == pattern }.drop(1) + body = members[1..] if body.empty? + Array(body).map(&:text).join(" ") end - def implicit_state_identifier?(node) - ts_node?(node) && %w[identifier field_identifier property_identifier].include?(node.kind) + def case_arms(node) + arms = [] + stack = node.named_children.dup + until stack.empty? 
+ child = stack.shift + next unless ts_node?(child) + + if %w[when switch_case case_clause expression_case case_statement switch_section switch_block_statement_group switch_entry when_entry match_arm].include?(child.kind) + arms << child + elsif !%w[method function_definition function_declaration method_definition + method_declaration function_item class class_definition + class_declaration].include?(child.kind) + stack.concat(child.named_children) + end + end + arms end - def identifier_declaration_site?(node) - parent = parent_node(node) - return false unless parent - return true if %w[parameter_declaration parameter variable_declarator init_declarator function_declarator - method_declaration function_definition class_specifier class].include?(parent.kind) - return true if inside_kind?(node, %w[field_declaration property_declaration public_field_definition]) + def decision_predicate(node) + return normalize_text(modifier_condition(node).text) if hidden_modifier_if?(node) && modifier_condition(node) - false + target = decision_subject(node) + normalize_text(target ? 
target.text : node.text) end - def member_message_identifier?(node) - parent = parent_node(node) - return false unless parent && field_like_node?(parent) + def decision_subject(node) + named_field(node, "value") || named_field(node, "subject") || + node.named_children.find { |child| child.kind == "when_subject" } || + named_field(node, "condition") || + node.named_children.find do |child| + !%w[when switch_case case_clause expression_case case_statement switch_section switch_block_statement_group switch_entry when_entry match_arm else then comment].include?(child.kind) + end + end - field = named_field(parent, "field") || named_field(parent, "property") || - named_field(parent, "name") || parent.named_children.last - field == node + def predicate_less_case?(node) + (node.kind == "case" || hidden_case?(node)) && !decision_subject(node) end - def implicit_assignment_lhs?(node) - parent = parent_node(node) - return false unless parent + def default_case_pattern?(text) + text.nil? || %w[_ default].include?(text) + end - if %w[assignment_expression assignment assignment_statement augmented_assignment operator_assignment].include?(parent.kind) - lhs = named_field(parent, "left") || parent.named_children.first - return lhs == node + def boolean_and?(node) + if parenthesized_wrapper?(node) + child = node.named_children.first + return boolean_and?(child) end - assignment_lhs?(node) + %w[&& and].include?(direct_operator(node)) end - def inside_kind?(node, kinds) - parent = parent_node(node) - seen = Set.new - while parent && !seen.include?(node_key(parent)) - seen << node_key(parent) - return true if kinds.include?(parent.kind) + def flatten_boolean_and(node) + return [node] unless ts_node?(node) && + boolean_container?(node) && + boolean_and?(node) + return flatten_boolean_and(node.named_children.first) if parenthesized_wrapper?(node) - parent = parent_node(parent) - end - false + node.named_children.flat_map { |child| flatten_boolean_and(child) } end - def 
record_state_param_origin(document, node, stack, out) - lhs = nil - rhs = nil - if %w[assignment assignment_expression augmented_assignment assignment_statement].include?(node.kind) - lhs = named_field(node, "left") || node.named_children.first - rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] - elsif assignment_lhs?(node) - lhs = node - rhs = next_sibling(next_sibling(node)) - end - return unless lhs && rhs - - target = state_target(lhs, language: document.language) - return unless target && rhs - target = normalize_target_receiver(target, stack) - - params = current_params(stack) - return if params.empty? + def boolean_container?(node) + return false unless ts_node?(node) + return true if %w[binary binary_expression boolean_operator].include?(node.kind) + return boolean_container?(node.named_children.first) if parenthesized_wrapper?(node) + return false unless %w[body_statement block_body statement pattern argument_list].include?(node.kind) + return false unless %w[&& and].include?(direct_operator(node)) + return false if node.named_children.size < 2 - rhs_param_names(rhs, params).each do |param| - out << StateParamOrigin.new( - field: target[:field], - receiver: target[:receiver], - owner: current_owner(document, stack), - param: param, - file: document.file, - function: current_function(stack), - line: line(node), - span: span(node) - ) + node.children.all? do |child| + child.named? 
|| %w[&& and ( )].include?(child.text.to_s) end end - def record_branch_decision(document, node, stack, out, immutable_readers:, immutable_reader_types:, type_aliases:, - method_param_types:) - return unless branch_node?(node) + def same_span?(left, right) + span(left) == span(right) + end - cond = if hidden_modifier_if?(node) - modifier_condition(node) - else - named_field(node, "condition") || named_field(node, "value") || - named_field(node, "subject") || node.named_children.first - end - return unless cond + def conjunction_span(node) + base = span(node) + if node.kind == "pattern" && node.text.to_s.lstrip.start_with?("(") + base = base.dup + base[1] += 1 + end + base + end - refs = [] - collect_state_refs( - cond, - refs, - language: document.language, - defn: current_function(stack), - immutable_readers: immutable_readers, - immutable_reader_types: immutable_reader_types, - type_aliases: type_aliases, - method_param_types: method_param_types - ) - refs.uniq! - refs.sort! - return if refs.empty? 
+ def parenthesized_wrapper?(node) + ts_node?(node) && %w[parenthesized_statements parenthesized_expression].include?(node.kind) && + node.named_children.size == 1 + end - out << BranchDecision.new( - file: document.file, - function: current_function(stack), - line: line(node), - span: span(node), - predicate: normalize_text(cond.text), - state_refs: refs - ) + def decision_member_text(node) + normalize_text(strip_enclosing_parentheses(node.text)) end - def record_branch_arm(document, node, stack, out) - return if generated_lua_compat_prelude?(document, node) + def strip_enclosing_parentheses(text) + value = text.to_s.strip + loop do + break value unless value.start_with?("(") && value.end_with?(")") + break value unless enclosing_parentheses_wrap_all?(value) - if if_node?(node) - record_if_arms(document, node, stack, out) - return + value = value[1...-1].strip end + value + end - case node.kind - when "while", "until", "while_statement", "for", "for_statement" - record_loop_arm(document, node, stack, out) - when "case", "body_statement", "switch_statement", "expression_switch_statement", "switch_expression", - "match_statement", "match_expression", "when_expression" - return if node.kind == "body_statement" && !hidden_case?(node) - - record_case_arms(document, node, stack, out) + def enclosing_parentheses_wrap_all?(text) + depth = 0 + text.each_char.with_index do |char, index| + depth += 1 if char == "(" + depth -= 1 if char == ")" + return false if depth.zero? && index < text.length - 1 + return false if depth.negative? end + depth.zero? 
end - def record_if_arms(document, node, stack, out) - predicate = decision_predicate(node) - dspan = span(node) - dline = line(node) - consequence = named_field(node, "consequence") || named_field(node, "body") || - node.named_children[1] - alternative = named_field(node, "alternative") || - node.named_children.find { |child| child.kind.match?(/else|elsif|alternative/) } - alternative ||= node.named_children[2] if node.named_children[2] != consequence - - [[consequence, "then"], [alternative, "else"]].each do |arm_node, member| - next unless ts_node?(arm_node) - - out << BranchArm.new( - file: document.file, - function: current_function(stack), - kind: :if, - line: line(arm_node), - span: span(arm_node), - decision_line: dline, - decision_span: dspan, - predicate: predicate, - member: member, - body: normalize_text(arm_node.text) - ) - end - end - - def record_loop_arm(document, node, stack, out) - body = named_field(node, "body") || node.named_children[1] - return unless ts_node?(body) - - out << BranchArm.new( - file: document.file, - function: current_function(stack), - kind: :loop, - line: line(body), - span: span(body), - decision_line: line(node), - decision_span: span(node), - predicate: decision_predicate(node), - member: "body", - body: normalize_text(body.text) - ) - end - - def record_case_arms(document, node, stack, out) - predicate = decision_predicate(node) - dspan = span(node) - dline = line(node) - case_arms(node).each do |arm| - pattern = case_arm_pattern(arm) - next if default_case_pattern?(pattern) - - out << BranchArm.new( - file: document.file, - function: current_function(stack), - kind: :case, - line: line(arm), - span: span(arm), - decision_line: dline, - decision_span: dspan, - predicate: predicate, - member: pattern, - body: normalize_text(case_arm_body(arm)) - ) - end - end + def direct_operator(node) + node.children.find { |child| !child.named? 
&& !%w[( )].include?(child.text.to_s) }&.text.to_s + rescue StandardError + "" + end def branch_node?(node) BRANCH_KINDS.include?(node.kind) || hidden_match?(node) || hidden_if?(node) || @@ -2217,21 +1709,11 @@ def if_node?(node) end def hidden_if?(node) - return false unless ts_node?(node) - return false unless %w[expression_statement block body_statement].include?(node.kind) - - %w[if unless].include?(first_token_kind(node)) + false end def hidden_modifier_if?(node) - return false unless ts_node?(node) - return false unless node.kind == "body_statement" - - seen_named = false - node.children.any? do |child| - seen_named ||= child.named? - seen_named && !child.named? && %w[if unless].include?(child.kind) - end + false end def modifier_condition(node) @@ -2239,21 +1721,22 @@ def modifier_condition(node) end def hidden_case?(node) - return false unless ts_node?(node) - return false unless %w[body_statement block_body argument_list].include?(node.kind) + false + end - first_token_kind(node) == "case" + def hidden_match?(node) + false end def first_token_kind(node) node.children.first&.kind.to_s end - def collect_state_refs(node, refs, language:, defn:, immutable_readers:, immutable_reader_types:, type_aliases:, + def collect_state_refs(node, refs, defn:, immutable_readers:, immutable_reader_types:, type_aliases:, method_param_types:) - if node.kind == "instance_variable" || node.kind == "global_variable" - refs << node.text - elsif (target = state_read_target(node, language: language)) + if (ref = direct_state_ref(node)) + refs << ref + elsif (target = state_read_target(node)) unless namespace_receiver?(target[:receiver]) unless immutable_state_read?(target, defn, immutable_readers, immutable_reader_types, type_aliases, method_param_types) refs << (target[:receiver] == "self" ? 
target[:field] : "#{target[:receiver]}.#{target[:field]}") @@ -2264,7 +1747,6 @@ def collect_state_refs(node, refs, language:, defn:, immutable_readers:, immutab collect_state_refs( child, refs, - language: language, defn: defn, immutable_readers: immutable_readers, immutable_reader_types: immutable_reader_types, @@ -2327,70 +1809,129 @@ def resolve_type_alias(type_name, type_aliases) end end - def method_param_types(lines) - types_by_method = {} - pending_sig = +"" - lines.each do |line| - pending_sig << line if pending_sig_active?(line, pending_sig) - if (match = line.match(/\A\s*def\s+([A-Za-z_]\w*[!?=]?)(?:\s|\(|$)/)) - types_by_method[match[1]] = sig_param_types(pending_sig) - pending_sig = +"" - end + def current_params(stack) + entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:params] } + Array(entry && entry[:params]) + end + + def rhs_param_names(node, params) + found = [] + collect_identifiers(node, found) + found & params + end + + def collect_identifiers(node, out) + return unless ts_node?(node) + + pending = [node] + seen = Set.new + until pending.empty? + current = pending.pop + next unless ts_node?(current) + key = node_key(current) + next if seen.include?(key) + + seen << key + out << current.text if current.kind == "identifier" + current.children.reverse_each { |child| pending << child } end - types_by_method end - def pending_sig_active?(line, pending_sig) - !pending_sig.empty? 
|| line.match?(/\A\s*sig\b/) + def declared_state_index(declarations) + declarations.each_with_object(Hash.new { |h, k| h[k] = Set.new }) do |decl, index| + index[decl.owner.to_s].add(decl.field.to_s) + end end - def sig_param_types(sig_source) - match = sig_source.match(/params\s*\((.*?)\)/m) - return {} unless match + def function_param_index(functions) + functions.each_with_object(Hash.new { |h, k| h[k] = Set.new }) do |fn, index| + index[[fn.owner.to_s, fn.name.to_s]].merge(Array(fn.params).map(&:to_s)) + end + end - match[1].scan(/([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)/).to_h + def local_declaration_index(document) + index = Hash.new { |h, k| h[k] = Set.new } + TreeSitterAdapter.walk_document(document, initial_stack(document), self) do |node, stack| + next unless local_variable_declarator?(node) + + owner = current_owner(document, stack) + function = current_function(stack) + next if function == "(top-level)" + + local_name_node(node)&.then { |name| index[[owner, function]].add(name.text.to_s) } + end + index end - def current_params(stack) - entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:params] } - Array(entry && entry[:params]) + def local_variable_declarator?(node) + return false unless ts_node?(node) + return false unless %w[variable_declarator init_declarator].include?(node.kind) + + !inside_kind?(node, %w[field_declaration property_declaration public_field_definition]) end - def rhs_param_names(node, params) - found = [] - collect_identifiers(node, found) - found & params + def local_name_node(node) + named_field(node, "name") || + node.named_children.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) } + end + + def implicit_state_identifier?(node) + ts_node?(node) && %w[identifier field_identifier property_identifier].include?(node.kind) end - def collect_identifiers(node, out) - return unless ts_node?(node) + def identifier_declaration_site?(node) + parent = parent_node(node) + 
return false unless parent + return true if %w[parameter_declaration parameter variable_declarator init_declarator function_declarator + method_declaration function_definition class_specifier class].include?(parent.kind) + return true if inside_kind?(node, %w[field_declaration property_declaration public_field_definition]) - pending = [node] - seen = Set.new - until pending.empty? - current = pending.pop - next unless ts_node?(current) - key = node_key(current) - next if seen.include?(key) + false + end - seen << key - out << current.text if current.kind == "identifier" - current.children.reverse_each { |child| pending << child } - end - end + def member_message_identifier?(node) + parent = parent_node(node) + return false unless parent && field_like_node?(parent) + + field = named_field(parent, "field") || named_field(parent, "property") || + named_field(parent, "name") || parent.named_children.last + field == node + end + + def implicit_assignment_lhs?(node) + parent = parent_node(node) + return false unless parent + + if %w[assignment_expression assignment assignment_statement augmented_assignment operator_assignment].include?(parent.kind) + lhs = named_field(parent, "left") || parent.named_children.first + return lhs == node + end + + assignment_lhs?(node) + end + + def inside_kind?(node, kinds) + parent = parent_node(node) + seen = Set.new + while parent && !seen.include?(node_key(parent)) + seen << node_key(parent) + return true if kinds.include?(parent.kind) + parent = parent_node(parent) + end + false + end - def owner_for_node(document, node, stack: nil, language: nil) - language ||= document&.language || current_language(Array(stack)) + def owner_for_node(document, node, stack: nil) receiver_owner = receiver_owner_name(node) return receiver_owner if receiver_owner - convention_owner = receiver_convention_owner_name(node, language: language) + convention_owner = receiver_convention_owner_name(node) return convention_owner if convention_owner stacked_owner 
= current_owner_from_stack(Array(stack)) return stacked_owner if stacked_owner - chain = owner_chain_for_node(document, node, language: language) + chain = owner_chain_for_node(document, node) return chain.join("::") unless chain.empty? return file_owner(document.file) if document @@ -2398,14 +1939,14 @@ def owner_for_node(document, node, stack: nil, language: nil) nil end - def owner_chain_for_node(document, node, language: nil) + def owner_chain_for_node(document, node) chain = [] seen = Set.new seen_nodes = Set.new parent = parent_node(node) while parent && !seen_nodes.include?(node_key(parent)) seen_nodes << node_key(parent) - if (owner = owner_name_from_declaration(document, parent, language: language)) + if (owner = owner_name_from_declaration(document, parent)) unless seen.include?(owner) chain << owner seen << owner @@ -2416,14 +1957,6 @@ def owner_chain_for_node(document, node, language: nil) chain.reverse end - def owner_name_from_declaration(document, node, language: nil) - syntax_profile(language || document&.language).owner_name_from_declaration(adapter_helpers, document, node) - end - - def owner_kind(node, language: nil) - syntax_profile(language).owner_kind(adapter_helpers, node) - end - def impl_owner_name(node) type = named_field(node, "type") || node.named_children.find { |child| child.kind.match?(/type|identifier/) } @@ -2435,10 +1968,6 @@ def receiver_owner_name(node) receiver_type && normalize_type_owner(receiver_type.text) end - def function_receiver_name(node, stack) - syntax_profile(current_language(stack)).function_receiver_name(adapter_helpers, node, stack) - end - def method_receiver_type_node(node) declaration = method_receiver_declaration(node) return nil unless declaration @@ -2462,14 +1991,6 @@ def method_receiver_declaration(node) receiver_params&.named_children&.find { |child| child.kind == "parameter_declaration" } end - def receiver_convention_owner_name(node, language:) - 
syntax_profile(language).receiver_convention_owner_name(adapter_helpers, node, language: language) - end - - def receiver_convention_param_name(node, language:) - syntax_profile(language).receiver_convention_param_name(adapter_helpers, node, language: language) - end - def first_argument_receiver_parameter(node) params = named_field(named_field(node, "declarator"), "parameters") || named_field(node, "parameters") || @@ -2491,12 +2012,6 @@ def first_argument_receiver_parameter(node) { type: type_node.text, name: name } end - def first_argument_receiver_language?(language) - return false unless language - - Syntax.language_profile(language).first_argument_receiver? - end - def snake_case_type_name(type) type.to_s .split("::").last @@ -2529,7 +2044,7 @@ def returned_container_owner(document, node) seen_nodes = Set.new while parent && !seen_nodes.include?(node_key(parent)) seen_nodes << node_key(parent) - if (name = function_name(parent, language: document&.language)) + if (name = function_name(parent)) return name end @@ -2538,121 +2053,46 @@ def returned_container_owner(document, node) nil end - def node_key(node) - [node.kind, node.start_byte, node.end_byte] - rescue StandardError - node.object_id - end - def anonymous_owner_name(document, node) return nil unless document "#{file_owner(document.file)}::anonymous@#{line(node)}" end - def file_owner(file) - base = File.basename(file.to_s, File.extname(file.to_s)) - base.empty? ? 
"(file)" : base - end + def generic_call_target(document, node) + callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first + return nil unless callee + return nil if callee.kind == "builtin_function" || callee.text.to_s.start_with?("@") - def call_target(document, node) - syntax_profile(document.language).call_target(adapter_helpers, document, node) + target = target_from_callee(callee).merge(arguments: []) + first_argument_receiver_call_target(document, node, target) || target + rescue NoMethodError + nil end - def ruby_call_target(node) - receiver = named_field(node, "receiver") - method = named_field(node, "method") - message = method&.text || first_named_text(node, %w[identifier constant]) - message ||= normalize_text(node.text) if receiver.nil? && ruby_simple_call_text?(node.text) - return nil unless message + def first_argument_receiver_call_target(_document, node, target) + return nil unless first_argument_receiver? + return nil unless target[:receiver] == "self" + + first_arg = call_argument_nodes(node).first + arg_target = state_read_target(first_arg) + return nil unless arg_target { - receiver: receiver ? 
normalize_text(receiver.text) : "self", - message: message, - arguments: ruby_argument_texts(node) + receiver: "#{arg_target[:receiver]}.#{arg_target[:field]}", + message: target[:message], + arguments: target[:arguments] } end - def ruby_bare_call_target(document, node) - return nil unless document.language == :ruby - return nil unless ruby_bare_call_identifier?(node) - - { - receiver: "self", - message: node.text, - arguments: [] - } + def call_argument_nodes(node) + args = named_field(node, "arguments") || + node.named_children.find { |child| child.kind == "argument_list" } + Array(args&.named_children) end - def ruby_bare_body_call_target(document, node) - return nil unless document.language == :ruby - return nil if hidden_ruby_method_definition?(node) || hidden_ruby_owner_declaration?(node) - - explicit = ruby_explicit_receiver_body_call_target(node) - return explicit if explicit - - message = node.text.to_s.strip - return nil unless ruby_simple_call_text?(message) - return nil if %w[true false nil self].include?(message) - - { - receiver: "self", - message: message, - arguments: [] - } - end - - def ruby_explicit_receiver_body_call_target(node) - receiver, message = node.named_children - return nil unless receiver && message - return nil unless %w[self constant identifier].include?(receiver.kind) - return nil unless %w[identifier constant].include?(message.kind) - - { - receiver: normalize_text(receiver.text), - message: message.text, - arguments: [] - } - end - - def ruby_simple_call_text?(text) - text.to_s.strip.match?(/\A[a-z_]\w*[!?=]?\z/) - end - - def generic_call_target(document, node) - callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first - return nil unless callee - return nil if callee.kind == "builtin_function" || callee.text.to_s.start_with?("@") - - target = target_from_callee(callee).merge(arguments: []) - first_argument_receiver_call_target(document, node, target) || target - rescue NoMethodError - 
nil - end - - def first_argument_receiver_call_target(document, node, target) - return nil unless first_argument_receiver_language?(document.language) - return nil unless target[:receiver] == "self" - - first_arg = call_argument_nodes(node).first - arg_target = state_read_target(first_arg, language: document.language) - return nil unless arg_target - - { - receiver: "#{arg_target[:receiver]}.#{arg_target[:field]}", - message: target[:message], - arguments: target[:arguments] - } - end - - def call_argument_nodes(node) - args = named_field(node, "arguments") || - node.named_children.find { |child| child.kind == "argument_list" } - Array(args&.named_children) - end - - def adjacent_argument_call_target(node) - return nil unless next_sibling(node)&.kind == "argument_list" + def adjacent_argument_call_target(node) + return nil unless next_sibling(node)&.kind == "argument_list" target_from_callee(node).merge(arguments: []) rescue NoMethodError @@ -2711,17 +2151,10 @@ def noise_call?(target) false end - def state_declaration(node, language: nil) - syntax_profile(language).state_declaration(adapter_helpers, node) - end - - def generic_state_declaration(node, language: nil) + def generic_state_declaration(node) case node.kind when "assignment", "assignment_expression", "assignment_statement" - ruby_t_let_state_declaration(node, language: language) || - assignment_state_declaration(node, language: language) - when "container_field" - zig_container_field_declaration(node) + assignment_state_declaration(node) when "property_declaration", "public_field_definition", "field_definition", "field_declaration" generic_field_declaration(node) else @@ -2729,13 +2162,6 @@ def generic_state_declaration(node, language: nil) end end - def zig_container_field_declaration(node) - name = node.named_children.find { |child| child.kind == "identifier" } - return nil unless name - - { field: name.text, type: declared_type_text(node, name) } - end - def generic_field_declaration(node) name = 
field_declaration_name_node(node) return nil unless name @@ -2773,413 +2199,1067 @@ def variable_declarator_name(node) %w[identifier field_identifier property_identifier].include?(child.kind) end end - pending.concat(current.named_children) + pending.concat(current.named_children) + end + nil + end + + def declared_type_text(node, name_node) + text = node.text.to_s + after_name = text[(name_node.end_byte - node.start_byte)..].to_s + if (match = after_name.match(/\A\s*:\s*([^=,\n]+)/)) + normalize_text(match[1]) + elsif (match = text.match(/\A\s*(?:pub\s+)?(?:const|var)\s+\w+\s*:\s*([^=;\n]+)/)) + normalize_text(match[1]) + elsif (match = after_name.match(/\A\s+([^=;,\n]+)/)) + normalize_text(match[1]) + elsif (type = declared_type_before_name(text, node, name_node)) + type + end + rescue StandardError + nil + end + + def declared_type_before_name(text, node, name_node) + before_name = text[0...(name_node.start_byte - node.start_byte)].to_s + before_name = before_name.gsub(/\b(?:public|private|protected|internal|static|readonly|const|pub|mut|var|let)\b/, " ") + before_name = before_name.gsub(/[;,{].*\z/m, " ") + before_name = normalize_text(before_name) + return nil if before_name.empty? + + tokens = before_name.split(/\s+/).reject { |token| token.match?(/\A[*&]+\z/) } + candidate = tokens.last.to_s.delete_suffix("*").delete_suffix("&") + return nil if candidate.empty? 
+ + candidate + end + + def assignment_state_declaration(node) + lhs = named_field(node, "left") || node.named_children.first + rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] + target = state_target(lhs) + return nil unless target + return nil unless %w[self this].include?(target[:receiver].to_s) + + type = inferred_assignment_type(rhs) + return nil unless type + + { field: target[:field], type: type } + end + + def inferred_assignment_type(node) + return nil unless ts_node?(node) + + text = normalize_text(node.text) + patterns = [ + /\Anew\s+([A-Z][A-Za-z0-9_:]*)\s*(?:[({<]|$)/, + /\A([A-Z][A-Za-z0-9_:]*)\s*(?:[({<]|$)/ + ] + match = patterns.filter_map { |pattern| text.match(pattern) }.first + match && match[1] + end + + def generic_state_read_target(node) + case node.kind + when "call" + receiver = named_field(node, "receiver") + method = named_field(node, "method") + return nil unless receiver && method + return nil if namespace_receiver?(receiver.text) + return nil if NOISE_MESSAGES.include?(method.text) + return nil if named_field(node, "arguments") + + { receiver: normalize_text(receiver.text), field: method.text } + when "field", "field_access", "selector_expression", "member_expression", "member_access_expression", "attribute", + "field_expression", "navigation_expression", "directly_assignable_expression", "expression_list" + return nil if node.kind == "expression_list" && !(named_field(node, "operand") && named_field(node, "field")) + + object = named_field(node, "object") || named_field(node, "receiver") || + named_field(node, "expression") || + named_field(node, "operand") || named_field(node, "value") || + named_field(node, "argument") || + node.named_children.find { |child| child.kind != "navigation_suffix" } + field = named_field(node, "field") || named_field(node, "property") || + named_field(node, "name") || named_field(node, "suffix") || + node.named_children.find { |child| child.kind == 
"navigation_suffix" } || + node.named_children.last + if node.kind == "field_expression" && node.text.to_s.start_with?(".") + field = node.named_children.find { |child| child.kind == "identifier" } || field + return { receiver: ".literal", field: field.text } if field + end + field_text = member_field_text(field) + return nil unless object && field_text + return nil if namespace_receiver?(object.text) + return nil if NOISE_MESSAGES.include?(field_text) + + { receiver: normalize_text(object.text), field: field_text } + end + end + + def generic_state_target(lhs) + return nil unless ts_node?(lhs) + return nil if prev_sibling(lhs)&.text == ":" + + case lhs.kind + when "call" + receiver = named_field(lhs, "receiver") + method = named_field(lhs, "method") + return nil unless receiver && method + + { receiver: normalize_text(receiver.text), field: method.text.sub(/=\z/, "") } + when "field", "field_access", "selector_expression", "member_expression", "member_access_expression", "attribute", + "field_expression", "navigation_expression", "directly_assignable_expression", "expression_list" + if lhs.kind == "expression_list" && !(named_field(lhs, "operand") && named_field(lhs, "field")) + return generic_state_target(lhs.named_children.first) + end + + object = named_field(lhs, "object") || named_field(lhs, "receiver") || + named_field(lhs, "expression") || + named_field(lhs, "operand") || named_field(lhs, "value") || + named_field(lhs, "argument") || + lhs.named_children.find { |child| child.kind != "navigation_suffix" } + field = named_field(lhs, "field") || named_field(lhs, "property") || + named_field(lhs, "name") || named_field(lhs, "suffix") || + lhs.named_children.find { |child| child.kind == "navigation_suffix" } || + lhs.named_children.last + if lhs.kind == "field_expression" && lhs.text.to_s.start_with?(".") + field = lhs.named_children.find { |child| child.kind == "identifier" } || field + return { receiver: ".literal", field: field.text.sub(/=\z/, "") } if field 
+ end + field_text = member_field_text(field) + return nil unless object && field_text + + { receiver: normalize_text(object.text), field: field_text.sub(/=\z/, "") } + end + end + + def assignment_lhs?(node) + return false if prev_sibling(node)&.text == ":" + + sibling = next_sibling(node) + sibling && %w[= += -= *= /= %= &&= ||=].include?(sibling.text.to_s) + end + + def direct_state_ref(_node) + nil + end + + def next_sibling(node) + node.next_sibling + rescue StandardError + nil + end + + def prev_sibling(node) + node.prev_sibling + rescue StandardError + nil + end + + def namespace_receiver?(text) + receiver = text.to_s + return true if receiver.match?(/\A(?:std|builtin|build_options)(?:\.|\z)/) + return true if receiver.start_with?("@") + + receiver.match?(/\A[A-Z][A-Za-z0-9_]*(?:\.[A-Z][A-Za-z0-9_]*)*\z/) + end + + def named_field(node, name) + node.child_by_field_name(name) + rescue StandardError + nil + end + + def parent_node(node) + node.parent + rescue StandardError + nil + end + + def field_like_node?(node) + %w[field field_access selector_expression member_expression member_access_expression attribute field_expression + navigation_expression directly_assignable_expression expression_list scoped_identifier].include?(node.kind) + end + + def member_field_text(field) + return nil unless ts_node?(field) + + if field.kind == "navigation_suffix" + suffix = named_field(field, "suffix") || + field.named_children.find { |child| %w[identifier simple_identifier field_identifier property_identifier].include?(child.kind) } || + field.named_children.last + text = suffix&.text.to_s + return nil if text.empty? 
+ + return text.sub(/\A[.?]+/, "") + end + + field.text.to_s.sub(/\A[.?]+/, "") + end + + def normalize_type_owner(text) + value = text.to_s.strip + value = value.sub(/\A[&*]+/, "") + value = value.gsub(/\b(?:const|mut|var)\b/, "").strip + value.split(/[({<\s]/).first.to_s.split(".").last + end + + def first_named_text(node, kinds) + expanded = kinds.include?("identifier") ? kinds + %w[simple_identifier] : kinds + child = node.named_children.find { |c| expanded.include?(c.kind) } + child&.text + end + + def declarator_name(node) + return nil unless ts_node?(node) + + pending = [node] + seen = Set.new + until pending.empty? + current = pending.pop + next unless ts_node?(current) + key = node_key(current) + next if seen.include?(key) + + seen << key + return current.text if %w[identifier simple_identifier field_identifier property_identifier].include?(current.kind) + + current.named_children.reverse_each { |child| pending << child } + end + nil + end + + def exported_name_visibility(name) + text = name.to_s + return nil if text.empty? + + text.match?(/\A[A-Z]/) ? :public : :private + end + + def modifier_visibility(node) + return :private if node.children.any? { |child| child.text == "private" } + return :protected if node.children.any? { |child| child.text == "protected" } + return :public if node.children.any? { |child| %w[public pub].include?(child.text) } + + nil + end + + def parameter_name(param) + return nil unless ts_node?(param) + return param.text if %w[identifier simple_identifier shorthand_property_identifier_pattern].include?(param.kind) + + name = named_field(param, "name") || + param.named_children.find do |child| + %w[identifier simple_identifier field_identifier property_identifier].include?(child.kind) + end + text = name&.text.to_s + return nil if text.empty? 
|| text == "_" + + text + end + + def normalize_target_receiver(target, stack) + receiver = target[:receiver].to_s + current_receiver = current_receiver_name(stack) + return target unless current_receiver + return target.merge(receiver: "self") if receiver == current_receiver + + if receiver.start_with?("#{current_receiver}.") + return target.merge(receiver: "self.#{receiver.delete_prefix("#{current_receiver}.")}") + end + + target + end + + def current_receiver_name(stack) + entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:receiver] } + entry && entry[:receiver] + end + + def file_owner(file) + base = File.basename(file.to_s, File.extname(file.to_s)) + base.empty? ? "(file)" : base + end + + def node_key(node) + [node.kind, node.start_byte, node.end_byte] + rescue StandardError + node.object_id + end + + def ts_node?(node) + node && node.respond_to?(:kind) && node.respond_to?(:children) + end + + def span(node) + [node.start_point.row + 1, node.start_point.column, + node.end_point.row + 1, node.end_point.column] + end + + def line(node) + node.start_point.row + 1 + end + + def normalize_text(text) + text.to_s.strip.gsub(/\s+/, " ") + end + end + + LanguageProfile = TreeSitterLanguageAdapter + + LANGUAGE_PROFILES = { + ruby: RubySyntaxAdapter.new( + language: :ruby, + extensions: %w[.rb], + lexicon: RUBY_LEXICON, + package: "tree-sitter-ruby" + ), + python: PythonSyntaxAdapter.new( + language: :python, + extensions: %w[.py .pyi], + lexicon: PYTHON_LEXICON, + package: "tree-sitter-python" + ), + javascript: JavaScriptSyntaxAdapter.new( + language: :javascript, + extensions: %w[.js .jsx .mjs .cjs], + lexicon: JAVASCRIPT_LEXICON, + package: "tree-sitter-javascript" + ), + typescript: JavaScriptSyntaxAdapter.new( + language: :typescript, + extensions: %w[.ts .tsx], + lexicon: JAVASCRIPT_LEXICON, + package: "tree-sitter-typescript" + ), + go: GoSyntaxAdapter.new( + language: :go, + extensions: %w[.go], + lexicon: GO_LEXICON, + package: "tree-sitter-go" + ), 
+ rust: RustSyntaxAdapter.new( + language: :rust, + extensions: %w[.rs], + lexicon: RUST_LEXICON, + package: "tree-sitter-rust" + ), + zig: ZigSyntaxAdapter.new( + language: :zig, + extensions: %w[.zig], + lexicon: ZIG_LEXICON, + package: "@tree-sitter-grammars/tree-sitter-zig" + ), + lua: LuaSyntaxAdapter.new( + language: :lua, + extensions: %w[.lua], + lexicon: LUA_LEXICON, + package: "@tree-sitter-grammars/tree-sitter-lua" + ), + c: CSyntaxAdapter.new( + language: :c, + extensions: %w[.c .h], + lexicon: C_LEXICON, + package: "tree-sitter-c", + first_argument_receiver: true + ), + cpp: CppSyntaxAdapter.new( + language: :cpp, + extensions: %w[.cc .cpp .cxx .hh .hpp .hxx], + lexicon: CPP_LEXICON, + package: "tree-sitter-cpp" + ), + csharp: CSharpSyntaxAdapter.new( + language: :csharp, + extensions: %w[.cs], + lexicon: CSHARP_LEXICON, + package: "tree-sitter-c-sharp", + grammar_names: %w[c-sharp csharp], + tree_sitter_language_name: "c_sharp" + ), + java: TreeSitterLanguageAdapter.new( + language: :java, + extensions: %w[.java], + lexicon: JAVA_LEXICON, + package: "tree-sitter-java" + ), + swift: TreeSitterLanguageAdapter.new( + language: :swift, + extensions: %w[.swift], + lexicon: SWIFT_LEXICON, + package: "tree-sitter-swift" + ), + kotlin: TreeSitterLanguageAdapter.new( + language: :kotlin, + extensions: %w[.kt .kts], + lexicon: KOTLIN_LEXICON, + package: "tree-sitter-kotlin" + ) + }.freeze + + LANGUAGE_BY_EXTENSION = LANGUAGE_PROFILES.values.each_with_object({}) do |profile, index| + profile.extensions.each { |extension| index[extension] ||= profile.language } + end.freeze + + module_function + + def parse(file, language: nil, parser: ENV.fetch("DECOMPLEX_PARSER", "tree_sitter")) + normalized_parser = parser.to_s.tr("-", "_") + lang = (language || language_for(file)).to_sym + key = document_cache_key(file, lang, normalized_parser) + document_cache.fetch(key) do + document_cache[key] = + case normalized_parser + when "", "tree_sitter", "treesitter" + 
TreeSitterAdapter.new.parse(file, language: lang) + else + raise ArgumentError, "unknown decomplex parser #{parser.inspect}" + end + end + end + + def document_cache + @document_cache ||= {} + end + + def document_cache_key(file, language, parser) + stat = File.stat(file) + [File.expand_path(file), language, parser, stat.size, stat.mtime.to_f] + end + + def parse_uncached(file, language: nil, parser: ENV.fetch("DECOMPLEX_PARSER", "tree_sitter")) + case parser.to_s.tr("-", "_") + when "", "tree_sitter", "treesitter" + TreeSitterAdapter.new.parse(file, language: language) + else + raise ArgumentError, "unknown decomplex parser #{parser.inspect}" + end + end + + def parser + ENV.fetch("DECOMPLEX_PARSER", "tree_sitter").to_s.tr("-", "_") + end + + def tree_sitter? + %w[tree_sitter treesitter].include?(parser) + end + + def language_for(file) + forced = ENV["DECOMPLEX_FORCE_LANGUAGE"].to_s.strip + return forced.tr("-", "_").to_sym unless forced.empty? + + LANGUAGE_BY_EXTENSION.fetch(File.extname(file).downcase, :ruby) + end + + def supported_exts(parser: self.parser) + case parser.to_s.tr("-", "_") + when "", "tree_sitter", "treesitter" + LANGUAGE_PROFILES.values.flat_map(&:extensions).uniq + else + [] + end + end + + def supported_source?(file, parser: self.parser) + supported_exts(parser: parser).include?(File.extname(file).downcase) + end + + def language_lexicon(language) + language_profile(language).lexicon + end + + def language_profile(language) + key = language.to_s.empty? ? 
nil : language.to_sym + raise ArgumentError, "missing Syntax language profile" unless key + + LANGUAGE_PROFILES.fetch(key) + rescue KeyError + raise ArgumentError, "unsupported Syntax language profile: #{language.inspect}" + end + + class Document + attr_reader :file, :language, :source, :lines, :root, :adapter + + def initialize(file:, language:, source:, lines:, root:, adapter:) + @file = file + @language = language + @source = source + @lines = lines + @tree_sitter_facade = TreeSitterFacadeContext.new(root) + @root = @tree_sitter_facade.root + @adapter = adapter + end + + def decision_sites + @decision_sites ||= adapter.decision_sites(self) + end + + def state_writes + @state_writes ||= adapter.state_writes(self) + end + + def state_reads + @state_reads ||= adapter.state_reads(self) + end + + def branch_decisions(immutable_readers:, immutable_reader_types:, type_aliases:) + adapter.branch_decisions( + self, + immutable_readers: immutable_readers, + immutable_reader_types: immutable_reader_types, + type_aliases: type_aliases + ) + end + + def function_defs + @function_defs ||= adapter.function_defs(self) + end + + def owner_defs + @owner_defs ||= adapter.owner_defs(self) + end + + def call_sites + @call_sites ||= adapter.call_sites(self) + end + + def state_declarations + @state_declarations ||= adapter.state_declarations(self) + end + + def state_param_origins + @state_param_origins ||= adapter.state_param_origins(self) + end + + def branch_arms + @branch_arms ||= adapter.branch_arms(self) + end + + def immutable_struct_readers + adapter.immutable_struct_readers(lines) + end + + def immutable_struct_reader_types + adapter.immutable_struct_reader_types(lines) + end + + def type_aliases + adapter.type_aliases(lines) + end + end + + module SourceTextHelpers + module_function + + def immutable_struct_readers(lines) + readers = Hash.new { |h, k| h[k] = Set.new } + class_stack = [] + lines.each do |line| + if (match = 
line.match(/\A\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b/)) + class_stack << match[1] + next + end + if class_stack.any? && (match = line.match(/\A\s*const\s+:([A-Za-z_]\w*)\b/)) + readers[class_stack.last].add(match[1].to_sym) + next + end + class_stack.pop if class_stack.any? && line.match?(/\A\s*end\s*(?:#.*)?\z/) + end + readers + end + + def immutable_struct_reader_types(lines) + reader_types = Hash.new { |h, k| h[k] = {} } + class_stack = [] + lines.each do |line| + if (match = line.match(/\A\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b/)) + class_stack << match[1] + next + end + if class_stack.any? && (match = line.match(/\A\s*const\s+:([A-Za-z_]\w*)\s*,\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\b/)) + reader_types[class_stack.last][match[1].to_sym] = match[2] + next + end + class_stack.pop if class_stack.any? && line.match?(/\A\s*end\s*(?:#.*)?\z/) + end + reader_types + end + + def type_aliases(lines) + aliases = {} + lines.each do |line| + if (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*([A-Z]\w*(?:::[A-Z]\w*)*)\s*\}/)) + aliases[match[1]] = match[2] + elsif (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*([A-Z]\w*(?:::[A-Z]\w*)*)\b/)) + aliases[match[1]] = match[2] + end + end + aliases + end + end + + class TreeSitterFacadeContext + attr_reader :root + + def initialize(raw_root) + @wrappers = {} + @children_cache = {} + @named_children_cache = {} + @named_field_cache = {} + @parent_cache = {} + @prev_sibling_cache = {} + @next_sibling_cache = {} + @prev_named_sibling_cache = {} + @next_named_sibling_cache = {} + @root = wrap(raw_root) + index_tree(raw_root) + end + + def wrap(raw) + return nil unless raw + return raw if raw.is_a?(TreeSitterNodeFacade) + + key = node_key(raw) + @wrappers[key] ||= TreeSitterNodeFacade.new(self, raw, key) + end + + def children(raw) + node = unwrap(raw) + @children_cache.fetch(node_key(node)) { [] } + end + + def named_children(raw) + node = unwrap(raw) + 
@named_children_cache.fetch(node_key(node)) { [] } + end + + def child_by_field_name(raw, name) + node = unwrap(raw) + key = [node_key(node), name.to_s] + return @named_field_cache[key] if @named_field_cache.key?(key) + + @named_field_cache[key] = wrap(node.child_by_field_name(name)) + rescue StandardError + nil + end + + def parent(raw) + @parent_cache[node_key(unwrap(raw))] + end + + def prev_sibling(raw) + @prev_sibling_cache[node_key(unwrap(raw))] + end + + def next_sibling(raw) + @next_sibling_cache[node_key(unwrap(raw))] + end + + def prev_named_sibling(raw) + @prev_named_sibling_cache[node_key(unwrap(raw))] + end + + def next_named_sibling(raw) + @next_named_sibling_cache[node_key(unwrap(raw))] + end + + def node_key(raw) + node = unwrap(raw) + [node.kind, node.start_byte, node.end_byte, node.named?] + end + + private + + def unwrap(raw) + raw.is_a?(TreeSitterNodeFacade) ? raw.raw : raw + end + + def index_tree(raw_root) + pending = [raw_root] + until pending.empty? + raw = pending.pop + key = node_key(raw) + raw_children = Array(raw.children) + wrapped_children = raw_children.map { |child| wrap(child) } + @children_cache[key] = wrapped_children + @named_children_cache[key] = wrapped_children.select(&:named?) 
+ + raw_children.each do |child| + child_key = node_key(child) + @parent_cache[child_key] = wrap(raw) + end + + index_siblings(raw_children, @prev_sibling_cache, @next_sibling_cache) + index_siblings(raw_children.select(&:named?), @prev_named_sibling_cache, @next_named_sibling_cache) + + pending.concat(raw_children.reverse) end - nil end - def declared_type_text(node, name_node) - text = node.text.to_s - after_name = text[(name_node.end_byte - node.start_byte)..].to_s - if (match = after_name.match(/\A\s*:\s*([^=,\n]+)/)) - normalize_text(match[1]) - elsif (match = text.match(/\A\s*(?:pub\s+)?(?:const|var)\s+\w+\s*:\s*([^=;\n]+)/)) - normalize_text(match[1]) - elsif (match = after_name.match(/\A\s+([^=;,\n]+)/)) - normalize_text(match[1]) - elsif (type = declared_type_before_name(text, node, name_node)) - type + def index_siblings(raw_children, prev_cache, next_cache) + raw_children.each_with_index do |child, index| + key = node_key(child) + prev_cache[key] = wrap(raw_children[index - 1]) if index.positive? + next_cache[key] = wrap(raw_children[index + 1]) if index + 1 < raw_children.length end - rescue StandardError - nil end + end - def declared_type_before_name(text, node, name_node) - before_name = text[0...(name_node.start_byte - node.start_byte)].to_s - before_name = before_name.gsub(/\b(?:public|private|protected|internal|static|readonly|const|pub|mut|var|let)\b/, " ") - before_name = before_name.gsub(/[;,{].*\z/m, " ") - before_name = normalize_text(before_name) - return nil if before_name.empty? + class TreeSitterNodeFacade + attr_reader :context, :raw - tokens = before_name.split(/\s+/).reject { |token| token.match?(/\A[*&]+\z/) } - candidate = tokens.last.to_s.delete_suffix("*").delete_suffix("&") - return nil if candidate.empty? 
+ def initialize(context, raw, key) + @context = context + @raw = raw + @key = key + end - candidate + def kind + @kind ||= raw.kind end - def assignment_state_declaration(node, language: nil) - lhs = named_field(node, "left") || node.named_children.first - rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] - target = state_target(lhs, language: language) - return nil unless target - return nil unless %w[self this].include?(target[:receiver].to_s) + def text + @text ||= raw.text.to_s + end - type = inferred_assignment_type(rhs) - return nil unless type + def start_byte + raw.start_byte + end - { field: target[:field], type: type } + def end_byte + raw.end_byte end - def inferred_assignment_type(node) - return nil unless ts_node?(node) + def start_point + raw.start_point + end - text = normalize_text(node.text) - patterns = [ - /\Anew\s+([A-Z][A-Za-z0-9_:]*)\s*(?:[({<]|$)/, - /\A([A-Z][A-Za-z0-9_:]*)\s*(?:[({<]|$)/ - ] - match = patterns.filter_map { |pattern| text.match(pattern) }.first - match && match[1] + def end_point + raw.end_point end - def generated_lua_compat_prelude?(document, node) - syntax_profile(document.language).generated_prelude?(adapter_helpers, document, node) + def named? + raw.named? end - def state_read_target(node, language: nil) - syntax_profile(language).state_read_target(adapter_helpers, node) + def has_error? + raw.respond_to?(:has_error?) && raw.has_error? 
end - def generic_state_read_target(node) - case node.kind - when "call" - receiver = named_field(node, "receiver") - method = named_field(node, "method") - return nil unless receiver && method - return nil if namespace_receiver?(receiver.text) - return nil if NOISE_MESSAGES.include?(method.text) - return nil if named_field(node, "arguments") + def children + context.children(self) + end - { receiver: normalize_text(receiver.text), field: method.text } - when "field", "field_access", "selector_expression", "member_expression", "member_access_expression", "attribute", - "field_expression", "navigation_expression", "directly_assignable_expression", "expression_list" - return nil if node.kind == "expression_list" && !(named_field(node, "operand") && named_field(node, "field")) + def child_count + children.length + end - object = named_field(node, "object") || named_field(node, "receiver") || - named_field(node, "expression") || - named_field(node, "operand") || named_field(node, "value") || - named_field(node, "argument") || - node.named_children.find { |child| child.kind != "navigation_suffix" } - field = named_field(node, "field") || named_field(node, "property") || - named_field(node, "name") || named_field(node, "suffix") || - node.named_children.find { |child| child.kind == "navigation_suffix" } || - node.named_children.last - if node.kind == "field_expression" && node.text.to_s.start_with?(".") - field = node.named_children.find { |child| child.kind == "identifier" } || field - return { receiver: ".literal", field: field.text } if field - end - field_text = member_field_text(field) - return nil unless object && field_text - return nil if namespace_receiver?(object.text) - return nil if NOISE_MESSAGES.include?(field_text) + def named_children + context.named_children(self) + end - { receiver: normalize_text(object.text), field: field_text } - when "instance_variable", "global_variable" - { receiver: "self", field: node.text } - end + def named_child_count + 
named_children.length end - def state_target(lhs, language: nil) - syntax_profile(language).state_target(adapter_helpers, lhs) + def child_by_field_name(name) + context.child_by_field_name(self, name) end - def generic_state_target(lhs) - return nil unless ts_node?(lhs) - return nil if prev_sibling(lhs)&.text == ":" + def parent + context.parent(self) + end - case lhs.kind - when "call" - receiver = named_field(lhs, "receiver") - method = named_field(lhs, "method") - return nil unless receiver && method + def prev_sibling + context.prev_sibling(self) + end - { receiver: normalize_text(receiver.text), field: method.text.sub(/=\z/, "") } - when "field", "field_access", "selector_expression", "member_expression", "member_access_expression", "attribute", - "field_expression", "navigation_expression", "directly_assignable_expression", "expression_list" - if lhs.kind == "expression_list" && !(named_field(lhs, "operand") && named_field(lhs, "field")) - return generic_state_target(lhs.named_children.first) - end + def next_sibling + context.next_sibling(self) + end - object = named_field(lhs, "object") || named_field(lhs, "receiver") || - named_field(lhs, "expression") || - named_field(lhs, "operand") || named_field(lhs, "value") || - named_field(lhs, "argument") || - lhs.named_children.find { |child| child.kind != "navigation_suffix" } - field = named_field(lhs, "field") || named_field(lhs, "property") || - named_field(lhs, "name") || named_field(lhs, "suffix") || - lhs.named_children.find { |child| child.kind == "navigation_suffix" } || - lhs.named_children.last - if lhs.kind == "field_expression" && lhs.text.to_s.start_with?(".") - field = lhs.named_children.find { |child| child.kind == "identifier" } || field - return { receiver: ".literal", field: field.text.sub(/=\z/, "") } if field - end - field_text = member_field_text(field) - return nil unless object && field_text + def prev_named_sibling + context.prev_named_sibling(self) + end - { receiver: 
normalize_text(object.text), field: field_text.sub(/=\z/, "") } - when "instance_variable", "global_variable" - { receiver: "self", field: lhs.text } - end + def next_named_sibling + context.next_named_sibling(self) end - def hidden_match?(node) - node.kind == "expression_statement" && - first_token_kind(node) == "match" && - node.named_children.any? { |child| child.kind == "match_block" } + def ==(other) + other = other.raw if other.is_a?(TreeSitterNodeFacade) + other.respond_to?(:kind) && + kind == other.kind && + start_byte == other.start_byte && + end_byte == other.end_byte && + named? == other.named? end - def assignment_lhs?(node) - return false if prev_sibling(node)&.text == ":" + alias eql? == - sibling = next_sibling(node) - sibling && %w[= += -= *= /= %= &&= ||=].include?(sibling.text.to_s) + def hash + @key.hash end - def instance_variable_node?(node) - ts_node?(node) && node.kind == "instance_variable" + def inspect + "#<#{self.class} kind=#{kind.inspect} start_byte=#{start_byte} end_byte=#{end_byte}>" end + end - def next_sibling(node) - node.next_sibling - rescue StandardError - nil - end + class TreeSitterAdapter + def self.walk_document(document, stack, profile, &block) + node = document.root + return unless tree_sitter_node?(node) - def prev_sibling(node) - node.prev_sibling - rescue StandardError - nil - end + pending = [[node, stack]] + seen = Set.new + until pending.empty? 
+ current, current_stack = pending.pop + next unless tree_sitter_node?(current) + key = node_key(current) + next if seen.include?(key) - def namespace_receiver?(text) - receiver = text.to_s - return true if receiver.match?(/\A(?:std|builtin|build_options)(?:\.|\z)/) - return true if receiver.start_with?("@") + seen << key - receiver.match?(/\A[A-Z][A-Za-z0-9_]*(?:\.[A-Z][A-Za-z0-9_]*)*\z/) + next_stack = profile.push_context(document, current_stack, current) + yield current, next_stack + current.children.reverse_each { |child| pending << [child, next_stack] } + end end - def named_field(node, name) - node.child_by_field_name(name) - rescue StandardError - nil + def self.tree_sitter_node?(node) + node && node.respond_to?(:kind) && node.respond_to?(:children) end - def parent_node(node) - node.parent + def self.node_key(node) + [node.kind, node.start_byte, node.end_byte] rescue StandardError - nil - end - - def field_like_node?(node) - %w[field field_access selector_expression member_expression member_access_expression attribute field_expression - navigation_expression directly_assignable_expression expression_list scoped_identifier].include?(node.kind) + node.object_id end - def member_field_text(field) - return nil unless ts_node?(field) + def parse(file, language: nil) + lang = (language || Syntax.language_for(file)).to_sym + source = File.read(file) + parser = parser_for(lang) + tree = parser.parse(source) + raise "tree-sitter parse timed out for #{file}" unless tree - if field.kind == "navigation_suffix" - suffix = named_field(field, "suffix") || - field.named_children.find { |child| %w[identifier simple_identifier field_identifier property_identifier].include?(child.kind) } || - field.named_children.last - text = suffix&.text.to_s - return nil if text.empty? 
+ Document.new( + file: file, + language: lang, + source: source, + lines: source.lines, + root: tree.root_node, + adapter: self + ) + end - return text.sub(/\A[.?]+/, "") + def decision_sites(document) + profile = syntax_profile(document.language) + out = [] + walk(document, profile) do |node, stack| + out.concat(profile.decision_site_facts(document, node, stack)) end - - field.text.to_s.sub(/\A[.?]+/, "") + out end - def normalize_type_owner(text) - value = text.to_s.strip - value = value.sub(/\A[&*]+/, "") - value = value.gsub(/\b(?:const|mut|var)\b/, "").strip - value.split(/[({<\s]/).first.to_s.split(".").last + def state_writes(document) + structural_facts(document).fetch(:state_writes) end - def first_named_text(node, kinds) - expanded = kinds.include?("identifier") ? kinds + %w[simple_identifier] : kinds - child = node.named_children.find { |c| expanded.include?(c.kind) } - child&.text + def state_reads(document) + structural_facts(document).fetch(:state_reads) end - def declarator_name(node) - return nil unless ts_node?(node) - - pending = [node] - seen = Set.new - until pending.empty? 
- current = pending.pop - next unless ts_node?(current) - key = node_key(current) - next if seen.include?(key) - - seen << key - return current.text if %w[identifier simple_identifier field_identifier property_identifier].include?(current.kind) - - current.named_children.reverse_each { |child| pending << child } - end - nil - end - - def inline_def_argument_list?(node) - ts_node?(node) && node.kind == "argument_list" && node.children.first&.kind.to_s == "def" + def branch_decisions(document, immutable_readers:, immutable_reader_types:, type_aliases:) + profile = syntax_profile(document.language) + out = [] + walk(document, profile) do |node, stack| + out.concat(profile.branch_decision_facts( + document, + node, + stack, + immutable_readers: immutable_readers, + immutable_reader_types: immutable_reader_types, + type_aliases: type_aliases + )) + end + out end - def inline_def_name(node) - return nil unless inline_def_argument_list?(node) - - receiver_index = node.named_children.index { |child| child.kind == "self" || child.kind == "constant" } - search = receiver_index ? node.named_children[(receiver_index + 1)..] : node.named_children - name = search&.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) }&.text - receiver_index ? "self.#{name}" : name + def function_defs(document) + structural_facts(document).fetch(:function_defs) end - def hidden_ruby_method_definition?(node) - ts_node?(node) && node.kind == "body_statement" && node.children.first&.kind.to_s == "def" + def owner_defs(document) + structural_facts(document).fetch(:owner_defs) end - def hidden_ruby_method_name(node) - return nil unless hidden_ruby_method_definition?(node) - - receiver_index = node.named_children.index { |child| child.kind == "self" || child.kind == "constant" } - search = receiver_index ? node.named_children[(receiver_index + 1)..] 
: node.named_children - name = search&.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) }&.text - receiver_index ? "self.#{name}" : name + def call_sites(document) + structural_facts(document).fetch(:call_sites) end - def hidden_ruby_method_params(node) - params = node.named_children.find { |child| child.kind == "method_parameters" } - return [] unless params - - params.named_children.filter_map { |param| parameter_name(param) }.uniq + def state_declarations(document) + structural_facts(document).fetch(:state_declarations) end - def hidden_ruby_method_signature(document, node) - body = node.named_children.find { |child| child.kind == "body_statement" } - end_byte = body ? body.start_byte : node.end_byte - document.source.byteslice(node.start_byte, end_byte - node.start_byte).to_s.strip.sub(/;+\z/, "") - rescue StandardError - line_text(document, node).strip + def state_param_origins(document) + structural_facts(document).fetch(:state_param_origins) end - def hidden_ruby_owner_declaration?(node) - return false unless ts_node?(node) - return false unless node.kind == "body_statement" - - %w[class module].include?(node.children.first&.kind.to_s) + def structural_facts(document) + @structural_fact_cache ||= {} + @structural_fact_cache[document.object_id] ||= begin + profile = syntax_profile(document.language) + out = { + function_defs: [], + owner_defs: [], + call_sites: [], + state_declarations: [], + state_param_origins: [], + state_reads: [], + state_writes: [] + } + walk(document, profile) do |node, stack| + facts = profile.structural_facts_for_node(document, node, stack) + facts.each do |key, values| + out.fetch(key).concat(values) + end + end + profile.after_structural_facts(document, out) + out[:function_defs].uniq! { |fn| [fn.file, fn.owner, fn.name, fn.line] } + out[:owner_defs].uniq! { |owner| [owner.file, owner.name, owner.kind] } + out[:call_sites].uniq! 
{ |call| [call.file, call.owner, call.function, call.line, call.receiver, call.message] } + out[:state_declarations].uniq! { |decl| [decl.file, decl.owner, decl.field] } + out[:state_param_origins].uniq! { |origin| [origin.file, origin.owner, origin.function, origin.field, origin.param] } + out[:state_reads].uniq! { |read| [read.file, read.owner, read.function, read.line, read.receiver, read.field] } + out[:state_writes].uniq! { |write| [write.file, write.owner, write.function, write.line, write.receiver, write.field] } + out + end end - def hidden_ruby_owner_name(node) - node.named_children.find { |child| %w[constant identifier type_identifier].include?(child.kind) }&.text + def branch_arms(document) + profile = syntax_profile(document.language) + out = [] + walk(document, profile) do |node, stack| + out.concat(profile.branch_arm_facts(document, node, stack)) + end + out end - def hidden_ruby_owner_kind(node) - node.children.first&.kind.to_s == "module" ? :module : :class + def immutable_struct_readers(lines) + SourceTextHelpers.immutable_struct_readers(lines) end - def ruby_inline_def_visibility(node) - parent = parent_node(node) - return nil unless parent&.kind == "call" + def immutable_struct_reader_types(lines) + SourceTextHelpers.immutable_struct_reader_types(lines) + end - target = ruby_call_target(parent) - visibility = target && target[:receiver] == "self" && target[:message]&.to_sym - %i[private protected public].include?(visibility) ? 
visibility : nil + def type_aliases(lines) + SourceTextHelpers.type_aliases(lines) end - def ruby_bare_call_identifier?(node) - parent = parent_node(node) - return false unless parent - return false if ruby_declaration_name?(node, parent) - return false if %w[method_parameters block_parameters argument_list assignment].include?(parent.kind) - if parent.kind == "call" - return false if named_field(parent, "receiver") + private - first = parent.named_children.first - return first == node && next_sibling(node)&.kind == "argument_list" - end - return false if next_sibling(node)&.text == "=" || prev_sibling(node)&.text == "=" - return false if next_sibling(node)&.text == "." || prev_sibling(node)&.text == "." + def syntax_profile(language) + raise ArgumentError, "missing Syntax language profile context" if language.nil? - %w[body_statement then else elsif ensure rescue].include?(parent.kind) || - next_sibling(node)&.kind == "argument_list" + Syntax.language_profile(language) end - def ruby_declaration_name?(node, parent) - return true if hidden_ruby_method_definition?(parent) - return true if hidden_ruby_owner_declaration?(parent) - return true if %w[method singleton_method class module].include?(parent.kind) + def parser_for(language) + require_tree_sitter + lang_name = Syntax.language_profile(language).tree_sitter_language_name + register_language(lang_name, grammar_path(language)) + ::TreeSitter::Parser.new.tap { |parser| parser.language = lang_name } + end - false + def require_tree_sitter + gem "tree_sitter", "~> 0.1" + require "tree_sitter" + rescue Gem::LoadError, LoadError => e + raise LoadError, "DECOMPLEX_PARSER=tree_sitter requires the tree_sitter gem: #{e.message}" end - def ruby_argument_texts(node) - args = named_field(node, "arguments") || node.named_children.find { |child| child.kind == "argument_list" } - return [] unless args - - values = args.named_children.map { |child| normalize_text(child.text) } - return values unless values.empty? 
+ def register_language(name, path) + @registered ||= {} + return if @registered[name] - text = args.text.to_s.strip - text = text[1...-1] if text.start_with?("(") && text.end_with?(")") - text.split(/\s*,\s*/).map { |arg| normalize_text(arg) }.reject(&:empty?) + ::TreeSitter.register_language(name, path) + @registered[name] = true end - def normalize_target_receiver(target, stack) - receiver = target[:receiver].to_s - current_receiver = current_receiver_name(stack) - return target unless current_receiver - return target.merge(receiver: "self") if receiver == current_receiver + def grammar_path(language) + env_name = "DECOMPLEX_TS_#{language.to_s.upcase}_PATH" + return ENV.fetch(env_name) if ENV[env_name] && File.file?(ENV[env_name]) - if receiver.start_with?("#{current_receiver}.") - return target.merge(receiver: "self.#{receiver.delete_prefix("#{current_receiver}.")}") - end + candidates = grammar_candidates(language) + found = candidates.find { |path| File.file?(path) } + return found if found - target + raise LoadError, + "missing Tree-sitter grammar for #{language}. Set #{env_name} " \ + "to a parser shared library (.so/.dylib/.node). 
Checked: #{candidates.join(', ')}" end - def current_receiver_name(stack) - entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:receiver] } - entry && entry[:receiver] + def grammar_candidates(language) + profile = Syntax.language_profile(language) + pkg = profile.package + stems = profile.grammar_names + names = stems.flat_map do |stem| + ["#{stem}.so", "tree-sitter-#{stem}.so", + "libtree-sitter-#{stem}.so", "#{stem}.node", + "tree-sitter-#{stem}.node", + "#{stem}_binding.node", + "tree_sitter_#{stem.tr('-', '_')}_binding.node", + "@tree-sitter-grammars+tree-sitter-#{stem}.node"] + end + roots = [ + File.expand_path("../../vendor/tree-sitter", __dir__), + File.expand_path("../../vendor/tree-sitter/#{language}", __dir__), + File.expand_path("../../node_modules/#{pkg}", __dir__), + File.expand_path("../../node_modules/#{pkg}/build/Release", __dir__), + File.expand_path("../../../../node_modules/#{pkg}", __dir__), + File.expand_path("../../../../node_modules/#{pkg}/build/Release", __dir__), + File.expand_path("../../../../../node_modules/#{pkg}", __dir__), + File.expand_path("../../../../../node_modules/#{pkg}/build/Release", __dir__) + ] + all_prebuilds = roots.flat_map do |root| + stems.flat_map do |stem| + Dir.glob(File.join(root, "prebuilds", "*", "*tree-sitter-#{stem}.node")) + end + end + prebuilds = platform_prebuilds(all_prebuilds) + roots.product(names).map { |root, name| File.join(root, name) } + prebuilds end - def ruby_t_let_state_declaration(node, language: nil) - lhs = named_field(node, "left") || node.named_children.first - rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] - target = state_target(lhs, language: language) - return nil unless target && target[:receiver] == "self" && target[:field].to_s.start_with?("@") - return nil unless rhs&.kind == "call" - - receiver = named_field(rhs, "receiver") || rhs.named_children.first - method = named_field(rhs, "method") || rhs.named_children.find { |child| 
child.kind == "identifier" } - return nil unless receiver&.text == "T" && method&.text == "let" - - args = named_field(rhs, "arguments") || rhs.named_children.find { |child| child.kind == "argument_list" } - type = args&.named_children&.[](1)&.text - return nil if type.to_s.empty? + def platform_prebuilds(paths) + os = host_os + arch = host_arch + return paths if os.nil? || arch.nil? - { field: target[:field], type: normalize_text(type) } + paths.select { |path| path.include?("/#{os}-#{arch}/") } end - def ts_node?(node) - node && node.respond_to?(:kind) && node.respond_to?(:children) + def host_os + case RbConfig::CONFIG["host_os"] + when /linux/i then "linux" + when /darwin/i then "darwin" + when /mswin|mingw|cygwin/i then "win32" + end end - def span(node) - [node.start_point.row + 1, node.start_point.column, - node.end_point.row + 1, node.end_point.column] + def host_arch + case RbConfig::CONFIG["host_cpu"] + when /x86_64|amd64/i then "x64" + when /aarch64|arm64/i then "arm64" + end end - def line(node) - node.start_point.row + 1 + def walk(document, profile, &block) + self.class.walk_document(document, profile.initial_stack(document), profile, &block) end - def normalize_text(text) - text.to_s.strip.gsub(/\s+/, " ") - end end end diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 765732f39..41931fba0 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -28,6 +28,21 @@ trait LanguageProfile { false } + fn hidden_case(&self, _node: Node<'_>) -> bool { + false + } + + fn predicate_less_case(&self, node: Node<'_>) -> bool { + node.kind() == "case" && decision_subject(node).is_none() + } + + fn case_pattern_texts(&self, patterns: &[Node<'_>], source: &str) -> Vec { + patterns + .iter() + .map(|pattern| normalize_text(node_text(*pattern, source))) + .collect() + } + fn 
receiver_convention_owner_name(&self, node: Node<'_>, source: &str) -> Option { if !self.first_argument_receiver() || node.kind() != "function_definition" { return None; @@ -60,6 +75,26 @@ trait LanguageProfile { fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { generic_state_target(lhs, source) } + + fn assignment_target<'tree>(&self, node: Node<'tree>) -> Option> { + generic_assignment_target(node) + } + + fn skip_state_write_node(&self, _node: Node<'_>) -> bool { + false + } + + fn skip_state_write_target(&self, target: &Target) -> bool { + target.field == "[]" + } + + fn state_write_source_node<'tree>( + &self, + _node: Node<'tree>, + assignment: &AssignmentTarget<'tree>, + ) -> Node<'tree> { + assignment.source + } } macro_rules! default_profile { @@ -127,6 +162,48 @@ impl LanguageProfile for RubyProfile { } generic_owner_name_from_declaration(node, source) } + + fn hidden_case(&self, node: Node<'_>) -> bool { + matches!( + node.kind(), + "body_statement" | "block_body" | "argument_list" + ) && first_child_kind(node) == Some("case") + } + + fn predicate_less_case(&self, node: Node<'_>) -> bool { + (node.kind() == "case" || self.hidden_case(node)) && decision_subject(node).is_none() + } + + fn case_pattern_texts(&self, patterns: &[Node<'_>], source: &str) -> Vec { + ruby_case_pattern_texts(patterns, source) + } + + fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { + ruby_state_variable_target(lhs, source).or_else(|| generic_state_target(lhs, source)) + } + + fn assignment_target<'tree>(&self, node: Node<'tree>) -> Option> { + generic_assignment_target(node).or_else(|| match node.kind() { + "instance_variable" | "global_variable" if assignment_lhs_node(node) => { + Some(AssignmentTarget { + lhs: node, + source: node.parent().unwrap_or(node), + }) + } + _ => None, + }) + } + + fn skip_state_write_node(&self, node: Node<'_>) -> bool { + node.kind() == "operator_assignment" + || (assignment_lhs_node(node) + && 
next_sibling_raw_text(node).as_deref() != Some("=") + && node.kind() != "instance_variable") + } + + fn skip_state_write_target(&self, target: &Target) -> bool { + target.field == "[]" || target.field.starts_with('$') + } } struct CProfile; @@ -487,7 +564,8 @@ fn record_decision_site( out: &mut Vec, seen: &mut HashSet, ) { - if language_profile(language).generated_prelude(node, source) { + let profile = language_profile(language); + if profile.generated_prelude(node, source) { return; } @@ -496,12 +574,12 @@ fn record_decision_site( return; } - if case_node(node) || hidden_case(node) { - let decision_node = case_source_node(node); - if ruby_predicate_less_case(decision_node) { + if case_node(node) || profile.hidden_case(node) { + let decision_node = case_source_node(node, profile); + if profile.predicate_less_case(decision_node) { return; } - let patterns = case_patterns(decision_node, source); + let patterns = case_patterns(decision_node, source, profile); if patterns.len() < 2 { return; } @@ -675,25 +753,27 @@ fn record_state_write( out: &mut Vec, seen: &mut HashSet, ) { - if node.kind() == "operator_assignment" || node.kind() == "augmented_assignment" { + let profile = language_profile(language); + if profile.skip_state_write_node(node) { return; } - let Some(assignment) = assignment_target(node) else { + let Some(assignment) = profile.assignment_target(node) else { return; }; - let Some(target) = language_profile(language).state_target(assignment.lhs, source) else { + let Some(target) = profile.state_target(assignment.lhs, source) else { return; }; let target = normalize_target_receiver(target, context); - if target.field == "[]" || target.field.starts_with('$') { + if profile.skip_state_write_target(&target) { return; } let file_name = file.to_string_lossy().to_string(); let owner = context.current_owner(); let function = context.current_function(); - let line = line(assignment.source); + let source_node = profile.state_write_source_node(node, &assignment); + 
let line = line(source_node); let key = format!( "{}\0{}\0{}\0{}\0{}\0{}", file_name, owner, function, line, target.receiver, target.field @@ -708,7 +788,7 @@ fn record_state_write( file: file_name, function, line, - span: span(assignment.source), + span: span(source_node), owner, }); } @@ -725,7 +805,7 @@ struct Target { field: String, } -fn assignment_target(node: Node<'_>) -> Option> { +fn generic_assignment_target(node: Node<'_>) -> Option> { match node.kind() { "assignment" | "assignment_expression" | "assignment_statement" => { let lhs = node @@ -733,12 +813,6 @@ fn assignment_target(node: Node<'_>) -> Option> { .or_else(|| first_named_child(node))?; Some(AssignmentTarget { lhs, source: node }) } - "instance_variable" | "global_variable" if assignment_lhs_node(node) => { - Some(AssignmentTarget { - lhs: node, - source: node.parent().unwrap_or(node), - }) - } _ => None, } } @@ -798,14 +872,17 @@ fn generic_state_target(lhs: Node<'_>, source: &str) -> Option { field: strip_assignment_suffix(&field_text), }) } - "instance_variable" | "global_variable" => Some(Target { - receiver: "self".to_string(), - field: node_text(lhs, source).to_string(), - }), _ => None, } } +fn ruby_state_variable_target(node: Node<'_>, source: &str) -> Option { + matches!(node.kind(), "instance_variable" | "global_variable").then(|| Target { + receiver: "self".to_string(), + field: node_text(node, source).to_string(), + }) +} + fn generic_function_name(node: Node<'_>, source: &str) -> Option { match node.kind() { "method" @@ -823,32 +900,10 @@ fn generic_function_name(node: Node<'_>, source: &str) -> Option { &["identifier", "constant", "property_identifier"], ) }), - "singleton_method" => { - let name = node - .child_by_field_name("name") - .map(|name| node_text(name, source).to_string()) - .or_else(|| { - named_children(node) - .into_iter() - .rev() - .find(|child| { - matches!( - child.kind(), - "identifier" | "field_identifier" | "property_identifier" - ) - }) - .map(|child| 
node_text(child, source).to_string()) - })?; - Some(format!("self.{name}")) - } "method_declaration" => node .child_by_field_name("name") .map(|name| node_text(name, source).to_string()) .or_else(|| first_named_text(node, source, &["field_identifier", "identifier"])), - "body_statement" if first_child_kind(node) == Some("def") => { - hidden_ruby_method_name(node, source) - } - "argument_list" if first_child_kind(node) == Some("def") => inline_def_name(node, source), _ => None, } } @@ -1052,15 +1107,8 @@ fn case_node(node: Node<'_>) -> bool { ) } -fn hidden_case(node: Node<'_>) -> bool { - matches!( - node.kind(), - "body_statement" | "block_body" | "argument_list" - ) && first_child_kind(node) == Some("case") -} - -fn case_source_node(node: Node<'_>) -> Node<'_> { - if !hidden_case(node) { +fn case_source_node<'tree>(node: Node<'tree>, profile: &dyn LanguageProfile) -> Node<'tree> { + if !profile.hidden_case(node) { return node; } let mut cursor = node.walk(); @@ -1071,14 +1119,10 @@ fn case_source_node(node: Node<'_>) -> Node<'_> { result } -fn ruby_predicate_less_case(node: Node<'_>) -> bool { - (node.kind() == "case" || hidden_case(node)) && decision_subject(node).is_none() -} - -fn case_patterns(node: Node<'_>, source: &str) -> Vec { +fn case_patterns(node: Node<'_>, source: &str, profile: &dyn LanguageProfile) -> Vec { let mut out = case_arms(node) .into_iter() - .flat_map(|arm| case_arm_patterns(arm, source)) + .flat_map(|arm| case_arm_patterns(arm, source, profile)) .filter(|pattern| !default_case_pattern(pattern)) .collect::>(); out.sort(); @@ -1124,7 +1168,7 @@ fn case_arms(node: Node<'_>) -> Vec> { arms } -fn case_arm_patterns(child: Node<'_>, source: &str) -> Vec { +fn case_arm_patterns(child: Node<'_>, source: &str, profile: &dyn LanguageProfile) -> Vec { match child.kind() { "when" | "match_arm" => { let mut patterns = named_children(child) @@ -1138,7 +1182,7 @@ fn case_arm_patterns(child: Node<'_>, source: &str) -> Vec { .into_iter() .collect(); } - 
ruby_when_pattern_texts(&patterns, source) + profile.case_pattern_texts(&patterns, source) } "switch_case" | "case_clause" @@ -1174,7 +1218,7 @@ fn case_arm_patterns(child: Node<'_>, source: &str) -> Vec { } } -fn ruby_when_pattern_texts(patterns: &[Node<'_>], source: &str) -> Vec { +fn ruby_case_pattern_texts(patterns: &[Node<'_>], source: &str) -> Vec { if patterns.is_empty() { return Vec::new(); } diff --git a/gems/decomplex/test/syntax_test.rb b/gems/decomplex/test/syntax_test.rb index e13878572..3e73304bb 100644 --- a/gems/decomplex/test/syntax_test.rb +++ b/gems/decomplex/test/syntax_test.rb @@ -148,14 +148,30 @@ def test_tree_sitter_adapter_requires_language_profile_context assert_match(/missing Syntax language profile context/, error.message) end + def test_tree_sitter_adapter_delegates_language_normalization_to_profiles + adapter_class = Decomplex::Syntax::TreeSitterAdapter + profile_class = Decomplex::Syntax::TreeSitterLanguageAdapter + ruby_profile_class = Decomplex::Syntax::RubySyntaxAdapter + + refute adapter_class.const_defined?(:BRANCH_KINDS, false) + refute adapter_class.const_defined?(:NOISE_MESSAGES, false) + refute adapter_class.private_method_defined?(:record_state_write) + assert profile_class.private_method_defined?(:record_state_write) + assert ruby_profile_class.private_method_defined?(:skip_state_write_node?) + assert ruby_profile_class.private_method_defined?(:skip_state_write_target?) + assert ruby_profile_class.private_method_defined?(:hidden_case?) 
+ assert ruby_profile_class.private_method_defined?(:case_pattern_texts) + assert ruby_profile_class.private_method_defined?(:direct_state_ref) + end + def test_tree_sitter_document_walks_seed_language_context adapter = Decomplex::Syntax::TreeSitterAdapter.new document = Struct.new(:root, :file, :language, :lines) .new(Object.new, "/tmp/demo.py", :python, []) captured = [] - adapter.define_singleton_method(:walk) do |_root, stack, &_block| - captured << stack + adapter.define_singleton_method(:walk) do |doc, profile, &_block| + captured << profile.initial_stack(doc) end adapter.decision_sites(document) From cda67cd87a0de3f0ff61a8f39fd535f728ce5f99 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Fri, 19 Jun 2026 15:46:05 +0000 Subject: [PATCH 25/52] Add cross-language decomplex oracle fixtures --- .../docs/agents/architectural-issues.md | 544 + .../ruby-first-cross-language-ast-design.md | 621 + gems/decomplex/examples/c/co-update.c | 4 + gems/decomplex/examples/c/decision-pressure.c | 1 + gems/decomplex/examples/c/derived-state.c | 1 + gems/decomplex/examples/c/false-simplicity.c | 1 + gems/decomplex/examples/c/fat-union.c | 7 + gems/decomplex/examples/c/flay-similarity.c | 2 + gems/decomplex/examples/c/function-lcom.c | 13 + .../examples/c/implicit-control-flow.c | 9 + .../examples/c/inconsistent-rename-clone.c | 2 + gems/decomplex/examples/c/local-flow.c | 9 + gems/decomplex/examples/c/locality-drag.c | 27 + gems/decomplex/examples/c/miner.c | 4 + .../examples/c/operational-discontinuity.c | 9 + .../examples/c/oversized-predicate.c | 1 + gems/decomplex/examples/c/path-condition.c | 4 + gems/decomplex/examples/c/predicate-alias.c | 3 + .../examples/c/redundant-nil-guard.c | 1 + gems/decomplex/examples/c/semantic-alias.c | 4 + gems/decomplex/examples/c/sequence-mine.c | 5 + .../examples/c/state-branch-density.c | 2 + gems/decomplex/examples/c/state-mesh.c | 5 + .../examples/c/structural-topology.c | 6 + .../examples/c/temporal-ordering-pressure.c | 5 + 
.../examples/c/weighted-inlined-complexity.c | 5 + gems/decomplex/examples/cpp/co-update.cpp | 4 + .../examples/cpp/decision-pressure.cpp | 1 + gems/decomplex/examples/cpp/derived-state.cpp | 1 + .../examples/cpp/false-simplicity.cpp | 1 + gems/decomplex/examples/cpp/fat-union.cpp | 1 + .../examples/cpp/flay-similarity.cpp | 2 + gems/decomplex/examples/cpp/function-lcom.cpp | 13 + .../examples/cpp/implicit-control-flow.cpp | 1 + .../cpp/inconsistent-rename-clone.cpp | 2 + gems/decomplex/examples/cpp/local-flow.cpp | 9 + gems/decomplex/examples/cpp/locality-drag.cpp | 27 + gems/decomplex/examples/cpp/miner.cpp | 4 + .../cpp/operational-discontinuity.cpp | 9 + .../examples/cpp/oversized-predicate.cpp | 1 + .../decomplex/examples/cpp/path-condition.cpp | 4 + .../examples/cpp/predicate-alias.cpp | 3 + .../examples/cpp/redundant-nil-guard.cpp | 1 + .../decomplex/examples/cpp/semantic-alias.cpp | 4 + gems/decomplex/examples/cpp/sequence-mine.cpp | 5 + .../examples/cpp/state-branch-density.cpp | 1 + gems/decomplex/examples/cpp/state-mesh.cpp | 1 + .../examples/cpp/structural-topology.cpp | 1 + .../cpp/temporal-ordering-pressure.cpp | 1 + .../cpp/weighted-inlined-complexity.cpp | 5 + gems/decomplex/examples/csharp/co-update.cs | 6 + .../examples/csharp/decision-pressure.cs | 1 + .../examples/csharp/derived-state.cs | 1 + .../examples/csharp/false-simplicity.cs | 1 + gems/decomplex/examples/csharp/fat-union.cs | 1 + .../examples/csharp/flay-similarity.cs | 75 + .../examples/csharp/function-lcom.cs | 13 + .../examples/csharp/implicit-control-flow.cs | 1 + .../csharp/inconsistent-rename-clone.cs | 1 + gems/decomplex/examples/csharp/local-flow.cs | 9 + .../examples/csharp/locality-drag.cs | 29 + gems/decomplex/examples/csharp/miner.cs | 1 + .../csharp/operational-discontinuity.cs | 9 + .../examples/csharp/oversized-predicate.cs | 1 + .../examples/csharp/path-condition.cs | 1 + .../examples/csharp/predicate-alias.cs | 1 + .../examples/csharp/redundant-nil-guard.cs | 1 + 
.../examples/csharp/semantic-alias.cs | 1 + .../examples/csharp/sequence-mine.cs | 1 + .../examples/csharp/state-branch-density.cs | 1 + gems/decomplex/examples/csharp/state-mesh.cs | 1 + .../examples/csharp/structural-topology.cs | 1 + .../csharp/temporal-ordering-pressure.cs | 1 + .../csharp/weighted-inlined-complexity.cs | 1 + gems/decomplex/examples/go/co-update.go | 5 + .../examples/go/decision-pressure.go | 2 + gems/decomplex/examples/go/derived-state.go | 2 + .../decomplex/examples/go/false-simplicity.go | 3 + gems/decomplex/examples/go/fat-union.go | 8 + gems/decomplex/examples/go/flay-similarity.go | 3 + gems/decomplex/examples/go/function-lcom.go | 14 + .../examples/go/implicit-control-flow.go | 10 + .../examples/go/inconsistent-rename-clone.go | 3 + gems/decomplex/examples/go/local-flow.go | 10 + gems/decomplex/examples/go/locality-drag.go | 31 + gems/decomplex/examples/go/miner.go | 5 + .../examples/go/operational-discontinuity.go | 11 + .../examples/go/oversized-predicate.go | 2 + gems/decomplex/examples/go/path-condition.go | 5 + gems/decomplex/examples/go/predicate-alias.go | 4 + .../examples/go/redundant-nil-guard.go | 2 + gems/decomplex/examples/go/semantic-alias.go | 5 + gems/decomplex/examples/go/sequence-mine.go | 6 + .../examples/go/state-branch-density.go | 3 + gems/decomplex/examples/go/state-mesh.go | 6 + .../examples/go/structural-topology.go | 7 + .../examples/go/temporal-ordering-pressure.go | 6 + .../go/weighted-inlined-complexity.go | 6 + gems/decomplex/examples/java/co-update.java | 6 + .../examples/java/decision-pressure.java | 1 + .../examples/java/derived-state.java | 1 + .../examples/java/false-simplicity.java | 1 + gems/decomplex/examples/java/fat-union.java | 1 + .../examples/java/flay-similarity.java | 75 + .../examples/java/function-lcom.java | 13 + .../examples/java/implicit-control-flow.java | 1 + .../java/inconsistent-rename-clone.java | 1 + gems/decomplex/examples/java/local-flow.java | 9 + 
.../examples/java/locality-drag.java | 29 + gems/decomplex/examples/java/miner.java | 1 + .../java/operational-discontinuity.java | 9 + .../examples/java/oversized-predicate.java | 1 + .../examples/java/path-condition.java | 1 + .../examples/java/predicate-alias.java | 1 + .../examples/java/redundant-nil-guard.java | 1 + .../examples/java/semantic-alias.java | 1 + .../examples/java/sequence-mine.java | 1 + .../examples/java/state-branch-density.java | 1 + gems/decomplex/examples/java/state-mesh.java | 1 + .../examples/java/structural-topology.java | 1 + .../java/temporal-ordering-pressure.java | 1 + .../java/weighted-inlined-complexity.java | 1 + .../examples/javascript/co-update.js | 4 + .../examples/javascript/decision-pressure.js | 1 + .../examples/javascript/derived-state.js | 1 + .../examples/javascript/false-simplicity.js | 1 + .../examples/javascript/fat-union.js | 1 + .../examples/javascript/flay-similarity.js | 2 + .../examples/javascript/function-lcom.js | 12 + .../javascript/implicit-control-flow.js | 1 + .../javascript/inconsistent-rename-clone.js | 2 + .../examples/javascript/local-flow.js | 9 + .../examples/javascript/locality-drag.js | 27 + gems/decomplex/examples/javascript/miner.js | 4 + .../javascript/operational-discontinuity.js | 9 + .../javascript/oversized-predicate.js | 1 + .../examples/javascript/path-condition.js | 4 + .../examples/javascript/predicate-alias.js | 3 + .../javascript/redundant-nil-guard.js | 1 + .../examples/javascript/semantic-alias.js | 4 + .../examples/javascript/sequence-mine.js | 5 + .../javascript/state-branch-density.js | 1 + .../examples/javascript/state-mesh.js | 1 + .../javascript/structural-topology.js | 1 + .../javascript/temporal-ordering-pressure.js | 1 + .../javascript/weighted-inlined-complexity.js | 5 + gems/decomplex/examples/kotlin/co-update.kt | 4 + .../examples/kotlin/decision-pressure.kt | 1 + .../examples/kotlin/derived-state.kt | 1 + .../examples/kotlin/false-simplicity.kt | 1 + 
gems/decomplex/examples/kotlin/fat-union.kt | 7 + .../examples/kotlin/flay-similarity.kt | 73 + .../examples/kotlin/function-lcom.kt | 13 + .../examples/kotlin/implicit-control-flow.kt | 13 + .../kotlin/inconsistent-rename-clone.kt | 2 + gems/decomplex/examples/kotlin/local-flow.kt | 9 + .../examples/kotlin/locality-drag.kt | 27 + gems/decomplex/examples/kotlin/miner.kt | 4 + .../kotlin/operational-discontinuity.kt | 9 + .../examples/kotlin/oversized-predicate.kt | 1 + .../examples/kotlin/path-condition.kt | 4 + .../examples/kotlin/predicate-alias.kt | 3 + .../examples/kotlin/redundant-nil-guard.kt | 1 + .../examples/kotlin/semantic-alias.kt | 4 + .../examples/kotlin/sequence-mine.kt | 5 + .../examples/kotlin/state-branch-density.kt | 1 + gems/decomplex/examples/kotlin/state-mesh.kt | 1 + .../examples/kotlin/structural-topology.kt | 1 + .../kotlin/temporal-ordering-pressure.kt | 21 + .../kotlin/weighted-inlined-complexity.kt | 5 + gems/decomplex/examples/lua/co-update.lua | 15 + .../examples/lua/decision-pressure.lua | 4 + gems/decomplex/examples/lua/derived-state.lua | 1 + .../examples/lua/false-simplicity.lua | 2 + gems/decomplex/examples/lua/fat-union.lua | 5 + .../examples/lua/flay-similarity.lua | 2 + gems/decomplex/examples/lua/function-lcom.lua | 13 + .../examples/lua/implicit-control-flow.lua | 9 + .../lua/inconsistent-rename-clone.lua | 2 + gems/decomplex/examples/lua/local-flow.lua | 9 + gems/decomplex/examples/lua/locality-drag.lua | 27 + gems/decomplex/examples/lua/miner.lua | 4 + .../lua/operational-discontinuity.lua | 9 + .../examples/lua/oversized-predicate.lua | 1 + .../decomplex/examples/lua/path-condition.lua | 4 + .../examples/lua/predicate-alias.lua | 3 + .../examples/lua/redundant-nil-guard.lua | 1 + .../decomplex/examples/lua/semantic-alias.lua | 4 + gems/decomplex/examples/lua/sequence-mine.lua | 5 + .../examples/lua/state-branch-density.lua | 2 + gems/decomplex/examples/lua/state-mesh.lua | 5 + .../examples/lua/structural-topology.lua | 6 + 
.../lua/temporal-ordering-pressure.lua | 5 + .../lua/weighted-inlined-complexity.lua | 5 + .../decomplex/examples/oracles/co-update.json | 28 + .../examples/oracles/decision-pressure.json | 11 + .../examples/oracles/derived-state.json | 12 + .../examples/oracles/false-simplicity.json | 11 + .../decomplex/examples/oracles/fat-union.json | 11 + .../examples/oracles/flay-similarity.json | 13 + .../examples/oracles/function-lcom.json | 11 + .../oracles/implicit-control-flow.json | 18 + .../oracles/inconsistent-rename-clone.json | 12 + .../examples/oracles/local-flow.json | 12 + .../examples/oracles/locality-drag.json | 11 + gems/decomplex/examples/oracles/miner.json | 13 + .../oracles/operational-discontinuity.json | 12 + .../examples/oracles/oversized-predicate.json | 12 + .../examples/oracles/path-condition.json | 11 + .../examples/oracles/predicate-alias.json | 13 + .../examples/oracles/redundant-nil-guard.json | 11 + .../examples/oracles/semantic-alias.json | 13 + .../examples/oracles/sequence-mine.json | 17 + .../oracles/state-branch-density.json | 11 + .../examples/oracles/state-mesh.json | 11 + .../examples/oracles/structural-topology.json | 9 + .../oracles/temporal-ordering-pressure.json | 11 + .../oracles/weighted-inlined-complexity.json | 13 + gems/decomplex/examples/python/co-update.py | 14 + .../examples/python/decision-pressure.py | 3 + .../examples/python/derived-state.py | 4 + .../examples/python/false-simplicity.py | 3 + gems/decomplex/examples/python/fat-union.py | 8 + .../examples/python/flay-similarity.py | 55 + .../examples/python/function-lcom.py | 11 + .../examples/python/implicit-control-flow.py | 9 + .../python/inconsistent-rename-clone.py | 11 + gems/decomplex/examples/python/local-flow.py | 8 + .../examples/python/locality-drag.py | 14 + gems/decomplex/examples/python/miner.py | 4 + .../python/operational-discontinuity.py | 8 + .../examples/python/oversized-predicate.py | 3 + .../examples/python/path-condition.py | 8 + 
.../examples/python/predicate-alias.py | 3 + .../examples/python/redundant-nil-guard.py | 3 + .../examples/python/semantic-alias.py | 7 + .../examples/python/sequence-mine.py | 5 + .../examples/python/state-branch-density.py | 6 + gems/decomplex/examples/python/state-mesh.py | 10 + .../examples/python/structural-topology.py | 11 + .../python/temporal-ordering-pressure.py | 5 + .../python/weighted-inlined-complexity.py | 30 + gems/decomplex/examples/ruby/co-update.rb | 20 + .../examples/ruby/decision-pressure.rb | 6 + gems/decomplex/examples/ruby/derived-state.rb | 7 + .../examples/ruby/false-simplicity.rb | 7 + gems/decomplex/examples/ruby/fat-union.rb | 27 + .../examples/ruby/flay-similarity.rb | 75 + gems/decomplex/examples/ruby/function-lcom.rb | 16 + .../examples/ruby/implicit-control-flow.rb | 13 + .../ruby/inconsistent-rename-clone.rb | 15 + gems/decomplex/examples/ruby/local-flow.rb | 13 + gems/decomplex/examples/ruby/locality-drag.rb | 31 + gems/decomplex/examples/ruby/miner.rb | 17 + .../ruby/operational-discontinuity.rb | 11 + .../examples/ruby/oversized-predicate.rb | 7 + .../decomplex/examples/ruby/path-condition.rb | 17 + .../examples/ruby/predicate-alias.rb | 13 + .../examples/ruby/redundant-nil-guard.rb | 7 + .../decomplex/examples/ruby/semantic-alias.rb | 9 + gems/decomplex/examples/ruby/sequence-mine.rb | 7 + .../examples/ruby/state-branch-density.rb | 19 + gems/decomplex/examples/ruby/state-mesh.rb | 20 + .../examples/ruby/structural-topology.rb | 22 + .../ruby/temporal-ordering-pressure.rb | 20 + .../ruby/weighted-inlined-complexity.rb | 59 + gems/decomplex/examples/rust/co-update.rs | 23 + .../examples/rust/decision-pressure.rs | 3 + gems/decomplex/examples/rust/derived-state.rs | 5 + .../examples/rust/false-simplicity.rs | 7 + gems/decomplex/examples/rust/fat-union.rs | 28 + .../examples/rust/flay-similarity.rs | 41 + gems/decomplex/examples/rust/function-lcom.rs | 12 + .../examples/rust/implicit-control-flow.rs | 17 + 
.../rust/inconsistent-rename-clone.rs | 13 + gems/decomplex/examples/rust/local-flow.rs | 9 + gems/decomplex/examples/rust/locality-drag.rs | 27 + gems/decomplex/examples/rust/miner.rs | 15 + .../rust/operational-discontinuity.rs | 10 + .../examples/rust/oversized-predicate.rs | 5 + .../decomplex/examples/rust/path-condition.rs | 15 + .../examples/rust/predicate-alias.rs | 11 + .../examples/rust/redundant-nil-guard.rs | 5 + .../decomplex/examples/rust/semantic-alias.rs | 8 + gems/decomplex/examples/rust/sequence-mine.rs | 5 + .../examples/rust/state-branch-density.rs | 15 + gems/decomplex/examples/rust/state-mesh.rs | 23 + .../examples/rust/structural-topology.rs | 18 + .../rust/temporal-ordering-pressure.rs | 23 + .../rust/weighted-inlined-complexity.rs | 44 + gems/decomplex/examples/swift/co-update.swift | 4 + .../examples/swift/decision-pressure.swift | 1 + .../examples/swift/derived-state.swift | 1 + .../examples/swift/false-simplicity.swift | 1 + gems/decomplex/examples/swift/fat-union.swift | 7 + .../examples/swift/flay-similarity.swift | 73 + .../examples/swift/function-lcom.swift | 13 + .../swift/implicit-control-flow.swift | 13 + .../swift/inconsistent-rename-clone.swift | 2 + .../decomplex/examples/swift/local-flow.swift | 9 + .../examples/swift/locality-drag.swift | 27 + gems/decomplex/examples/swift/miner.swift | 4 + .../swift/operational-discontinuity.swift | 9 + .../examples/swift/oversized-predicate.swift | 1 + .../examples/swift/path-condition.swift | 4 + .../examples/swift/predicate-alias.swift | 3 + .../examples/swift/redundant-nil-guard.swift | 1 + .../examples/swift/semantic-alias.swift | 4 + .../examples/swift/sequence-mine.swift | 5 + .../examples/swift/state-branch-density.swift | 1 + .../decomplex/examples/swift/state-mesh.swift | 1 + .../examples/swift/structural-topology.swift | 1 + .../swift/temporal-ordering-pressure.swift | 1 + .../swift/weighted-inlined-complexity.swift | 5 + .../examples/typescript/co-update.ts | 4 + 
.../examples/typescript/decision-pressure.ts | 1 + .../examples/typescript/derived-state.ts | 1 + .../examples/typescript/false-simplicity.ts | 1 + .../examples/typescript/fat-union.ts | 1 + .../examples/typescript/flay-similarity.ts | 2 + .../examples/typescript/function-lcom.ts | 12 + .../typescript/implicit-control-flow.ts | 1 + .../typescript/inconsistent-rename-clone.ts | 2 + .../examples/typescript/local-flow.ts | 9 + .../examples/typescript/locality-drag.ts | 27 + gems/decomplex/examples/typescript/miner.ts | 4 + .../typescript/operational-discontinuity.ts | 9 + .../typescript/oversized-predicate.ts | 1 + .../examples/typescript/path-condition.ts | 4 + .../examples/typescript/predicate-alias.ts | 3 + .../typescript/redundant-nil-guard.ts | 1 + .../examples/typescript/semantic-alias.ts | 4 + .../examples/typescript/sequence-mine.ts | 5 + .../typescript/state-branch-density.ts | 1 + .../examples/typescript/state-mesh.ts | 1 + .../typescript/structural-topology.ts | 1 + .../typescript/temporal-ordering-pressure.ts | 1 + .../typescript/weighted-inlined-complexity.ts | 5 + gems/decomplex/examples/zig/co-update.zig | 23 + .../examples/zig/decision-pressure.zig | 3 + gems/decomplex/examples/zig/derived-state.zig | 6 + .../examples/zig/false-simplicity.zig | 6 + gems/decomplex/examples/zig/fat-union.zig | 28 + .../examples/zig/flay-similarity.zig | 41 + gems/decomplex/examples/zig/function-lcom.zig | 12 + .../examples/zig/implicit-control-flow.zig | 15 + .../zig/inconsistent-rename-clone.zig | 13 + gems/decomplex/examples/zig/local-flow.zig | 9 + gems/decomplex/examples/zig/locality-drag.zig | 30 + gems/decomplex/examples/zig/miner.zig | 15 + .../zig/operational-discontinuity.zig | 12 + .../examples/zig/oversized-predicate.zig | 5 + .../decomplex/examples/zig/path-condition.zig | 15 + .../examples/zig/predicate-alias.zig | 11 + .../examples/zig/redundant-nil-guard.zig | 5 + .../decomplex/examples/zig/semantic-alias.zig | 8 + 
gems/decomplex/examples/zig/sequence-mine.zig | 5 + .../examples/zig/state-branch-density.zig | 13 + gems/decomplex/examples/zig/state-mesh.zig | 21 + .../examples/zig/structural-topology.zig | 16 + .../zig/temporal-ordering-pressure.zig | 21 + .../zig/weighted-inlined-complexity.zig | 45 + gems/decomplex/lib/decomplex/ast.rb | 3970 +-- .../lib/decomplex/ast/adapters/base.rb | 790 + .../lib/decomplex/ast/adapters/lua.rb | 184 + .../lib/decomplex/ast/adapters/python.rb | 224 + .../lib/decomplex/ast/adapters/ruby.rb | 101 + .../lib/decomplex/ast/adapters/typescript.rb | 147 + gems/decomplex/lib/decomplex/ast/cache.rb | 11 + .../lib/decomplex/ast/legacy_normalizer.rb | 2563 ++ gems/decomplex/lib/decomplex/ast/node.rb | 17 + .../lib/decomplex/ast/semantic_node.rb | 31 + .../lib/decomplex/ast/semantic_normalizer.rb | 136 + .../decomplex/lib/decomplex/ast/source_map.rb | 16 + .../lib/decomplex/decision_pressure.rb | 266 +- gems/decomplex/lib/decomplex/derived_state.rb | 104 +- .../lib/decomplex/false_simplicity.rb | 569 +- gems/decomplex/lib/decomplex/fat_union.rb | 213 +- .../lib/decomplex/flay_similarity.rb | 27 +- .../decomplex/inconsistent_rename_clone.rb | 94 +- gems/decomplex/lib/decomplex/local_flow.rb | 234 +- .../lib/decomplex/mutability_pressure.rb | 3 +- .../decomplex/operational_discontinuity.rb | 2 +- .../lib/decomplex/ordered_protocol_mine.rb | 436 +- .../lib/decomplex/oversized_predicate.rb | 56 +- .../decomplex/lib/decomplex/path_condition.rb | 89 +- .../lib/decomplex/predicate_alias.rb | 50 +- .../lib/decomplex/redundant_nil_guard.rb | 509 +- .../decomplex/lib/decomplex/semantic_alias.rb | 84 +- gems/decomplex/lib/decomplex/sequence_mine.rb | 84 +- .../lib/decomplex/state_branch_density.rb | 282 +- gems/decomplex/lib/decomplex/state_mesh.rb | 151 +- .../lib/decomplex/structural_topology.rb | 363 +- .../lib/decomplex/superfluous_state.rb | 1 - gems/decomplex/lib/decomplex/syntax.rb | 1011 +- .../decomplex/lib/decomplex/syntax/effects.rb | 192 + 
.../lib/decomplex/syntax/protocols.rb | 82 + gems/decomplex/lib/decomplex/syntax/ruby.rb | 896 + .../lib/decomplex/syntax/ruby_effects.rb | 197 + .../lib/decomplex/syntax/ruby_protocols.rb | 360 + .../decomplex/temporal_ordering_pressure.rb | 108 +- .../weighted_inlined_cognitive_complexity.rb | 313 +- gems/decomplex/rust/src/decomplex/ast-test.rs | 20851 +++++++++++++++ gems/decomplex/rust/src/decomplex/ast.rs | 20943 +--------------- gems/decomplex/test/ast_test.rb | 49 + gems/decomplex/test/decision_pressure_test.rb | 13 + gems/decomplex/test/derived_state_test.rb | 14 + gems/decomplex/test/examples_oracle_test.rb | 239 + gems/decomplex/test/false_simplicity_test.rb | 53 +- gems/decomplex/test/fat_union_test.rb | 16 + .../test/inconsistent_rename_clone_test.rb | 21 + gems/decomplex/test/local_flow_test.rb | 13 + .../test/ordered_protocol_mine_test.rb | 18 + .../test/oversized_predicate_test.rb | 19 + gems/decomplex/test/path_condition_test.rb | 11 + gems/decomplex/test/predicate_alias_test.rb | 11 + .../test/redundant_nil_guard_test.rb | 11 + gems/decomplex/test/semantic_alias_test.rb | 13 +- gems/decomplex/test/sequence_mine_test.rb | 15 + .../test/state_branch_density_test.rb | 17 + gems/decomplex/test/state_mesh_test.rb | 24 +- .../test/structural_topology_test.rb | 20 + gems/decomplex/test/syntax_test.rb | 43 +- .../test/temporal_ordering_pressure_test.rb | 17 + ...ghted_inlined_cognitive_complexity_test.rb | 23 + 425 files changed, 33662 insertions(+), 28147 deletions(-) create mode 100644 gems/decomplex/docs/agents/architectural-issues.md create mode 100644 gems/decomplex/docs/agents/ruby-first-cross-language-ast-design.md create mode 100644 gems/decomplex/examples/c/co-update.c create mode 100644 gems/decomplex/examples/c/decision-pressure.c create mode 100644 gems/decomplex/examples/c/derived-state.c create mode 100644 gems/decomplex/examples/c/false-simplicity.c create mode 100644 gems/decomplex/examples/c/fat-union.c create mode 100644 
gems/decomplex/examples/c/flay-similarity.c create mode 100644 gems/decomplex/examples/c/function-lcom.c create mode 100644 gems/decomplex/examples/c/implicit-control-flow.c create mode 100644 gems/decomplex/examples/c/inconsistent-rename-clone.c create mode 100644 gems/decomplex/examples/c/local-flow.c create mode 100644 gems/decomplex/examples/c/locality-drag.c create mode 100644 gems/decomplex/examples/c/miner.c create mode 100644 gems/decomplex/examples/c/operational-discontinuity.c create mode 100644 gems/decomplex/examples/c/oversized-predicate.c create mode 100644 gems/decomplex/examples/c/path-condition.c create mode 100644 gems/decomplex/examples/c/predicate-alias.c create mode 100644 gems/decomplex/examples/c/redundant-nil-guard.c create mode 100644 gems/decomplex/examples/c/semantic-alias.c create mode 100644 gems/decomplex/examples/c/sequence-mine.c create mode 100644 gems/decomplex/examples/c/state-branch-density.c create mode 100644 gems/decomplex/examples/c/state-mesh.c create mode 100644 gems/decomplex/examples/c/structural-topology.c create mode 100644 gems/decomplex/examples/c/temporal-ordering-pressure.c create mode 100644 gems/decomplex/examples/c/weighted-inlined-complexity.c create mode 100644 gems/decomplex/examples/cpp/co-update.cpp create mode 100644 gems/decomplex/examples/cpp/decision-pressure.cpp create mode 100644 gems/decomplex/examples/cpp/derived-state.cpp create mode 100644 gems/decomplex/examples/cpp/false-simplicity.cpp create mode 100644 gems/decomplex/examples/cpp/fat-union.cpp create mode 100644 gems/decomplex/examples/cpp/flay-similarity.cpp create mode 100644 gems/decomplex/examples/cpp/function-lcom.cpp create mode 100644 gems/decomplex/examples/cpp/implicit-control-flow.cpp create mode 100644 gems/decomplex/examples/cpp/inconsistent-rename-clone.cpp create mode 100644 gems/decomplex/examples/cpp/local-flow.cpp create mode 100644 gems/decomplex/examples/cpp/locality-drag.cpp create mode 100644 
gems/decomplex/examples/cpp/miner.cpp create mode 100644 gems/decomplex/examples/cpp/operational-discontinuity.cpp create mode 100644 gems/decomplex/examples/cpp/oversized-predicate.cpp create mode 100644 gems/decomplex/examples/cpp/path-condition.cpp create mode 100644 gems/decomplex/examples/cpp/predicate-alias.cpp create mode 100644 gems/decomplex/examples/cpp/redundant-nil-guard.cpp create mode 100644 gems/decomplex/examples/cpp/semantic-alias.cpp create mode 100644 gems/decomplex/examples/cpp/sequence-mine.cpp create mode 100644 gems/decomplex/examples/cpp/state-branch-density.cpp create mode 100644 gems/decomplex/examples/cpp/state-mesh.cpp create mode 100644 gems/decomplex/examples/cpp/structural-topology.cpp create mode 100644 gems/decomplex/examples/cpp/temporal-ordering-pressure.cpp create mode 100644 gems/decomplex/examples/cpp/weighted-inlined-complexity.cpp create mode 100644 gems/decomplex/examples/csharp/co-update.cs create mode 100644 gems/decomplex/examples/csharp/decision-pressure.cs create mode 100644 gems/decomplex/examples/csharp/derived-state.cs create mode 100644 gems/decomplex/examples/csharp/false-simplicity.cs create mode 100644 gems/decomplex/examples/csharp/fat-union.cs create mode 100644 gems/decomplex/examples/csharp/flay-similarity.cs create mode 100644 gems/decomplex/examples/csharp/function-lcom.cs create mode 100644 gems/decomplex/examples/csharp/implicit-control-flow.cs create mode 100644 gems/decomplex/examples/csharp/inconsistent-rename-clone.cs create mode 100644 gems/decomplex/examples/csharp/local-flow.cs create mode 100644 gems/decomplex/examples/csharp/locality-drag.cs create mode 100644 gems/decomplex/examples/csharp/miner.cs create mode 100644 gems/decomplex/examples/csharp/operational-discontinuity.cs create mode 100644 gems/decomplex/examples/csharp/oversized-predicate.cs create mode 100644 gems/decomplex/examples/csharp/path-condition.cs create mode 100644 gems/decomplex/examples/csharp/predicate-alias.cs create mode 
100644 gems/decomplex/examples/csharp/redundant-nil-guard.cs create mode 100644 gems/decomplex/examples/csharp/semantic-alias.cs create mode 100644 gems/decomplex/examples/csharp/sequence-mine.cs create mode 100644 gems/decomplex/examples/csharp/state-branch-density.cs create mode 100644 gems/decomplex/examples/csharp/state-mesh.cs create mode 100644 gems/decomplex/examples/csharp/structural-topology.cs create mode 100644 gems/decomplex/examples/csharp/temporal-ordering-pressure.cs create mode 100644 gems/decomplex/examples/csharp/weighted-inlined-complexity.cs create mode 100644 gems/decomplex/examples/go/co-update.go create mode 100644 gems/decomplex/examples/go/decision-pressure.go create mode 100644 gems/decomplex/examples/go/derived-state.go create mode 100644 gems/decomplex/examples/go/false-simplicity.go create mode 100644 gems/decomplex/examples/go/fat-union.go create mode 100644 gems/decomplex/examples/go/flay-similarity.go create mode 100644 gems/decomplex/examples/go/function-lcom.go create mode 100644 gems/decomplex/examples/go/implicit-control-flow.go create mode 100644 gems/decomplex/examples/go/inconsistent-rename-clone.go create mode 100644 gems/decomplex/examples/go/local-flow.go create mode 100644 gems/decomplex/examples/go/locality-drag.go create mode 100644 gems/decomplex/examples/go/miner.go create mode 100644 gems/decomplex/examples/go/operational-discontinuity.go create mode 100644 gems/decomplex/examples/go/oversized-predicate.go create mode 100644 gems/decomplex/examples/go/path-condition.go create mode 100644 gems/decomplex/examples/go/predicate-alias.go create mode 100644 gems/decomplex/examples/go/redundant-nil-guard.go create mode 100644 gems/decomplex/examples/go/semantic-alias.go create mode 100644 gems/decomplex/examples/go/sequence-mine.go create mode 100644 gems/decomplex/examples/go/state-branch-density.go create mode 100644 gems/decomplex/examples/go/state-mesh.go create mode 100644 
gems/decomplex/examples/go/structural-topology.go create mode 100644 gems/decomplex/examples/go/temporal-ordering-pressure.go create mode 100644 gems/decomplex/examples/go/weighted-inlined-complexity.go create mode 100644 gems/decomplex/examples/java/co-update.java create mode 100644 gems/decomplex/examples/java/decision-pressure.java create mode 100644 gems/decomplex/examples/java/derived-state.java create mode 100644 gems/decomplex/examples/java/false-simplicity.java create mode 100644 gems/decomplex/examples/java/fat-union.java create mode 100644 gems/decomplex/examples/java/flay-similarity.java create mode 100644 gems/decomplex/examples/java/function-lcom.java create mode 100644 gems/decomplex/examples/java/implicit-control-flow.java create mode 100644 gems/decomplex/examples/java/inconsistent-rename-clone.java create mode 100644 gems/decomplex/examples/java/local-flow.java create mode 100644 gems/decomplex/examples/java/locality-drag.java create mode 100644 gems/decomplex/examples/java/miner.java create mode 100644 gems/decomplex/examples/java/operational-discontinuity.java create mode 100644 gems/decomplex/examples/java/oversized-predicate.java create mode 100644 gems/decomplex/examples/java/path-condition.java create mode 100644 gems/decomplex/examples/java/predicate-alias.java create mode 100644 gems/decomplex/examples/java/redundant-nil-guard.java create mode 100644 gems/decomplex/examples/java/semantic-alias.java create mode 100644 gems/decomplex/examples/java/sequence-mine.java create mode 100644 gems/decomplex/examples/java/state-branch-density.java create mode 100644 gems/decomplex/examples/java/state-mesh.java create mode 100644 gems/decomplex/examples/java/structural-topology.java create mode 100644 gems/decomplex/examples/java/temporal-ordering-pressure.java create mode 100644 gems/decomplex/examples/java/weighted-inlined-complexity.java create mode 100644 gems/decomplex/examples/javascript/co-update.js create mode 100644 
gems/decomplex/examples/javascript/decision-pressure.js create mode 100644 gems/decomplex/examples/javascript/derived-state.js create mode 100644 gems/decomplex/examples/javascript/false-simplicity.js create mode 100644 gems/decomplex/examples/javascript/fat-union.js create mode 100644 gems/decomplex/examples/javascript/flay-similarity.js create mode 100644 gems/decomplex/examples/javascript/function-lcom.js create mode 100644 gems/decomplex/examples/javascript/implicit-control-flow.js create mode 100644 gems/decomplex/examples/javascript/inconsistent-rename-clone.js create mode 100644 gems/decomplex/examples/javascript/local-flow.js create mode 100644 gems/decomplex/examples/javascript/locality-drag.js create mode 100644 gems/decomplex/examples/javascript/miner.js create mode 100644 gems/decomplex/examples/javascript/operational-discontinuity.js create mode 100644 gems/decomplex/examples/javascript/oversized-predicate.js create mode 100644 gems/decomplex/examples/javascript/path-condition.js create mode 100644 gems/decomplex/examples/javascript/predicate-alias.js create mode 100644 gems/decomplex/examples/javascript/redundant-nil-guard.js create mode 100644 gems/decomplex/examples/javascript/semantic-alias.js create mode 100644 gems/decomplex/examples/javascript/sequence-mine.js create mode 100644 gems/decomplex/examples/javascript/state-branch-density.js create mode 100644 gems/decomplex/examples/javascript/state-mesh.js create mode 100644 gems/decomplex/examples/javascript/structural-topology.js create mode 100644 gems/decomplex/examples/javascript/temporal-ordering-pressure.js create mode 100644 gems/decomplex/examples/javascript/weighted-inlined-complexity.js create mode 100644 gems/decomplex/examples/kotlin/co-update.kt create mode 100644 gems/decomplex/examples/kotlin/decision-pressure.kt create mode 100644 gems/decomplex/examples/kotlin/derived-state.kt create mode 100644 gems/decomplex/examples/kotlin/false-simplicity.kt create mode 100644 
gems/decomplex/examples/kotlin/fat-union.kt create mode 100644 gems/decomplex/examples/kotlin/flay-similarity.kt create mode 100644 gems/decomplex/examples/kotlin/function-lcom.kt create mode 100644 gems/decomplex/examples/kotlin/implicit-control-flow.kt create mode 100644 gems/decomplex/examples/kotlin/inconsistent-rename-clone.kt create mode 100644 gems/decomplex/examples/kotlin/local-flow.kt create mode 100644 gems/decomplex/examples/kotlin/locality-drag.kt create mode 100644 gems/decomplex/examples/kotlin/miner.kt create mode 100644 gems/decomplex/examples/kotlin/operational-discontinuity.kt create mode 100644 gems/decomplex/examples/kotlin/oversized-predicate.kt create mode 100644 gems/decomplex/examples/kotlin/path-condition.kt create mode 100644 gems/decomplex/examples/kotlin/predicate-alias.kt create mode 100644 gems/decomplex/examples/kotlin/redundant-nil-guard.kt create mode 100644 gems/decomplex/examples/kotlin/semantic-alias.kt create mode 100644 gems/decomplex/examples/kotlin/sequence-mine.kt create mode 100644 gems/decomplex/examples/kotlin/state-branch-density.kt create mode 100644 gems/decomplex/examples/kotlin/state-mesh.kt create mode 100644 gems/decomplex/examples/kotlin/structural-topology.kt create mode 100644 gems/decomplex/examples/kotlin/temporal-ordering-pressure.kt create mode 100644 gems/decomplex/examples/kotlin/weighted-inlined-complexity.kt create mode 100644 gems/decomplex/examples/lua/co-update.lua create mode 100644 gems/decomplex/examples/lua/decision-pressure.lua create mode 100644 gems/decomplex/examples/lua/derived-state.lua create mode 100644 gems/decomplex/examples/lua/false-simplicity.lua create mode 100644 gems/decomplex/examples/lua/fat-union.lua create mode 100644 gems/decomplex/examples/lua/flay-similarity.lua create mode 100644 gems/decomplex/examples/lua/function-lcom.lua create mode 100644 gems/decomplex/examples/lua/implicit-control-flow.lua create mode 100644 gems/decomplex/examples/lua/inconsistent-rename-clone.lua 
create mode 100644 gems/decomplex/examples/lua/local-flow.lua create mode 100644 gems/decomplex/examples/lua/locality-drag.lua create mode 100644 gems/decomplex/examples/lua/miner.lua create mode 100644 gems/decomplex/examples/lua/operational-discontinuity.lua create mode 100644 gems/decomplex/examples/lua/oversized-predicate.lua create mode 100644 gems/decomplex/examples/lua/path-condition.lua create mode 100644 gems/decomplex/examples/lua/predicate-alias.lua create mode 100644 gems/decomplex/examples/lua/redundant-nil-guard.lua create mode 100644 gems/decomplex/examples/lua/semantic-alias.lua create mode 100644 gems/decomplex/examples/lua/sequence-mine.lua create mode 100644 gems/decomplex/examples/lua/state-branch-density.lua create mode 100644 gems/decomplex/examples/lua/state-mesh.lua create mode 100644 gems/decomplex/examples/lua/structural-topology.lua create mode 100644 gems/decomplex/examples/lua/temporal-ordering-pressure.lua create mode 100644 gems/decomplex/examples/lua/weighted-inlined-complexity.lua create mode 100644 gems/decomplex/examples/oracles/co-update.json create mode 100644 gems/decomplex/examples/oracles/decision-pressure.json create mode 100644 gems/decomplex/examples/oracles/derived-state.json create mode 100644 gems/decomplex/examples/oracles/false-simplicity.json create mode 100644 gems/decomplex/examples/oracles/fat-union.json create mode 100644 gems/decomplex/examples/oracles/flay-similarity.json create mode 100644 gems/decomplex/examples/oracles/function-lcom.json create mode 100644 gems/decomplex/examples/oracles/implicit-control-flow.json create mode 100644 gems/decomplex/examples/oracles/inconsistent-rename-clone.json create mode 100644 gems/decomplex/examples/oracles/local-flow.json create mode 100644 gems/decomplex/examples/oracles/locality-drag.json create mode 100644 gems/decomplex/examples/oracles/miner.json create mode 100644 gems/decomplex/examples/oracles/operational-discontinuity.json create mode 100644 
gems/decomplex/examples/oracles/oversized-predicate.json create mode 100644 gems/decomplex/examples/oracles/path-condition.json create mode 100644 gems/decomplex/examples/oracles/predicate-alias.json create mode 100644 gems/decomplex/examples/oracles/redundant-nil-guard.json create mode 100644 gems/decomplex/examples/oracles/semantic-alias.json create mode 100644 gems/decomplex/examples/oracles/sequence-mine.json create mode 100644 gems/decomplex/examples/oracles/state-branch-density.json create mode 100644 gems/decomplex/examples/oracles/state-mesh.json create mode 100644 gems/decomplex/examples/oracles/structural-topology.json create mode 100644 gems/decomplex/examples/oracles/temporal-ordering-pressure.json create mode 100644 gems/decomplex/examples/oracles/weighted-inlined-complexity.json create mode 100644 gems/decomplex/examples/python/co-update.py create mode 100644 gems/decomplex/examples/python/decision-pressure.py create mode 100644 gems/decomplex/examples/python/derived-state.py create mode 100644 gems/decomplex/examples/python/false-simplicity.py create mode 100644 gems/decomplex/examples/python/fat-union.py create mode 100644 gems/decomplex/examples/python/flay-similarity.py create mode 100644 gems/decomplex/examples/python/function-lcom.py create mode 100644 gems/decomplex/examples/python/implicit-control-flow.py create mode 100644 gems/decomplex/examples/python/inconsistent-rename-clone.py create mode 100644 gems/decomplex/examples/python/local-flow.py create mode 100644 gems/decomplex/examples/python/locality-drag.py create mode 100644 gems/decomplex/examples/python/miner.py create mode 100644 gems/decomplex/examples/python/operational-discontinuity.py create mode 100644 gems/decomplex/examples/python/oversized-predicate.py create mode 100644 gems/decomplex/examples/python/path-condition.py create mode 100644 gems/decomplex/examples/python/predicate-alias.py create mode 100644 gems/decomplex/examples/python/redundant-nil-guard.py create mode 100644 
gems/decomplex/examples/python/semantic-alias.py create mode 100644 gems/decomplex/examples/python/sequence-mine.py create mode 100644 gems/decomplex/examples/python/state-branch-density.py create mode 100644 gems/decomplex/examples/python/state-mesh.py create mode 100644 gems/decomplex/examples/python/structural-topology.py create mode 100644 gems/decomplex/examples/python/temporal-ordering-pressure.py create mode 100644 gems/decomplex/examples/python/weighted-inlined-complexity.py create mode 100644 gems/decomplex/examples/ruby/co-update.rb create mode 100644 gems/decomplex/examples/ruby/decision-pressure.rb create mode 100644 gems/decomplex/examples/ruby/derived-state.rb create mode 100644 gems/decomplex/examples/ruby/false-simplicity.rb create mode 100644 gems/decomplex/examples/ruby/fat-union.rb create mode 100644 gems/decomplex/examples/ruby/flay-similarity.rb create mode 100644 gems/decomplex/examples/ruby/function-lcom.rb create mode 100644 gems/decomplex/examples/ruby/implicit-control-flow.rb create mode 100644 gems/decomplex/examples/ruby/inconsistent-rename-clone.rb create mode 100644 gems/decomplex/examples/ruby/local-flow.rb create mode 100644 gems/decomplex/examples/ruby/locality-drag.rb create mode 100644 gems/decomplex/examples/ruby/miner.rb create mode 100644 gems/decomplex/examples/ruby/operational-discontinuity.rb create mode 100644 gems/decomplex/examples/ruby/oversized-predicate.rb create mode 100644 gems/decomplex/examples/ruby/path-condition.rb create mode 100644 gems/decomplex/examples/ruby/predicate-alias.rb create mode 100644 gems/decomplex/examples/ruby/redundant-nil-guard.rb create mode 100644 gems/decomplex/examples/ruby/semantic-alias.rb create mode 100644 gems/decomplex/examples/ruby/sequence-mine.rb create mode 100644 gems/decomplex/examples/ruby/state-branch-density.rb create mode 100644 gems/decomplex/examples/ruby/state-mesh.rb create mode 100644 gems/decomplex/examples/ruby/structural-topology.rb create mode 100644 
gems/decomplex/examples/ruby/temporal-ordering-pressure.rb create mode 100644 gems/decomplex/examples/ruby/weighted-inlined-complexity.rb create mode 100644 gems/decomplex/examples/rust/co-update.rs create mode 100644 gems/decomplex/examples/rust/decision-pressure.rs create mode 100644 gems/decomplex/examples/rust/derived-state.rs create mode 100644 gems/decomplex/examples/rust/false-simplicity.rs create mode 100644 gems/decomplex/examples/rust/fat-union.rs create mode 100644 gems/decomplex/examples/rust/flay-similarity.rs create mode 100644 gems/decomplex/examples/rust/function-lcom.rs create mode 100644 gems/decomplex/examples/rust/implicit-control-flow.rs create mode 100644 gems/decomplex/examples/rust/inconsistent-rename-clone.rs create mode 100644 gems/decomplex/examples/rust/local-flow.rs create mode 100644 gems/decomplex/examples/rust/locality-drag.rs create mode 100644 gems/decomplex/examples/rust/miner.rs create mode 100644 gems/decomplex/examples/rust/operational-discontinuity.rs create mode 100644 gems/decomplex/examples/rust/oversized-predicate.rs create mode 100644 gems/decomplex/examples/rust/path-condition.rs create mode 100644 gems/decomplex/examples/rust/predicate-alias.rs create mode 100644 gems/decomplex/examples/rust/redundant-nil-guard.rs create mode 100644 gems/decomplex/examples/rust/semantic-alias.rs create mode 100644 gems/decomplex/examples/rust/sequence-mine.rs create mode 100644 gems/decomplex/examples/rust/state-branch-density.rs create mode 100644 gems/decomplex/examples/rust/state-mesh.rs create mode 100644 gems/decomplex/examples/rust/structural-topology.rs create mode 100644 gems/decomplex/examples/rust/temporal-ordering-pressure.rs create mode 100644 gems/decomplex/examples/rust/weighted-inlined-complexity.rs create mode 100644 gems/decomplex/examples/swift/co-update.swift create mode 100644 gems/decomplex/examples/swift/decision-pressure.swift create mode 100644 gems/decomplex/examples/swift/derived-state.swift create mode 100644 
gems/decomplex/examples/swift/false-simplicity.swift create mode 100644 gems/decomplex/examples/swift/fat-union.swift create mode 100644 gems/decomplex/examples/swift/flay-similarity.swift create mode 100644 gems/decomplex/examples/swift/function-lcom.swift create mode 100644 gems/decomplex/examples/swift/implicit-control-flow.swift create mode 100644 gems/decomplex/examples/swift/inconsistent-rename-clone.swift create mode 100644 gems/decomplex/examples/swift/local-flow.swift create mode 100644 gems/decomplex/examples/swift/locality-drag.swift create mode 100644 gems/decomplex/examples/swift/miner.swift create mode 100644 gems/decomplex/examples/swift/operational-discontinuity.swift create mode 100644 gems/decomplex/examples/swift/oversized-predicate.swift create mode 100644 gems/decomplex/examples/swift/path-condition.swift create mode 100644 gems/decomplex/examples/swift/predicate-alias.swift create mode 100644 gems/decomplex/examples/swift/redundant-nil-guard.swift create mode 100644 gems/decomplex/examples/swift/semantic-alias.swift create mode 100644 gems/decomplex/examples/swift/sequence-mine.swift create mode 100644 gems/decomplex/examples/swift/state-branch-density.swift create mode 100644 gems/decomplex/examples/swift/state-mesh.swift create mode 100644 gems/decomplex/examples/swift/structural-topology.swift create mode 100644 gems/decomplex/examples/swift/temporal-ordering-pressure.swift create mode 100644 gems/decomplex/examples/swift/weighted-inlined-complexity.swift create mode 100644 gems/decomplex/examples/typescript/co-update.ts create mode 100644 gems/decomplex/examples/typescript/decision-pressure.ts create mode 100644 gems/decomplex/examples/typescript/derived-state.ts create mode 100644 gems/decomplex/examples/typescript/false-simplicity.ts create mode 100644 gems/decomplex/examples/typescript/fat-union.ts create mode 100644 gems/decomplex/examples/typescript/flay-similarity.ts create mode 100644 
gems/decomplex/examples/typescript/function-lcom.ts create mode 100644 gems/decomplex/examples/typescript/implicit-control-flow.ts create mode 100644 gems/decomplex/examples/typescript/inconsistent-rename-clone.ts create mode 100644 gems/decomplex/examples/typescript/local-flow.ts create mode 100644 gems/decomplex/examples/typescript/locality-drag.ts create mode 100644 gems/decomplex/examples/typescript/miner.ts create mode 100644 gems/decomplex/examples/typescript/operational-discontinuity.ts create mode 100644 gems/decomplex/examples/typescript/oversized-predicate.ts create mode 100644 gems/decomplex/examples/typescript/path-condition.ts create mode 100644 gems/decomplex/examples/typescript/predicate-alias.ts create mode 100644 gems/decomplex/examples/typescript/redundant-nil-guard.ts create mode 100644 gems/decomplex/examples/typescript/semantic-alias.ts create mode 100644 gems/decomplex/examples/typescript/sequence-mine.ts create mode 100644 gems/decomplex/examples/typescript/state-branch-density.ts create mode 100644 gems/decomplex/examples/typescript/state-mesh.ts create mode 100644 gems/decomplex/examples/typescript/structural-topology.ts create mode 100644 gems/decomplex/examples/typescript/temporal-ordering-pressure.ts create mode 100644 gems/decomplex/examples/typescript/weighted-inlined-complexity.ts create mode 100644 gems/decomplex/examples/zig/co-update.zig create mode 100644 gems/decomplex/examples/zig/decision-pressure.zig create mode 100644 gems/decomplex/examples/zig/derived-state.zig create mode 100644 gems/decomplex/examples/zig/false-simplicity.zig create mode 100644 gems/decomplex/examples/zig/fat-union.zig create mode 100644 gems/decomplex/examples/zig/flay-similarity.zig create mode 100644 gems/decomplex/examples/zig/function-lcom.zig create mode 100644 gems/decomplex/examples/zig/implicit-control-flow.zig create mode 100644 gems/decomplex/examples/zig/inconsistent-rename-clone.zig create mode 100644 
gems/decomplex/examples/zig/local-flow.zig create mode 100644 gems/decomplex/examples/zig/locality-drag.zig create mode 100644 gems/decomplex/examples/zig/miner.zig create mode 100644 gems/decomplex/examples/zig/operational-discontinuity.zig create mode 100644 gems/decomplex/examples/zig/oversized-predicate.zig create mode 100644 gems/decomplex/examples/zig/path-condition.zig create mode 100644 gems/decomplex/examples/zig/predicate-alias.zig create mode 100644 gems/decomplex/examples/zig/redundant-nil-guard.zig create mode 100644 gems/decomplex/examples/zig/semantic-alias.zig create mode 100644 gems/decomplex/examples/zig/sequence-mine.zig create mode 100644 gems/decomplex/examples/zig/state-branch-density.zig create mode 100644 gems/decomplex/examples/zig/state-mesh.zig create mode 100644 gems/decomplex/examples/zig/structural-topology.zig create mode 100644 gems/decomplex/examples/zig/temporal-ordering-pressure.zig create mode 100644 gems/decomplex/examples/zig/weighted-inlined-complexity.zig create mode 100644 gems/decomplex/lib/decomplex/ast/adapters/base.rb create mode 100644 gems/decomplex/lib/decomplex/ast/adapters/lua.rb create mode 100644 gems/decomplex/lib/decomplex/ast/adapters/python.rb create mode 100644 gems/decomplex/lib/decomplex/ast/adapters/ruby.rb create mode 100644 gems/decomplex/lib/decomplex/ast/adapters/typescript.rb create mode 100644 gems/decomplex/lib/decomplex/ast/cache.rb create mode 100644 gems/decomplex/lib/decomplex/ast/legacy_normalizer.rb create mode 100644 gems/decomplex/lib/decomplex/ast/node.rb create mode 100644 gems/decomplex/lib/decomplex/ast/semantic_node.rb create mode 100644 gems/decomplex/lib/decomplex/ast/semantic_normalizer.rb create mode 100644 gems/decomplex/lib/decomplex/ast/source_map.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/effects.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/protocols.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/ruby.rb create mode 100644 
gems/decomplex/lib/decomplex/syntax/ruby_effects.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/ruby_protocols.rb create mode 100644 gems/decomplex/rust/src/decomplex/ast-test.rs create mode 100644 gems/decomplex/test/examples_oracle_test.rb diff --git a/gems/decomplex/docs/agents/architectural-issues.md b/gems/decomplex/docs/agents/architectural-issues.md new file mode 100644 index 000000000..b9202ff7f --- /dev/null +++ b/gems/decomplex/docs/agents/architectural-issues.md @@ -0,0 +1,544 @@ +# Architectural Issues in `ast.rb` + +This is a gap analysis of the current `gems/decomplex/lib/decomplex/ast.rb` +Tree-sitter normalization layer. Line references below refer to the current +file state at the time of this analysis. + +## Executive Summary + +`ast.rb` is not only an AST facade. It currently blends three separate jobs: + +1. Tree-sitter grammar adaptation. +2. Cross-language semantic normalization. +3. Ruby AST compatibility and Ruby-specific scope semantics. + +That mix defeats the intended architecture. If Decomplex has a parser facade, +a Tree-sitter normalizer, and per-language adapters, then grammar-specific +quirks must be owned by the adapters. The shared normalizer should consume +already-classified semantic facts, not mine native grammar tokens for every +language. + +The current design still centralizes language knowledge in one giant shared +normalizer. Adding a language means editing shared dispatch tables, shared +punctuation checks, and Ruby-shaped AST output logic. That is brittle, hard to +test, and likely to regress existing languages. + +## Quantitative Signals + +Current `ast.rb` size: + +- `gems/decomplex/lib/decomplex/ast.rb`: 4,023 lines. +- Rough static scan: 439 method definitions. +- Rough static scan: 129 methods contain `rescue StandardError`. +- Rough static scan: at least 10 trivial hook methods return only `false`, + `nil`, or `true`. 
+ +The trivial hooks found by the scan are: + +- `TreeSitterNormalizationAdapter#ruby?` at line 145 returns `false`. +- `TreeSitterNormalizationAdapter#super_statement?` at line 156 returns `false`. +- `TreeSitterNormalizationAdapter#member_assignment_target?` at line 222 returns `false`. +- `TreeSitterNormalizationAdapter#identifier_text_node?` at line 226 returns `false`. +- `TreeSitterNormalizationAdapter#case_argument_list?` at line 266 returns `false`. +- `TreeSitterNormalizationAdapter#case_else_arm?` at line 293 returns `false`. +- `TreeSitterNormalizationAdapter#ensure_clause_body` at line 489 returns `nil`. +- `TreeSitterNormalizationAdapter#heredoc_call_for_body?` at line 612 returns `false`. +- `TreeSitterNormalizationAdapter#zero_child_identifier_call?` at line 642 returns `false`. +- `RubyTreeSitterNormalizationAdapter#ruby?` at line 838 returns `true`. + +Some no-op hooks are reasonable when they are an explicit adapter contract. +Here they are mixed into a large base adapter that also contains many concrete +language heuristics, so it is not clear which methods are deliberate extension +points and which are unimplemented behavior. + +## Primary Architectural Gaps + +### 1. Shared Normalizer Owns Language Dispatch + +`TreeSitterNormalizationAdapter.for` selects an adapter at lines 128-135, but +the selected adapter does not actually own the language boundary. The base +adapter above it still contains cross-language constants and grammar knowledge: + +- Function/class kinds at lines 49 and 70-73. +- Assignment operator tables at lines 50-56. +- Case/when/else grammar tables at lines 64-69. +- Wrapper and statement shape tables throughout lines 82-125. + +Then `TreeSitterNormalizer#normalize_node` at lines 1524-1658 performs one +large global dispatch across all languages. 
It checks assignment, infix, +dotted calls, unary operators, functions, classes, modules, loops, cases, +hashes, arrays, element references, rescue, ensure, calls, identifiers, nil, +strings, and symbols in one ordering-dependent chain. + +This makes the adapter layer incomplete. A new grammar still has to be wired +into shared lists and shared branch ordering. That is the exact direction the +architecture was supposed to avoid. + +Expected direction: + +- Each language adapter should classify native Tree-sitter nodes into a small + canonical set of semantic categories. +- Shared code should normalize canonical facts, not native grammar nodes. +- Adding a language should mostly mean adding or updating one adapter/profile, + plus tests for that language. + +### 2. Ruby AST Vocabulary Is Treated as Language-Neutral + +The comment at lines 1436-1441 says the target is "portable structural facts, +not Ruby semantics", but the output vocabulary is heavily Ruby-shaped: + +- `DEFN`, `DEFS`, `SCOPE`, `VCALL`, `FCALL`, `ITER`, `DASGN`, `DVAR`. +- `IASGN`, `GASGN`, `GVAR`, `NTH_REF`. +- `ATTRASGN`, `OP_ASGN1`, `OP_ASGN2`, `OP_ASGN_OR`, `OP_ASGN_AND`. +- `MATCH3`, `BLOCK_PASS`, `RESBODY`, `SCLASS`. + +Those are not neutral structural facts. They encode Ruby parser concepts and +Ruby name-resolution semantics. Forcing Python, Lua, TypeScript, Rust, C, Zig, +Swift, Kotlin, and Java into that vocabulary will either lose information or +invent false equivalences. + +Expected direction: + +- Decide whether this layer is a Ruby AST compatibility layer or a + language-neutral Decomplex IR. +- If Ruby compatibility is still required, keep it as a Ruby-specific output + adapter. +- Detectors should consume language-neutral concepts such as function, call, + assignment, branch, loop, literal, member access, block, return, and scope. + +### 3. 
`ruby?` Branches in Shared Code Prove the Normalizer Is Not Shared + +`TreeSitterNormalizer` delegates `ruby?` to the adapter at lines 2688-2690, +then uses it throughout shared normalization: + +- Root normalization enters Ruby scope tracking at lines 1512-1518. +- Ruby `yield` identifier handling appears at lines 1638-1643. +- Ruby `=~` handling appears at lines 1811-1815 and 2019-2023. +- Ruby `self[]` call rewriting appears at lines 1824-1826 and 3397-3399. +- Ruby hash key shorthand handling appears at lines 1915-1918. +- Ruby argument-list call normalization is gated at lines 2050-2073. +- Ruby argument-list element references are gated at lines 2168-2173. +- Ruby logical assignment lowering is gated at lines 2799-2808. +- Ruby local/vcall scope tracking lives at lines 2659-2686 and 2820-2910. +- Ruby parameter normalization is gated at lines 3024-3050. +- Ruby inline `def` handling lives at lines 3726-3793. +- Ruby tail return and implicit nil elision live at lines 3804-3858. +- Ruby inline parameter marker handling lives at lines 3860-3896. + +This is adapter logic living in the shared normalizer. It also means the base +normalizer cannot be reasoned about independently from Ruby. + +Expected direction: + +- Remove language predicates from shared normalization. +- Move Ruby scope, vcall/fcall, inline def, tail-return elision, implicit nil, + and Ruby-specific assignment lowering into a Ruby adapter or Ruby normalizer. +- Other languages should have their own scope/name-resolution rules or should + explicitly opt out of name-resolution at this layer. + +### 4. Broad `rescue StandardError` Masks Contract Failures + +The file repeatedly uses `rescue StandardError` to return `false`, `nil`, or +empty arrays. The base adapter alone has many examples in the first few hundred +lines, including: + +- `yield_statement?` at lines 149-153. +- `lambda_expression?` at lines 190-193. +- `literal_fragment_assignment_context?` at lines 230-239. 
+- `named_field` at lines 246-249. +- `safe_navigation_call?` at lines 252-255. +- `case_else_node` at lines 276-290. +- `leading_owner_statement?` at lines 319-327. +- `leading_if_statement?` at lines 336-346. + +Later helper methods do the same for sibling and parent access at lines +3177-3198, and for shape detection such as `infix_statement_parts` at lines +2545-2566. + +This hides missing optional values, wrong node shapes, facade bugs, and adapter +contract violations. A parser shape that should fail a test instead degrades +into "not this construct", which looks like partial language support rather +than a bug. + +Expected direction: + +- Provide safe node access helpers with explicit nil behavior. +- Rescue only known parser/facade exceptions at the parser boundary. +- Make adapter contracts explicit: a method should either return a documented + optional value or raise a meaningful unsupported-shape error in tests. + +### 5. Raw Token and Source-Text Mining Is Used for Semantic Decisions + +Many semantic decisions are made by checking token text or raw node source: + +- Safe navigation checks raw `&.` at lines 252-255. +- Leading function detection checks the first child kind against `"def"` at + lines 303-305 and 722-726. +- Ternary detection checks raw `?` and `:` tokens at lines 709-718. +- Dotted calls check raw `.` and `&.` at lines 3254-3262. +- Argument-list element reference checks raw `[` and `]` at lines 2160-2165. +- Hash pairs check raw `=>` at lines 1907-1912. +- Operator assignment parses raw token text at lines 2785-2790 and 3629-3647. +- Inline def handling checks source text for `"def "` at lines 3726-3737. +- Hidden match detection checks `node.text` for `"match "` at lines 3969-3973. + +This is not portable. Tree-sitter grammars expose punctuation and keywords +differently. Some grammars make punctuation anonymous, some name it, some hide +it behind fields, and some represent a construct as a dedicated node.
Source +text matching also fails as soon as whitespace, comments, macro syntax, +generated facade text, or language-specific tokenization changes. + +Expected direction: + +- Adapters should use grammar fields and native node kinds to identify + language constructs. +- Shared normalization should receive facts such as `safe_navigation_call`, + `function_decl`, `ternary`, `member_access`, and `subscript`, not discover + them with punctuation scans. + +### 6. `safe_navigation_call?` Is in the Wrong Layer + +The base implementation at lines 252-255 checks for Ruby's `&.` token. The +TypeScript override at lines 1304-1308 adds `optional_chain`/`?.` checks and +recursive call-expression scanning. + +This should not be a shared base behavior. It is inherently grammar-specific: + +- Ruby uses `&.`. +- TypeScript and JavaScript use `?.`. +- C# and Swift have their own optional chaining syntax. +- Kotlin also uses `?.`, but its grammar represents it differently. +- Python has no equivalent built-in operator. +- Rust, C, C++, Zig, Go, Java, and Lua do not have the same concept in the + same form. + +Expected direction: + +- Each adapter should expose optional-call/member-access semantics for its + grammar. +- Languages without this feature should explicitly return "unsupported" or + "not applicable", not inherit a Ruby token scan. + +### 7. Leading Statement Helpers Assume Keyword Tokens + +`leading_function_statement?` defaults to `def` at lines 303-305, and the +generic helper checks `node.children.first&.kind.to_s == keyword` at lines +722-726. Python overrides with another `"def"` check; Lua overrides with +`"function"`. + +That is still keyword-token mining. It cannot scale to languages where +function declarations are identified by node kind, declarator shape, receiver, +macro item, annotations/modifiers, or field names rather than a first keyword +token.
+ +Expected direction: + +- Adapter methods should answer "this wrapper contains a leading function + declaration" by using that grammar's function node and field structure. +- The shared normalizer should not know the keyword string. + +### 8. Assignment and Operator Tables Are Global, Incomplete, and Unsafe + +The base adapter defines assignment operators for Ruby, Python, Lua, and +TypeScript at lines 50-56. The fallback `assignment_operators` method returns +only `COMMON_ASSIGNMENT_OPERATORS` at lines 671-674. + +That silently misclassifies or ignores languages with different assignment +forms or operators: + +- Rust: `=`, `+=`, `-=`, `*=`, `/=`, `%=` plus bitwise/shift variants. +- C/C++/Java/C#/Go/Zig/Kotlin/Swift: overlapping but not identical augmented + assignment sets. +- Languages with declaration assignment, walrus-like operators, or pattern + assignment need grammar-specific handling. + +Expected direction: + +- Assignment/operator classification belongs in the language adapter/profile. +- Shared code should ask the adapter for an assignment semantic object, not + infer assignment by checking sibling punctuation. + +### 9. Scope and Local Resolution Are Ruby-Only but Central + +The normalizer tracks Ruby locals with `@local_stack`, `with_ruby_scope`, +`ruby_scope_locals`, `collect_ruby_scope_locals`, `ruby_assignment_node?`, and +related helpers at lines 2820-2910. It uses that to decide whether identifiers +become `LVAR`, `DVAR`, `VCALL`, or `FCALL`. + +That logic is Ruby-specific. Other languages have different scoping rules: + +- Python has local/global/nonlocal behavior and lexical scopes. +- JavaScript/TypeScript have `var`, `let`, `const`, function scope, block + scope, imports, and destructuring. +- Lua has globals by default and `local`. +- Rust, C, C++, Java, Kotlin, Swift, Zig, and Go have declaration forms and + block/module scopes unlike Ruby. 
+ +Expected direction: + +- Either remove name-resolution from this normalization layer, or delegate it + to per-language scope adapters. +- The shared normalizer should not decide call-vs-local from Ruby local rules. + +### 10. Parameter Normalization Is Ruby-Gated + +`normalize_parameters` returns `nil` unless `ruby?` at lines 3024-3037. +`normalize_block_parameters` also returns `nil` unless `ruby?` at lines +3039-3050. + +That means non-Ruby function parameters, defaults, destructuring, and block or +lambda parameters are mostly unavailable through this AST contract. This is a +large parity gap because many Decomplex detectors need parameters to +distinguish state, local data flow, receiver conventions, and trivial wrappers. + +Expected direction: + +- Language adapters should emit canonical parameter facts. +- Parameter normalization should exist for every supported language with + explicit capability gaps. + +### 11. Control-Flow Semantics Are Flattened Into Ruby Names + +`RETURN_KINDS` at lines 1488-1497 maps `"continue_statement"` to `:NEXT` and +Ruby `next` also to `:NEXT`. `LOOP_KINDS` at lines 1454-1462 maps native loop +kinds into Ruby-ish symbols. Rescue/ensure normalization maps Python and +TypeScript exception constructs into `RESCUE`, `RESBODY`, and `ENSURE` shapes. + +This may be acceptable for a Ruby compatibility mode, but it is not a neutral +model. `continue`, Ruby `next`, `break`, `return`, `throw`, `raise`, `panic`, +and exception/finally constructs do not have identical semantics across +languages. + +Expected direction: + +- Use neutral control-flow facts: `return`, `break`, `continue`, + `exception_handler`, `finally`, `throw`, and language-specific termination + signals where needed. +- Only convert to Ruby names at the Ruby compatibility boundary. + +### 12. Literal Semantics Are Conflated Across Languages + +`NIL_KINDS` at line 1487 conflates `nil`, `none`, and `null`. 
Terminal +statement handling at lines 3557-3575 hard-codes Ruby spellings such as +`nil`, `true`, `false`, symbols, instance variables, globals, and `[]`. +Scalar argument handling repeats similar text matching at lines 3910-3927. + +That loses important distinctions: + +- Python `None`, JavaScript `null`, JavaScript `undefined`, Ruby `nil`, Swift + `nil`, Zig `null`, and Go `nil` are not always equivalent in analysis. +- Ruby symbols do not exist in most target languages. +- Ruby globals and numbered captures are not portable. + +Expected direction: + +- Adapters should classify literals into canonical literal facts with original + language and spelling preserved. +- Detectors should decide which literal classes are equivalent for a specific + metric. + +### 13. Member Access and Calls Are Guessed by Shared Heuristics + +`MEMBER_KINDS`, `CALL_KINDS`, `IDENTIFIER_KINDS`, and `CONST_KINDS` live in the +shared normalizer at lines 1474-1482. Member parsing is then guessed in +`member_parts` at lines 2912-2929 by trying several field names and falling +back to child order. + +That is unsafe across languages. Member access differs for: + +- Ruby calls without parentheses. +- JavaScript optional chaining and private fields. +- C/C++ pointer member access. +- Rust paths, method calls, and associated functions. +- Go selectors. +- Swift/Kotlin null-safe access. +- Python attributes and calls. + +Expected direction: + +- Each adapter should expose a canonical call/member/subscript shape. +- Shared code should not infer receiver and method name by trying a long list + of field names from unrelated grammars. + +### 14. Unsupported Languages Silently Use the Generic Adapter + +`TreeSitterNormalizationAdapter.for` falls back to `new(document)` at line +134. That means unsupported languages appear to work using generic heuristics. +The result is worse than a clean unsupported error because detectors can +publish partial, misleading findings. 
+ +This is especially risky because `syntax.rb` already has `LANGUAGE_PROFILES` +for many languages at lines 2510-2598, while `ast.rb` only selects dedicated +normalization adapters for Ruby, Python, Lua, TypeScript, and JavaScript at +lines 128-135. + +Expected direction: + +- Require an explicit normalization adapter/profile for every language that + flows through `Ast.parse`. +- If a language is only partially supported, expose a capability matrix and + skip unsupported detector paths explicitly. + +### 15. There Are Two Adapter Systems That Can Drift + +`syntax.rb` already defines `TreeSitterLanguageAdapter` and language profiles +starting at lines 271 and 2510. Those profiles contain language lexicons, +function extraction, owner extraction, state reads/writes, call targets, +parameters, and branch facts. + +`ast.rb` defines a separate `TreeSitterNormalizationAdapter` starting at line +45 with its own function kinds, owner kinds, assignment operators, branch +heuristics, safe navigation logic, parameters, rescue/ensure handling, and +language subclasses. + +That is duplicated ownership. A language feature can be fixed in one adapter +layer and remain broken in the other. This is likely why language-specific +logic keeps reappearing in the wrong file. + +Expected direction: + +- Unify adapter ownership, or make one adapter explicitly depend on the other. +- There should be one place where language grammar knowledge is defined. +- `ast.rb` should not maintain its own parallel language universe. + +### 16. Source Span Utilities Are Mixed With Semantic Rewrites + +`wrap`, `source_before_child`, `source_from_nodes`, and +`source_from_normalized_nodes` at lines 3087-3171 construct spans and source +text while the same class performs semantic rewrites. + +This increases coupling. Transform code has to know how spans are rebuilt, +and span code has to handle both Tree-sitter nodes and already-normalized +nodes. 
+ +Expected direction: + +- Move span/source helpers behind a small source mapping utility. +- Keep semantic normalization focused on semantic shape. + +### 17. Dispatch Ordering Is an Implicit Contract + +`normalize_node` at lines 1524-1658 and `normalize_body` at lines 2316-2359 +both contain long, order-sensitive dispatch chains. The same conceptual +constructs are checked in several places: leading functions, leading branches, +rescue/ensure bodies, calls with blocks, infix statements, unary operations, +element references, arrays, hashes, and terminal statements. + +Adding a new language or construct requires knowing exactly where it belongs +in two large branch chains. A new check can shadow an older one globally. + +Expected direction: + +- Classify once into a semantic category. +- Dispatch on that category with a small table or polymorphic handler. +- Keep body normalization and expression normalization separate where the + language actually distinguishes statements and expressions. + +## Cross-Language Incompatibilities + +These are representative examples of logic that cannot be correct across +languages while living in shared code. + +| Current behavior | Why it is not portable | Better owner | +|---|---|---| +| `safe_navigation_call?` checks `&.` in the base adapter. | Optional chaining is language-specific and absent in many languages. | Per-language adapter. | +| `leading_function_statement?` searches for `"def"` or `"function"` keyword tokens. | Function declarations are grammar-specific and often declarator-based. | Per-language adapter. | +| `ruby?` gates shared normalization. | Shared code changes behavior by language instead of using polymorphism. | Ruby normalizer or adapter. | +| `NIL_KINDS = %w[nil none null]`. | Nil/null/None/undefined have different semantics. | Literal classifier per language. | +| `RETURN_KINDS` maps `continue_statement` to `NEXT`. | Ruby `next` and non-Ruby `continue` are not the same abstraction. 
| Neutral control-flow IR. | +| `self_node?` maps `self` and `this` together. | `self`, `this`, receiver, class/static context, and module context differ. | Language scope/receiver adapter. | +| `member_parts` guesses receiver/member from many possible field names. | Member grammar differs widely and includes pointer, path, optional, private, and static forms. | Per-language call/member adapter. | +| `assignment_lhs?` checks sibling token text. | Assignment shape is not reliably represented by adjacent punctuation. | Per-language assignment classifier. | +| `normalize_parameters` is Ruby-only. | Non-Ruby functions lose parameter facts. | Per-language parameter adapter. | +| `normalize_pair` assumes Ruby hash semantics and symbol shorthand. | Object literals, dictionaries, tables, maps, and hashes differ. | Per-language literal/container adapter. | +| `vcall_identifier?` and `ruby_vcall_identifier?` decide local vs call. | Bare identifier semantics differ by language. | Per-language scope adapter or detector layer. | +| Rescue/ensure are normalized as Ruby `RESCUE`/`ENSURE`. | Exceptions/finally/defer/panic/error returns differ substantially. | Neutral exception/control-flow IR. | + +## Recommended Remediation Plan + +### P0: Stop the Architectural Bleeding + +- Do not add new language support by extending shared constants in `ast.rb`. +- Remove or isolate `ruby?` checks from `TreeSitterNormalizer`. +- Stop silent fallback to the generic normalization adapter for unsupported + languages. +- Replace broad `rescue StandardError` in hot-path shape checks with explicit + nil-safe accessors and documented adapter contracts. +- Move Ruby-only behavior out of the shared normalizer first: local/vcall + scope, inline def, implicit nil, tail return elision, Ruby argument-list + calls, Ruby hash shorthand, and Ruby `=~`. 
+ +### P1: Define the Adapter Contract + +- Define the canonical facts a language adapter must provide: + function declaration, class/owner declaration, call, member access, + assignment, parameter, branch, loop, case arm, return/break/continue, + literal, string interpolation, exception handler, finally/ensure, and block. +- Make capability gaps explicit. A language should say "I do not support this + fact yet" rather than returning `false` from inherited generic heuristics. +- Pull punctuation and keyword-token checks into language adapters. +- Add adapter-level fixture tests per language that assert canonical facts, + not Ruby AST node names. + +### P2: Separate Ruby Compatibility From Decomplex Semantics + +- Introduce a language-neutral semantic IR for detector input. +- Keep Ruby AST-compatible node names only as a compatibility adapter for + legacy Ruby detector code. +- Migrate detectors toward semantic facts and away from Ruby parser node names. +- Preserve source spans as a separate utility so semantic normalization is not + responsible for source reconstruction. + +### P3: Unify `syntax.rb` and `ast.rb` Language Ownership + +- `syntax.rb` already has language profiles and structural fact extraction. +- `ast.rb` should either consume those profiles or be refactored so profile + ownership lives in one place. +- Avoid parallel adapter hierarchies with overlapping function, owner, branch, + assignment, call, and state semantics. + +## Desired End State + +The ideal architecture should look like this: + +1. `Syntax.parse` produces a Tree-sitter document with a known language + profile. +2. The language adapter owns grammar-specific queries and token quirks. +3. The adapter emits canonical semantic facts or canonical syntax nodes. +4. The shared normalizer only maps canonical facts into Decomplex's detector + model. +5. Ruby AST compatibility, where still required, is a Ruby-specific adapter, + not the shared representation. 
+ +In that design, adding Rust, Zig, Go, C, C++, Java, Swift, Kotlin, or any other +language does not require stuffing more native grammar names into +`TreeSitterNormalizer#normalize_node`. It requires implementing that language's +adapter contract and proving it with language-specific fixtures. + +## Current Remediation Notes + +The Ruby production detector path has moved in this direction: + +- `FalseSimplicity` now consumes `Syntax::SemanticEffectSite` facts and owner / + function facts. Ruby-specific effect lexicons and grammar quirks live under + `lib/decomplex/syntax/ruby_effects.rb`. +- `OrderedProtocolMine` now consumes `Syntax::ProtocolMethodEffect` and + `Syntax::ProtocolMethodPath` facts. Ruby branch/case/lambda path semantics and + state-effect extraction live under `lib/decomplex/syntax/ruby_protocols.rb`. +- `SequenceMine` and `OversizedPredicate` now consume `Syntax` call and decision + facts directly instead of `Ast.parse_semantic`. +- `Syntax` no longer requires the `Ast` facade; the dependency now points from + compatibility parsing toward `Syntax`, not from Syntax back into Ast. +- Ruby structural/local/path helper behavior has been split out of `syntax.rb` + into `lib/decomplex/syntax/ruby.rb`; Ruby effect and protocol quirks live in + `ruby_effects.rb` and `ruby_protocols.rb`. +- A production detector grep no longer finds `Ast.parse`, `Ast.parse_semantic`, + or legacy Ruby AST node names outside the `ast.rb` compatibility facade. + +Remaining architectural debt: + +- `ast/legacy_normalizer.rb` still exists as a Ruby-shaped compatibility layer. +- Non-Ruby profile behavior in `syntax.rb` should continue moving into + language-specific profile files as those languages are made first-class. +- Rust still needs to mirror the Ruby architecture with minimal changes after + Ruby verification is complete. 
diff --git a/gems/decomplex/docs/agents/ruby-first-cross-language-ast-design.md b/gems/decomplex/docs/agents/ruby-first-cross-language-ast-design.md new file mode 100644 index 000000000..939fc003a --- /dev/null +++ b/gems/decomplex/docs/agents/ruby-first-cross-language-ast-design.md @@ -0,0 +1,621 @@ +# Ruby-First Cross-Language AST Architecture + +Status: Ruby implementation complete for the production detector architecture. +Rust mirror work is pending. The legacy AST normalizer remains quarantined as a +compatibility layer, not as detector infrastructure. + +Related analysis: `gems/decomplex/docs/agents/architectural-issues.md`. + +## Implementation Status + +Completed so far: + +- `ast.rb` has been reduced to a small facade. +- AST infrastructure has been split into `ast/node.rb`, `ast/cache.rb`, + `ast/source_map.rb`, and adapter files. +- `Ast.parse_semantic` and `SemanticNode` still exist as compatibility + infrastructure, but production detectors should consume `Syntax` facts + directly. +- `TreeSitterNormalizationAdapter.for` now fails loudly for unsupported AST + compatibility languages instead of silently falling back to a generic + adapter. +- Ruby-specific defaults for `yield`, `&.`, leading `def`, heredoc handling, + and Ruby variable text checks have been moved out of the base AST adapter + into `adapters/ruby.rb`. +- `RubySyntaxAdapter` owns Ruby method visibility markers and singleton + method receiver naming for structural facts. +- `PythonSyntaxAdapter` owns Python receiverless adjacent-call syntax. +- `Syntax::SemanticEffectSite` and Ruby effect adapters now expose + False-Simplicity-style semantic effects such as Ruby dynamic dispatch, + command literals, `yield`, singleton-class metaprogramming, globals, + receiver mutation, callbacks, and core-class reopen support. 
+- `Syntax::ProtocolMethodEffect` and `Syntax::ProtocolMethodPath` now expose + Ruby ordered-protocol method effects and path-separated internal call + sequences, including branch/case separation and lambda-body exclusion. +- `Syntax` no longer requires the `Ast` facade; the dependency direction is + compatibility-only (`Ast` may call into `Syntax`, not the reverse). +- Ruby structural/local/path helper behavior has been split out of `syntax.rb` + into `syntax/ruby.rb`; `syntax.rb` now keeps only the shared profile and + dispatcher layer plus a Ruby adapter stub. +- These detectors now avoid `Ast.parse` and `Ast.parse_semantic` in production + and consume `Syntax` facts: + - `SequenceMine` + - `OversizedPredicate` + - `StructuralTopology` + - `TemporalOrderingPressure` + - `StateBranchDensity` + - `StateMesh` write/read discovery + - `PredicateAlias` + - `SemanticAlias` + - `LocalFlow` + - `DerivedState` + - `FatUnion` + - `DecisionPressure` + - `PathCondition` + - `InconsistentRenameClone` + - `WeightedInlinedCognitiveComplexity` + - `RedundantNilGuard` + - `FalseSimplicity` + - `OrderedProtocolMine` +- A production detector search now leaves legacy Ruby AST node names only in + `ast.rb`, the explicit compatibility facade. + +Remaining follow-up work: + +- `ast/legacy_normalizer.rb` is still a large Ruby-shaped compatibility + normalizer. It is no longer production detector infrastructure, but it should + eventually shrink or become Ruby-only compatibility code. +- The base `TreeSitterLanguageAdapter` in `syntax.rb` still contains broad + cross-language heuristic tables; non-Ruby language work should continue to + move behavior into explicit language profiles. +- The semantic model still does not expose exception-flow details or a full + expression tree. Current Ruby detector coverage does not require those facts, + but future detectors must add adapter-owned facts rather than reviving the + legacy AST model. 
+- Rust has not yet been mirrored to the Ruby architecture; current Rust parity + is preserved for the migrated Ruby detector fixtures. + +## Goal + +Make Decomplex's Ruby AST/normalization implementation architecturally correct +first, then mirror that architecture in Rust with minimal behavioral drift. + +The correct end state is not "one Ruby AST shape that every language pretends +to be." The correct end state is: + +1. Language adapters own Tree-sitter grammar quirks. +2. A shared semantic model represents facts detectors can use across + languages. +3. Ruby parser compatibility exists only at a Ruby boundary. +4. Unsupported language features are explicit capability gaps, not silent + generic fallbacks. + +## Non-Goals + +- Do not add another layer of string matching to the shared normalizer. +- Do not preserve `ruby?` as a shared-code branch mechanism. +- Do not make Rust lead the architecture. Rust mirrors Ruby after Ruby is + correct. +- Do not claim cross-language support because tests produce Ruby AST node + names for non-Ruby code. +- Do not keep expanding `ast.rb` as a universal normalization file. + +## Current Problem + +Before the split, `gems/decomplex/lib/decomplex/ast.rb` was 4,023 lines and combined: + +- AST facade helpers. +- Tree-sitter grammar adaptation. +- Ruby AST compatibility output. +- Ruby local/scope semantics. +- Shared cross-language normalization. +- Source span reconstruction. + +`gems/decomplex/rust/src/decomplex/ast.rs` is 8,642 lines and mirrors the same +architectural mistake. Rust currently has `syntax/tree_sitter_adapter.rs` with +a `LanguageProfile` trait, but AST normalization itself is still a single +large enum-driven file. + +The first implementation target is Ruby because Ruby owns the legacy behavior +and the existing detector contracts. Once Ruby has a clean boundary, Rust can +mirror the structure without copying the monolith. 
+ +## Target Ruby File Layout + +The Ruby implementation should move toward this structure: + +```text +gems/decomplex/lib/decomplex/ast.rb +gems/decomplex/lib/decomplex/ast/node.rb +gems/decomplex/lib/decomplex/ast/span.rb +gems/decomplex/lib/decomplex/ast/source_map.rb +gems/decomplex/lib/decomplex/ast/semantic_node.rb +gems/decomplex/lib/decomplex/ast/semantic_normalizer.rb +gems/decomplex/lib/decomplex/ast/ruby_compat.rb +gems/decomplex/lib/decomplex/ast/adapters/base.rb +gems/decomplex/lib/decomplex/ast/adapters/ruby.rb +gems/decomplex/lib/decomplex/ast/adapters/python.rb +gems/decomplex/lib/decomplex/ast/adapters/lua.rb +gems/decomplex/lib/decomplex/ast/adapters/typescript.rb +``` + +`ast.rb` should become a facade and compatibility entry point. It should not +contain language-specific grammar tables or semantic rewrites. + +## Target Rust File Layout + +Rust should mirror Ruby after the Ruby boundary is correct: + +```text +gems/decomplex/rust/src/decomplex/ast/mod.rs +gems/decomplex/rust/src/decomplex/ast/node.rs +gems/decomplex/rust/src/decomplex/ast/span.rs +gems/decomplex/rust/src/decomplex/ast/source_map.rs +gems/decomplex/rust/src/decomplex/ast/semantic_node.rs +gems/decomplex/rust/src/decomplex/ast/semantic_normalizer.rs +gems/decomplex/rust/src/decomplex/ast/ruby_compat.rs +gems/decomplex/rust/src/decomplex/ast/adapters/mod.rs +gems/decomplex/rust/src/decomplex/ast/adapters/ruby.rs +gems/decomplex/rust/src/decomplex/ast/adapters/python.rs +gems/decomplex/rust/src/decomplex/ast/adapters/lua.rs +gems/decomplex/rust/src/decomplex/ast/adapters/typescript.rs +``` + +Rust should not receive a large redesign before Ruby is stabilized. The Rust +work is a mirror step, not an independent architecture experiment. + +## Line-of-Code Budgets + +These budgets are guardrails. They are not strict limits, but exceeding them +should trigger review. 
+ +| Component | Target LoC | +|---|---:| +| `ast.rb` facade | 50-150 | +| `node.rb` | 50-120 | +| `span.rb` / `source_map.rb` | 100-250 total | +| `semantic_node.rb` | 100-250 | +| `semantic_normalizer.rb` | 400-800 | +| `ruby_compat.rb` | 400-900 | +| Base adapter contract | 150-300 | +| Ruby adapter | 400-700 | +| Python adapter | 250-400 | +| TypeScript/JavaScript adapter | 250-450 | +| Lua adapter | 150-300 | +| Each later language adapter | 200-500 | + +If a language adapter grows past roughly 700 lines, either the shared semantic +contract is too weak or detector logic has leaked into the adapter. If the +shared normalizer grows past roughly 800 lines, it is probably becoming the new +monolith. + +## Semantic Model + +The detector-facing model must not be Ruby AST names. It should represent +cross-language concepts directly. + +Minimum semantic node/fact types: + +- `Root` +- `Owner` +- `Function` +- `Parameter` +- `Block` +- `Call` +- `MemberAccess` +- `Subscript` +- `Assignment` +- `Identifier` +- `Literal` +- `Branch` +- `Loop` +- `Case` +- `CaseArm` +- `Return` +- `Break` +- `Continue` +- `BooleanOp` +- `Comparison` +- `UnaryOp` +- `BinaryOp` +- `Lambda` +- `ExceptionHandler` +- `Finally` +- `Unknown` + +Every semantic node should carry: + +- `type` +- `children` +- `span` +- `text` +- `language` +- optional metadata, such as `name`, `receiver`, `message`, `operator`, + `parameters`, `visibility`, `owner`, `control`, or `capability_gap`. + +The shared semantic model can preserve source text, but it should not depend +on source text to discover language constructs. + +## Adapter Contract + +Each language adapter should classify native Tree-sitter nodes into semantic +facts. The shared normalizer should ask the adapter for meaning instead of +matching grammar strings directly. 
+ +Required adapter methods: + +```ruby +function_definition(node) +owner_definition(node) +parameters(node) +call(node) +member_access(node) +subscript(node) +assignment(node) +branch(node) +loop(node) +case_expression(node) +case_arm(node) +return_statement(node) +break_statement(node) +continue_statement(node) +literal(node) +identifier(node) +boolean_operation(node) +comparison(node) +unary_operation(node) +binary_operation(node) +lambda_expression(node) +exception_handler(node) +finally_clause(node) +block(node) +ignored_node?(node) +``` + +Each method returns either: + +- a semantic descriptor, +- `nil` when the node is not that construct, +- or a capability-gap object when the language construct is recognized but not + implemented yet. + +The base adapter should not contain Ruby token checks, shared operator tables, +or broad fallback grammar heuristics. It should mostly define the contract, +safe node access helpers, and common descriptor structs. + +## Ruby Adapter Responsibilities + +The Ruby adapter owns Ruby grammar and Ruby semantics: + +- `def`, singleton methods, inline `def`. +- `class`, `module`, singleton class. +- `yield`, `super`, `block_argument`. +- `&.` safe navigation. +- Ruby block and lambda syntax. +- Ruby `case`/`when`. +- Ruby `rescue`/`ensure`. +- Ruby local variable discovery and bare-call resolution. +- `VCALL`, `FCALL`, `DVAR`, `DASGN`, and `SCOPE` if needed for compatibility. +- Ruby symbols, globals, instance variables, class variables. +- Ruby hash key shorthand. +- Ruby `=~` behavior. +- Implicit nil and tail-return elision. +- Visibility calls such as `private`, `protected`, `public`, + `module_function`, and `private_class_method`. + +None of these should live in shared normalizer code. 
+ +## Ruby Compatibility Boundary + +Existing Ruby detectors currently depend on Ruby AST-like node names such as: + +- `DEFN`, `DEFS`, `SCOPE` +- `CALL`, `QCALL`, `FCALL`, `VCALL`, `OPCALL` +- `LASGN`, `IASGN`, `DASGN` +- `LVAR`, `DVAR`, `IVAR`, `GVAR` +- `IF`, `UNLESS`, `CASE`, `WHEN` +- `RETURN`, `BREAK`, `NEXT` + +Those names can remain temporarily, but only behind a Ruby compatibility +adapter: + +```text +Tree-sitter Ruby nodes + -> Ruby adapter descriptors + -> semantic nodes + -> Ruby compatibility nodes for legacy detectors +``` + +New or migrated cross-language detectors should consume semantic nodes/facts: + +```text +Tree-sitter language nodes + -> language adapter descriptors + -> semantic nodes + -> detector facts +``` + +This separation is what makes the system truly cross-language. + +## Implementation Phases + +### Phase 1: Split Non-Language Infrastructure + +Create these files without changing behavior: + +- `ast/node.rb` +- `ast/span.rb` +- `ast/source_map.rb` +- `ast/adapters/base.rb` + +Move only mechanical infrastructure: + +- `Node` +- `node?` +- `slice` +- source span construction +- parent/child safe access helpers +- normalized cache helpers + +Acceptance criteria: + +- Ruby tests still pass. +- `ast.rb` becomes a facade for existing behavior. +- No semantic changes yet. + +### Phase 2: Extract Ruby Adapter + +Move Ruby-specific syntax and semantics out of `TreeSitterNormalizer` into +`ast/adapters/ruby.rb`. + +Initial Ruby adapter methods should cover: + +- functions and singleton functions +- owners +- calls and safe calls +- assignments +- identifiers and locals +- blocks/lambdas +- branch/case/loop +- rescue/ensure +- literals +- parameters + +Acceptance criteria: + +- No `ruby?` branch remains in shared normalizer. +- Ruby-specific token checks are in `RubyAdapter`. +- Ruby tests pass. +- Existing Ruby detector output is unchanged. 
+ +### Phase 3: Introduce Semantic Nodes + +Add `ast/semantic_node.rb` and `ast/semantic_normalizer.rb`. + +The semantic normalizer should: + +- walk Tree-sitter nodes, +- ask the adapter for descriptors, +- emit semantic nodes, +- preserve spans and text, +- avoid language-specific grammar strings. + +Acceptance criteria: + +- Ruby semantic fixtures pass. +- Ruby compatibility output can be generated from semantic nodes. +- Shared semantic code contains no Ruby-specific behavior. + +### Phase 4: Move Legacy Ruby AST Output Behind Compatibility + +Create `ast/ruby_compat.rb`. + +This layer converts Ruby semantic nodes to the legacy Ruby AST-like nodes +needed by existing detectors. + +Acceptance criteria: + +- `Ast.parse(file)` still returns the legacy shape for Ruby until detectors + migrate. +- Internally, Ruby Tree-sitter nodes no longer flow through a shared + Ruby-shaped normalizer. +- All current Ruby detector tests pass. + +### Phase 5: Add Detector-Facing Semantic API + +Add a new API alongside `Ast.parse`: + +```ruby +Ast.parse_semantic(file, language: nil) +``` + +or equivalent through `Syntax.parse`. + +Acceptance criteria: + +- Cross-language detectors can use semantic facts without Ruby compatibility + nodes. +- At least one detector is ported to the semantic API as proof. +- Semantic facts include source spans and file/method context. + +### Phase 6: Extract Existing Non-Ruby Adapters + +Move the current Python, Lua, and TypeScript logic into adapter files. During +this phase, do not try to make every detector perfect for every language. +Focus on correct adapter ownership. + +Acceptance criteria: + +- Python/Lua/TypeScript grammar quirks are not in shared normalizer code. +- Unsupported features are explicit capability gaps. +- Existing non-Ruby smoke tests either pass or fail with intentional, + documented unsupported-feature assertions. 
+ +### Phase 7: Rust Mirror + +After Ruby is correct, mirror the structure in Rust: + +- split `ast.rs`, +- replace `TreeSitterNormalizationAdapter` enum with an adapter trait, +- move language logic to `ast/adapters/*.rs`, +- keep Rust behavior matched to Ruby fixtures. + +Acceptance criteria: + +- Rust remains behaviorally equivalent for Ruby. +- Rust test files are separate from implementation files. +- Rust adapter files follow the same contract as Ruby. + +## Detector Migration Strategy + +Detectors fall into three categories. + +### Category A: Can Move to Semantic Facts Early + +These mostly need functions, branches, calls, assignments, and spans: + +- weighted inlined cognitive complexity +- structural topology +- local flow +- temporal ordering pressure +- state branch density +- sequence mining +- path condition +- oversized predicate + +### Category B: Needs Ruby Compatibility During Migration + +These depend on Ruby-specific node names or Ruby semantics: + +- predicate alias +- semantic alias +- redundant nil guard +- false simplicity +- ordered protocol mining +- derived state +- decision pressure +- state mesh +- fat union + +### Category C: Should Stay Ruby-Specific Unless Redesigned + +Any detector relying on Ruby-only language semantics should explicitly declare +Ruby-only support until it is redesigned. + +Examples: + +- Ruby visibility wrappers. +- Ruby metaprogramming shapes. +- Ruby `nil?` and safe-navigation-specific analyses. +- Ruby local-vs-call semantics. 
+ +## Salvage Plan for `ast.rb` + +Expected salvage from the current 4,023 lines: + +| Portion | Approximate fate | +|---|---| +| `Node`, cache, `slice`, `node?` | Keep, move to small files | +| Source span helpers | Keep, move to `source_map.rb` | +| `flatten_and`, `def_push`, `body_stmts`, `canon_polarity` | Keep temporarily, then migrate to semantic helpers | +| Ruby scope/local/vcall logic | Keep only in Ruby adapter or Ruby compatibility | +| Ruby inline def/tail return/implicit nil | Keep only in Ruby compatibility | +| Python/Lua/TypeScript shape helpers | Move to adapter files, then rewrite where token mining is unsafe | +| Giant `normalize_node` dispatch | Delete/rewrite | +| Global grammar kind tables | Delete/move into adapters | +| `ruby?` predicate model | Delete | +| Generic fallback adapter | Delete | +| Broad `rescue StandardError` shape checks | Replace with explicit nil-safe helpers | + +Realistically: + +- 10-15% is directly reusable cross-language infrastructure. +- 25-35% is salvageable Ruby compatibility behavior. +- 15-20% is reusable as adapter seeds. +- The remaining 30-50% should be deleted or rewritten. + +## Testing Requirements + +### Ruby Must Stay Byte-for-Byte Compatible Where Legacy Requires It + +Before changing behavior, capture current Ruby detector output fixtures for: + +- report sections, +- state branch density, +- structural topology, +- weighted inlined cognitive complexity, +- redundant nil guard, +- false simplicity, +- local flow, +- temporal ordering pressure. + +Ruby compatibility output should remain unchanged until a detector is +explicitly migrated. + +### Semantic Fixtures + +Add language-independent semantic fixtures for: + +- function with parameters, +- method/member call, +- receiverless call, +- assignment, +- branch, +- loop, +- case/match/switch, +- boolean and comparison operations, +- return/break/continue, +- exception/finally, +- lambda/block, +- subscript, +- literal families. 
+ +Each fixture should assert semantic facts, not Ruby AST node names. + +### Adapter Ownership Tests + +Add tests that fail if shared normalizer code learns language-specific tokens. +Examples: + +- no `ruby?` in shared normalizer, +- no `"def"`/`"function"` keyword checks in shared normalizer, +- no `&.`/`?.` checks in shared normalizer, +- no language assignment-operator tables in shared normalizer, +- no silent default adapter for supported languages. + +## Completion Criteria + +The Ruby implementation is complete only when all of these are true: + +- `ast.rb` is a small facade, not a monolith. +- Ruby-specific grammar and semantic behavior live in `adapters/ruby.rb` or + `ruby_compat.rb`. +- Shared normalizer code has no `ruby?` branches. +- Shared normalizer code does not inspect Ruby keyword/operator tokens. +- There is an explicit semantic model for detector-facing cross-language + support. +- `Ast.parse` is compatibility-only; production detectors do not call it. +- `Ast.parse_semantic` is compatibility-only; production detectors consume + `Syntax` facts directly. +- Ruby production detectors consume semantic facts instead of Ruby AST node + names. +- Unsupported language features are represented as explicit capability gaps. +- Ruby tests pass. +- Relevant cross-language semantic fixtures pass. +- Rust has not diverged; it is either unchanged pending mirror work or updated + minimally to match the Ruby architecture. + +Do not report the Ruby implementation as finished before these criteria are +satisfied. + +## Reporting Protocol + +During implementation, report status by phase: + +- completed files, +- behavior preserved, +- tests run, +- remaining architectural blockers. + +Only report "Ruby implementation complete" when the completion criteria above +are satisfied. Until then, report partial progress as partial progress. 
diff --git a/gems/decomplex/examples/c/co-update.c b/gems/decomplex/examples/c/co-update.c new file mode 100644 index 000000000..e498d0bfa --- /dev/null +++ b/gems/decomplex/examples/c/co-update.c @@ -0,0 +1,4 @@ +void stable_one(Node node) { node.storage = 1; node.provenance = 1; } +void stable_two(Node node) { node.storage = 1; node.provenance = 1; } +void stable_three(Node node) { node.storage = 1; node.provenance = 1; } +void misses_provenance(Node node) { node.storage = 1; } diff --git a/gems/decomplex/examples/c/decision-pressure.c b/gems/decomplex/examples/c/decision-pressure.c new file mode 100644 index 000000000..df529b6ce --- /dev/null +++ b/gems/decomplex/examples/c/decision-pressure.c @@ -0,0 +1 @@ +bool scan(Node node) { Value value = node.symbol; return value.isNull(); } diff --git a/gems/decomplex/examples/c/derived-state.c b/gems/decomplex/examples/c/derived-state.c new file mode 100644 index 000000000..8c51ff74a --- /dev/null +++ b/gems/decomplex/examples/c/derived-state.c @@ -0,0 +1 @@ +void check(int input) { int cached = input + 1; input = 2; print(cached); } diff --git a/gems/decomplex/examples/c/false-simplicity.c b/gems/decomplex/examples/c/false-simplicity.c new file mode 100644 index 000000000..e7a038a09 --- /dev/null +++ b/gems/decomplex/examples/c/false-simplicity.c @@ -0,0 +1 @@ +void hack() { print("hidden IO"); } diff --git a/gems/decomplex/examples/c/fat-union.c b/gems/decomplex/examples/c/fat-union.c new file mode 100644 index 000000000..0e18ef013 --- /dev/null +++ b/gems/decomplex/examples/c/fat-union.c @@ -0,0 +1,7 @@ +void handle(Node *self) { + switch (self) { + case AST_Call: self->line(); self->col(); self->ty(); self->span(); self->parent(); self->recv(); break; + case AST_Func: self->line(); self->col(); self->ty(); self->span(); self->parent(); self->name(); break; + case AST_Lit: self->line(); self->col(); self->ty(); self->span(); self->parent(); self->value(); break; + } +} diff --git 
a/gems/decomplex/examples/c/flay-similarity.c b/gems/decomplex/examples/c/flay-similarity.c new file mode 100644 index 000000000..74d8ffd5c --- /dev/null +++ b/gems/decomplex/examples/c/flay-similarity.c @@ -0,0 +1,2 @@ +int first_clone(Node node) { let total = 0; int value1 = node.part1; if (value1.ready() && value1.enabled()) { total += value1.amount; } int value2 = node.part2; if (value2.ready() && value2.enabled()) { total += value2.amount; } int value3 = node.part3; if (value3.ready() && value3.enabled()) { total += value3.amount; } int value4 = node.part4; if (value4.ready() && value4.enabled()) { total += value4.amount; } int value5 = node.part5; if (value5.ready() && value5.enabled()) { total += value5.amount; } int value6 = node.part6; if (value6.ready() && value6.enabled()) { total += value6.amount; } int value7 = node.part7; if (value7.ready() && value7.enabled()) { total += value7.amount; } int value8 = node.part8; if (value8.ready() && value8.enabled()) { total += value8.amount; } return total; } +int second_clone(Node entry) { let total = 0; int item1 = entry.part1; if (item1.ready() && item1.enabled()) { total += item1.amount; } int item2 = entry.part2; if (item2.ready() && item2.enabled()) { total += item2.amount; } int item3 = entry.part3; if (item3.ready() && item3.enabled()) { total += item3.amount; } int item4 = entry.part4; if (item4.ready() && item4.enabled()) { total += item4.amount; } int item5 = entry.part5; if (item5.ready() && item5.enabled()) { total += item5.amount; } int item6 = entry.part6; if (item6.ready() && item6.enabled()) { total += item6.amount; } int item7 = entry.part7; if (item7.ready() && item7.enabled()) { total += item7.amount; } int item8 = entry.part8; if (item8.ready() && item8.enabled()) { total += item8.amount; } return total; } diff --git a/gems/decomplex/examples/c/function-lcom.c b/gems/decomplex/examples/c/function-lcom.c new file mode 100644 index 000000000..0220f023b --- /dev/null +++ 
b/gems/decomplex/examples/c/function-lcom.c @@ -0,0 +1,13 @@ +Result mixed(int price, int tax, Logger logger) { + int subtotal = price + tax; + int total = subtotal * 2; + int rounded = total.round(); + + int timestamp = now(); + Buffer buffer = Buffer_init(); + int stamp = timestamp; + buffer.push(stamp); + logger.info(buffer); + + return Result_init(rounded, buffer); +} diff --git a/gems/decomplex/examples/c/implicit-control-flow.c b/gems/decomplex/examples/c/implicit-control-flow.c new file mode 100644 index 000000000..328266712 --- /dev/null +++ b/gems/decomplex/examples/c/implicit-control-flow.c @@ -0,0 +1,9 @@ +typedef struct FlowExample { int status; int valid; int done; } FlowExample; +void prepare(FlowExample *self) { self->status = 1; } +void validate(FlowExample *self) { self->valid = self->status == 1; } +void commit(FlowExample *self) { self->done = self->valid; } +void ok1(FlowExample *self) { prepare(self); validate(self); commit(self); } +void ok2(FlowExample *self) { prepare(self); validate(self); commit(self); } +void ok3(FlowExample *self) { prepare(self); validate(self); commit(self); } +void ok4(FlowExample *self) { prepare(self); validate(self); commit(self); } +void drift(FlowExample *self) { validate(self); prepare(self); commit(self); } diff --git a/gems/decomplex/examples/c/inconsistent-rename-clone.c b/gems/decomplex/examples/c/inconsistent-rename-clone.c new file mode 100644 index 000000000..581bfd232 --- /dev/null +++ b/gems/decomplex/examples/c/inconsistent-rename-clone.c @@ -0,0 +1,2 @@ +void original() { int src = fetch(1); check(src); store(src); finalize(src); } +void pasted() { int dst = fetch(2); check(dst); store(src); finalize(dst); } diff --git a/gems/decomplex/examples/c/local-flow.c b/gems/decomplex/examples/c/local-flow.c new file mode 100644 index 000000000..c03515f6e --- /dev/null +++ b/gems/decomplex/examples/c/local-flow.c @@ -0,0 +1,9 @@ +Result mixed(int price, int tax) { + int subtotal = price + tax; + int total = 
subtotal.round(); + + int timestamp = now(); + Buffer buffer = Buffer_init(); + buffer.push(timestamp); + return Result_init(total, buffer); +} diff --git a/gems/decomplex/examples/c/locality-drag.c b/gems/decomplex/examples/c/locality-drag.c new file mode 100644 index 000000000..d7a892e5a --- /dev/null +++ b/gems/decomplex/examples/c/locality-drag.c @@ -0,0 +1,27 @@ +void run(User user, Cart cart, Logger logger) { + int receipt_id = user.id; + + int total = cart.total; + if (total > 100) { + if (cart.discountable()) { + int discount = 10; + } + } + if (cart.taxable()) { + if (cart.region) { + int tax = total * 2; + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total); + } + } + if (cart.valid()) { + if (cart.ready()) { + int status = 1; + } + } + + emit(receipt_id); +} diff --git a/gems/decomplex/examples/c/miner.c b/gems/decomplex/examples/c/miner.c new file mode 100644 index 000000000..a48eaf209 --- /dev/null +++ b/gems/decomplex/examples/c/miner.c @@ -0,0 +1,4 @@ +bool one(bool a, bool b, bool c) { return a && b && c; } +bool two(bool a, bool b, bool c) { return a && b && c; } +bool three(bool a, bool b, bool c) { return a && b && c; } +bool broken(bool a, bool b) { return a && b; } diff --git a/gems/decomplex/examples/c/operational-discontinuity.c b/gems/decomplex/examples/c/operational-discontinuity.c new file mode 100644 index 000000000..d011d0237 --- /dev/null +++ b/gems/decomplex/examples/c/operational-discontinuity.c @@ -0,0 +1,9 @@ +void phase_shift() { + int a = 1; + int b = 2; + + // Phase 2 + int x = 3; + int y = 4; + print(x); print(y); +} diff --git a/gems/decomplex/examples/c/oversized-predicate.c b/gems/decomplex/examples/c/oversized-predicate.c new file mode 100644 index 000000000..9c9197082 --- /dev/null +++ b/gems/decomplex/examples/c/oversized-predicate.c @@ -0,0 +1 @@ +void complex_check(bool a, bool b, bool c, bool d) { if (a && b && c && d) { print("too big"); } } diff --git 
a/gems/decomplex/examples/c/path-condition.c b/gems/decomplex/examples/c/path-condition.c new file mode 100644 index 000000000..9fc2f74f2 --- /dev/null +++ b/gems/decomplex/examples/c/path-condition.c @@ -0,0 +1,4 @@ +void one(X x, Y y, Z z) { if (x.p() && y.q() && z.r()) { go(x); } } +void two(X x, Y y, Z z) { if (x.p() && y.q() && z.r()) { go(x); } } +void three(X x, Y y, Z z) { if (x.p() && y.q() && z.r()) { go(x); } } +void bug(X x, Y y, Z z) { if (x.p() && y.q()) { go(x); } } diff --git a/gems/decomplex/examples/c/predicate-alias.c b/gems/decomplex/examples/c/predicate-alias.c new file mode 100644 index 000000000..b812862c9 --- /dev/null +++ b/gems/decomplex/examples/c/predicate-alias.c @@ -0,0 +1,3 @@ +bool first() { return true; } +bool second() { return true; } +bool other() { return false; } diff --git a/gems/decomplex/examples/c/redundant-nil-guard.c b/gems/decomplex/examples/c/redundant-nil-guard.c new file mode 100644 index 000000000..6cab7e8de --- /dev/null +++ b/gems/decomplex/examples/c/redundant-nil-guard.c @@ -0,0 +1 @@ +void check(Value *value) { if (value->isSome()) { value->isNull(); } } diff --git a/gems/decomplex/examples/c/semantic-alias.c b/gems/decomplex/examples/c/semantic-alias.c new file mode 100644 index 000000000..60c1db51a --- /dev/null +++ b/gems/decomplex/examples/c/semantic-alias.c @@ -0,0 +1,4 @@ +bool frame(Node node) { return node.provenance == FRAME; } +bool is_frame(Node node) { return provenance == FRAME; } +bool heap(Node node) { return node.provenance == HEAP; } +int somewhere(Node node) { if (node.provenance == FRAME) { return 1; } return 0; } diff --git a/gems/decomplex/examples/c/sequence-mine.c b/gems/decomplex/examples/c/sequence-mine.c new file mode 100644 index 000000000..e05be841f --- /dev/null +++ b/gems/decomplex/examples/c/sequence-mine.c @@ -0,0 +1,5 @@ +void one() { alloc_mark(x); body1(); cleanup(x); } +void two() { alloc_mark(y); body2(); cleanup(y); } +void three() { alloc_mark(z); body3(); cleanup(z); } 
+void four() { alloc_mark(w); body4(); cleanup(w); } +void leak() { alloc_mark(q); use_value(q); } diff --git a/gems/decomplex/examples/c/state-branch-density.c b/gems/decomplex/examples/c/state-branch-density.c new file mode 100644 index 000000000..87f820256 --- /dev/null +++ b/gems/decomplex/examples/c/state-branch-density.c @@ -0,0 +1,2 @@ +typedef struct StateBranchChecker { int checked; } StateBranchChecker; +void check(StateBranchChecker *self, User user) { if (user.admin) { self->checked = true; } if (self->checked && user.name == "admin") { print("hello"); } } diff --git a/gems/decomplex/examples/c/state-mesh.c b/gems/decomplex/examples/c/state-mesh.c new file mode 100644 index 000000000..506cae880 --- /dev/null +++ b/gems/decomplex/examples/c/state-mesh.c @@ -0,0 +1,5 @@ +typedef struct StateMeshExample { int a; int b; } StateMeshExample; +void initialize(StateMeshExample *self) { self->a = 1; self->b = 2; } +void writer(StateMeshExample *self) { self->a = 3; } +int reader(StateMeshExample *self) { return self->a + self->b; } +int a_alias(StateMeshExample *self) { return self->a; } diff --git a/gems/decomplex/examples/c/structural-topology.c b/gems/decomplex/examples/c/structural-topology.c new file mode 100644 index 000000000..af1c3ef29 --- /dev/null +++ b/gems/decomplex/examples/c/structural-topology.c @@ -0,0 +1,6 @@ +typedef struct Worker { int ready_flag; } Worker; +void run(Worker *self, Items items) { prepare(self); if (ready(self)) { validate(self); } for (int item = 0; item < items.count; item++) { helper(self, item); } } +void prepare(Worker *self) {} +bool ready(Worker *self) { return true; } +void validate(Worker *self) {} +void helper(Worker *self, Item item) { item.use(); } diff --git a/gems/decomplex/examples/c/temporal-ordering-pressure.c b/gems/decomplex/examples/c/temporal-ordering-pressure.c new file mode 100644 index 000000000..49e98ca09 --- /dev/null +++ b/gems/decomplex/examples/c/temporal-ordering-pressure.c @@ -0,0 +1,5 @@ +typedef 
struct TemporalOrderExample { int a; int b; } TemporalOrderExample; +void one(TemporalOrderExample *self) { self->a = 1; } +void two(TemporalOrderExample *self) { self->a = 2; self->b = 3; } +void three(TemporalOrderExample *self) { self->b = 4; } +int reader(TemporalOrderExample *self) { return self->a; } diff --git a/gems/decomplex/examples/c/weighted-inlined-complexity.c b/gems/decomplex/examples/c/weighted-inlined-complexity.c new file mode 100644 index 000000000..47be86e74 --- /dev/null +++ b/gems/decomplex/examples/c/weighted-inlined-complexity.c @@ -0,0 +1,5 @@ +void checkout(User user, Cart cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart); } +bool validate_user(User user) { if (user.active() && !user.suspended()) { if (user.profile.complete()) { return true; } else { return false; } } else { return false; } } +int apply_discount(Cart cart) { if (cart.total > 100 && eligible()) { if (holiday()) { return 20; } else if (loyalty_month()) { return 15; } else { return 10; } } return 0; } +void process_payment(User user, Cart cart) { if (gateway.ready()) { if (cart.total > 0 && user.active()) { if (fraud_check(user)) { charge(user, cart); } else { decline(user); } } } } +void audit_cart(Cart cart) { for (int item = 0; item < cart.count; item++) { if (item_taxable(item)) { if (item_region(item) && item_amount(item) > 0) { record_tax(item); } } } } diff --git a/gems/decomplex/examples/cpp/co-update.cpp b/gems/decomplex/examples/cpp/co-update.cpp new file mode 100644 index 000000000..e498d0bfa --- /dev/null +++ b/gems/decomplex/examples/cpp/co-update.cpp @@ -0,0 +1,4 @@ +void stable_one(Node node) { node.storage = 1; node.provenance = 1; } +void stable_two(Node node) { node.storage = 1; node.provenance = 1; } +void stable_three(Node node) { node.storage = 1; node.provenance = 1; } +void misses_provenance(Node node) { node.storage = 1; } diff --git a/gems/decomplex/examples/cpp/decision-pressure.cpp 
b/gems/decomplex/examples/cpp/decision-pressure.cpp new file mode 100644 index 000000000..7b6370558 --- /dev/null +++ b/gems/decomplex/examples/cpp/decision-pressure.cpp @@ -0,0 +1 @@ +bool scan(Node node) { auto value = node.symbol; return value.isNull(); } diff --git a/gems/decomplex/examples/cpp/derived-state.cpp b/gems/decomplex/examples/cpp/derived-state.cpp new file mode 100644 index 000000000..02357b90a --- /dev/null +++ b/gems/decomplex/examples/cpp/derived-state.cpp @@ -0,0 +1 @@ +void check(int input) { auto cached = input + 1; input = 2; print(cached); } diff --git a/gems/decomplex/examples/cpp/false-simplicity.cpp b/gems/decomplex/examples/cpp/false-simplicity.cpp new file mode 100644 index 000000000..faa1ea232 --- /dev/null +++ b/gems/decomplex/examples/cpp/false-simplicity.cpp @@ -0,0 +1 @@ +class FalseSimplicityExample { void hack() { print("hidden IO"); } }; diff --git a/gems/decomplex/examples/cpp/fat-union.cpp b/gems/decomplex/examples/cpp/fat-union.cpp new file mode 100644 index 000000000..5cf051bde --- /dev/null +++ b/gems/decomplex/examples/cpp/fat-union.cpp @@ -0,0 +1 @@ +void handle(Node node) { switch (node) { case AST::Call: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv(); break; case AST::Func: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name(); break; case AST::Lit: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value(); break; } } diff --git a/gems/decomplex/examples/cpp/flay-similarity.cpp b/gems/decomplex/examples/cpp/flay-similarity.cpp new file mode 100644 index 000000000..86f50f2fa --- /dev/null +++ b/gems/decomplex/examples/cpp/flay-similarity.cpp @@ -0,0 +1,2 @@ +int first_clone(Node node) { let total = 0; auto value1 = node.part1; if (value1.ready() && value1.enabled()) { total += value1.amount; } auto value2 = node.part2; if (value2.ready() && value2.enabled()) { total += value2.amount; } auto value3 = node.part3; if (value3.ready() && value3.enabled()) { 
total += value3.amount; } auto value4 = node.part4; if (value4.ready() && value4.enabled()) { total += value4.amount; } auto value5 = node.part5; if (value5.ready() && value5.enabled()) { total += value5.amount; } auto value6 = node.part6; if (value6.ready() && value6.enabled()) { total += value6.amount; } auto value7 = node.part7; if (value7.ready() && value7.enabled()) { total += value7.amount; } auto value8 = node.part8; if (value8.ready() && value8.enabled()) { total += value8.amount; } return total; } +int second_clone(Node entry) { let total = 0; auto item1 = entry.part1; if (item1.ready() && item1.enabled()) { total += item1.amount; } auto item2 = entry.part2; if (item2.ready() && item2.enabled()) { total += item2.amount; } auto item3 = entry.part3; if (item3.ready() && item3.enabled()) { total += item3.amount; } auto item4 = entry.part4; if (item4.ready() && item4.enabled()) { total += item4.amount; } auto item5 = entry.part5; if (item5.ready() && item5.enabled()) { total += item5.amount; } auto item6 = entry.part6; if (item6.ready() && item6.enabled()) { total += item6.amount; } auto item7 = entry.part7; if (item7.ready() && item7.enabled()) { total += item7.amount; } auto item8 = entry.part8; if (item8.ready() && item8.enabled()) { total += item8.amount; } return total; } diff --git a/gems/decomplex/examples/cpp/function-lcom.cpp b/gems/decomplex/examples/cpp/function-lcom.cpp new file mode 100644 index 000000000..413b3d068 --- /dev/null +++ b/gems/decomplex/examples/cpp/function-lcom.cpp @@ -0,0 +1,13 @@ +Result mixed(int price, int tax, Logger logger) { + auto subtotal = price + tax; + auto total = subtotal * 2; + auto rounded = total.round(); + + auto timestamp = now(); + auto buffer = Buffer.init(); + auto stamp = timestamp; + buffer.push(stamp); + logger.info(buffer); + + return Result.init(rounded, buffer); +} diff --git a/gems/decomplex/examples/cpp/implicit-control-flow.cpp b/gems/decomplex/examples/cpp/implicit-control-flow.cpp new file mode 
100644 index 000000000..cfd71512a --- /dev/null +++ b/gems/decomplex/examples/cpp/implicit-control-flow.cpp @@ -0,0 +1 @@ +class FlowExample { public: int status; bool valid; bool done; void prepare() { this->status = 1; } void validate() { this->valid = this->status == 1; } void commit() { this->done = this->valid; } void ok1() { prepare(); validate(); commit(); } void ok2() { prepare(); validate(); commit(); } void ok3() { prepare(); validate(); commit(); } void ok4() { prepare(); validate(); commit(); } void drift() { validate(); prepare(); commit(); } }; diff --git a/gems/decomplex/examples/cpp/inconsistent-rename-clone.cpp b/gems/decomplex/examples/cpp/inconsistent-rename-clone.cpp new file mode 100644 index 000000000..c24e9b89c --- /dev/null +++ b/gems/decomplex/examples/cpp/inconsistent-rename-clone.cpp @@ -0,0 +1,2 @@ +void original() { auto src = fetch(1); check(src); store(src); finalize(src); } +void pasted() { auto dst = fetch(2); check(dst); store(src); finalize(dst); } diff --git a/gems/decomplex/examples/cpp/local-flow.cpp b/gems/decomplex/examples/cpp/local-flow.cpp new file mode 100644 index 000000000..107b3840b --- /dev/null +++ b/gems/decomplex/examples/cpp/local-flow.cpp @@ -0,0 +1,9 @@ +Result mixed(int price, int tax) { + auto subtotal = price + tax; + auto total = subtotal.round(); + + auto timestamp = now(); + auto buffer = Buffer.init(); + buffer.push(timestamp); + return Result.init(total, buffer); +} diff --git a/gems/decomplex/examples/cpp/locality-drag.cpp b/gems/decomplex/examples/cpp/locality-drag.cpp new file mode 100644 index 000000000..e2730e079 --- /dev/null +++ b/gems/decomplex/examples/cpp/locality-drag.cpp @@ -0,0 +1,27 @@ +void run(User user, Cart cart, Logger logger) { + auto receipt_id = user.id; + + auto total = cart.total; + if (total > 100) { + if (cart.discountable()) { + auto discount = 10; + } + } + if (cart.taxable()) { + if (cart.region) { + auto tax = total * 2; + } + } + if (logger.enabled()) { + if 
(logger.debug()) { + logger.info(total); + } + } + if (cart.valid()) { + if (cart.ready()) { + auto status = 1; + } + } + + emit(receipt_id); +} diff --git a/gems/decomplex/examples/cpp/miner.cpp b/gems/decomplex/examples/cpp/miner.cpp new file mode 100644 index 000000000..a48eaf209 --- /dev/null +++ b/gems/decomplex/examples/cpp/miner.cpp @@ -0,0 +1,4 @@ +bool one(bool a, bool b, bool c) { return a && b && c; } +bool two(bool a, bool b, bool c) { return a && b && c; } +bool three(bool a, bool b, bool c) { return a && b && c; } +bool broken(bool a, bool b) { return a && b; } diff --git a/gems/decomplex/examples/cpp/operational-discontinuity.cpp b/gems/decomplex/examples/cpp/operational-discontinuity.cpp new file mode 100644 index 000000000..d011d0237 --- /dev/null +++ b/gems/decomplex/examples/cpp/operational-discontinuity.cpp @@ -0,0 +1,9 @@ +void phase_shift() { + int a = 1; + int b = 2; + + // Phase 2 + int x = 3; + int y = 4; + print(x); print(y); +} diff --git a/gems/decomplex/examples/cpp/oversized-predicate.cpp b/gems/decomplex/examples/cpp/oversized-predicate.cpp new file mode 100644 index 000000000..9c9197082 --- /dev/null +++ b/gems/decomplex/examples/cpp/oversized-predicate.cpp @@ -0,0 +1 @@ +void complex_check(bool a, bool b, bool c, bool d) { if (a && b && c && d) { print("too big"); } } diff --git a/gems/decomplex/examples/cpp/path-condition.cpp b/gems/decomplex/examples/cpp/path-condition.cpp new file mode 100644 index 000000000..9fc2f74f2 --- /dev/null +++ b/gems/decomplex/examples/cpp/path-condition.cpp @@ -0,0 +1,4 @@ +void one(X x, Y y, Z z) { if (x.p() && y.q() && z.r()) { go(x); } } +void two(X x, Y y, Z z) { if (x.p() && y.q() && z.r()) { go(x); } } +void three(X x, Y y, Z z) { if (x.p() && y.q() && z.r()) { go(x); } } +void bug(X x, Y y, Z z) { if (x.p() && y.q()) { go(x); } } diff --git a/gems/decomplex/examples/cpp/predicate-alias.cpp b/gems/decomplex/examples/cpp/predicate-alias.cpp new file mode 100644 index 000000000..b812862c9 --- 
/dev/null +++ b/gems/decomplex/examples/cpp/predicate-alias.cpp @@ -0,0 +1,3 @@ +bool first() { return true; } +bool second() { return true; } +bool other() { return false; } diff --git a/gems/decomplex/examples/cpp/redundant-nil-guard.cpp b/gems/decomplex/examples/cpp/redundant-nil-guard.cpp new file mode 100644 index 000000000..6cab7e8de --- /dev/null +++ b/gems/decomplex/examples/cpp/redundant-nil-guard.cpp @@ -0,0 +1 @@ +void check(Value *value) { if (value->isSome()) { value->isNull(); } } diff --git a/gems/decomplex/examples/cpp/semantic-alias.cpp b/gems/decomplex/examples/cpp/semantic-alias.cpp new file mode 100644 index 000000000..60c1db51a --- /dev/null +++ b/gems/decomplex/examples/cpp/semantic-alias.cpp @@ -0,0 +1,4 @@ +bool frame(Node node) { return node.provenance == FRAME; } +bool is_frame(Node node) { return provenance == FRAME; } +bool heap(Node node) { return node.provenance == HEAP; } +int somewhere(Node node) { if (node.provenance == FRAME) { return 1; } return 0; } diff --git a/gems/decomplex/examples/cpp/sequence-mine.cpp b/gems/decomplex/examples/cpp/sequence-mine.cpp new file mode 100644 index 000000000..e05be841f --- /dev/null +++ b/gems/decomplex/examples/cpp/sequence-mine.cpp @@ -0,0 +1,5 @@ +void one() { alloc_mark(x); body1(); cleanup(x); } +void two() { alloc_mark(y); body2(); cleanup(y); } +void three() { alloc_mark(z); body3(); cleanup(z); } +void four() { alloc_mark(w); body4(); cleanup(w); } +void leak() { alloc_mark(q); use_value(q); } diff --git a/gems/decomplex/examples/cpp/state-branch-density.cpp b/gems/decomplex/examples/cpp/state-branch-density.cpp new file mode 100644 index 000000000..378dc859a --- /dev/null +++ b/gems/decomplex/examples/cpp/state-branch-density.cpp @@ -0,0 +1 @@ +class StateBranchChecker { public: bool checked; void check(User user) { if (user.admin) { this->checked = true; } if (this->checked && user.name == "admin") { print("hello"); } } }; diff --git a/gems/decomplex/examples/cpp/state-mesh.cpp 
b/gems/decomplex/examples/cpp/state-mesh.cpp new file mode 100644 index 000000000..423482f33 --- /dev/null +++ b/gems/decomplex/examples/cpp/state-mesh.cpp @@ -0,0 +1 @@ +class StateMeshExample { public: int a; int b; void initialize() { this->a = 1; this->b = 2; } void writer() { this->a = 3; } int reader() { return this->a + this->b; } int a_alias() { return this->a; } }; diff --git a/gems/decomplex/examples/cpp/structural-topology.cpp b/gems/decomplex/examples/cpp/structural-topology.cpp new file mode 100644 index 000000000..f39dde337 --- /dev/null +++ b/gems/decomplex/examples/cpp/structural-topology.cpp @@ -0,0 +1 @@ +class Worker { public: void run(Items items) { prepare(); if (ready()) { validate(); } for (auto item : items) { helper(item); } } private: void prepare() {} bool ready() { return true; } public: void validate() {} private: void helper(Item item) { item.use(); } }; diff --git a/gems/decomplex/examples/cpp/temporal-ordering-pressure.cpp b/gems/decomplex/examples/cpp/temporal-ordering-pressure.cpp new file mode 100644 index 000000000..4b565e3ed --- /dev/null +++ b/gems/decomplex/examples/cpp/temporal-ordering-pressure.cpp @@ -0,0 +1 @@ +class TemporalOrderExample { public: int a; int b; void one() { this->a = 1; } void two() { this->a = 2; this->b = 3; } void three() { this->b = 4; } int reader() { return this->a; } }; diff --git a/gems/decomplex/examples/cpp/weighted-inlined-complexity.cpp b/gems/decomplex/examples/cpp/weighted-inlined-complexity.cpp new file mode 100644 index 000000000..4fd497081 --- /dev/null +++ b/gems/decomplex/examples/cpp/weighted-inlined-complexity.cpp @@ -0,0 +1,5 @@ +void checkout(User user, Cart cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart); } +bool validate_user(User user) { if (user.active() && !user.suspended()) { if (user.profile.complete()) { return true; } else { return false; } } else { return false; } } +int apply_discount(Cart cart) { if (cart.total > 100 && 
eligible()) { if (holiday()) { return 20; } else if (loyalty_month()) { return 15; } else { return 10; } } return 0; } +void process_payment(User user, Cart cart) { if (gateway.ready()) { if (cart.total > 0 && user.active()) { if (fraud_check(user)) { charge(user, cart); } else { decline(user); } } } } +void audit_cart(Cart cart) { for (auto item : cart.items) { if (item.taxable()) { if (item.region && item.amount > 0) { record_tax(item); } } } } diff --git a/gems/decomplex/examples/csharp/co-update.cs b/gems/decomplex/examples/csharp/co-update.cs new file mode 100644 index 000000000..093c07fbe --- /dev/null +++ b/gems/decomplex/examples/csharp/co-update.cs @@ -0,0 +1,6 @@ +class Example { + static void stable_one(Node node) { node.storage = 1; node.provenance = 1; } + static void stable_two(Node node) { node.storage = 1; node.provenance = 1; } + static void stable_three(Node node) { node.storage = 1; node.provenance = 1; } + static void misses_provenance(Node node) { node.storage = 1; } +} diff --git a/gems/decomplex/examples/csharp/decision-pressure.cs b/gems/decomplex/examples/csharp/decision-pressure.cs new file mode 100644 index 000000000..2be54cf91 --- /dev/null +++ b/gems/decomplex/examples/csharp/decision-pressure.cs @@ -0,0 +1 @@ +class Example { static bool scan(Node node) { var value = node.symbol; return value.isNull(); } } diff --git a/gems/decomplex/examples/csharp/derived-state.cs b/gems/decomplex/examples/csharp/derived-state.cs new file mode 100644 index 000000000..1170f1db7 --- /dev/null +++ b/gems/decomplex/examples/csharp/derived-state.cs @@ -0,0 +1 @@ +class Example { static void check(int input) { var cached = input + 1; input = 2; print(cached); } } diff --git a/gems/decomplex/examples/csharp/false-simplicity.cs b/gems/decomplex/examples/csharp/false-simplicity.cs new file mode 100644 index 000000000..78fb3897c --- /dev/null +++ b/gems/decomplex/examples/csharp/false-simplicity.cs @@ -0,0 +1 @@ +class FalseSimplicityExample { void hack() { 
print("hidden IO"); } } diff --git a/gems/decomplex/examples/csharp/fat-union.cs b/gems/decomplex/examples/csharp/fat-union.cs new file mode 100644 index 000000000..ad280adb7 --- /dev/null +++ b/gems/decomplex/examples/csharp/fat-union.cs @@ -0,0 +1 @@ +class Example { static void handle(Node node) { switch (node) { case AST.Call: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv(); break; case AST.Func: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name(); break; case AST.Lit: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value(); break; } } } diff --git a/gems/decomplex/examples/csharp/flay-similarity.cs b/gems/decomplex/examples/csharp/flay-similarity.cs new file mode 100644 index 000000000..b6ca69339 --- /dev/null +++ b/gems/decomplex/examples/csharp/flay-similarity.cs @@ -0,0 +1,75 @@ +class Example { + static int first_clone(Node node) { + var total = 0; + var value1 = node.part1; + if (value1.ready() && value1.enabled()) { + total += value1.amount; + } + var value2 = node.part2; + if (value2.ready() && value2.enabled()) { + total += value2.amount; + } + var value3 = node.part3; + if (value3.ready() && value3.enabled()) { + total += value3.amount; + } + var value4 = node.part4; + if (value4.ready() && value4.enabled()) { + total += value4.amount; + } + var value5 = node.part5; + if (value5.ready() && value5.enabled()) { + total += value5.amount; + } + var value6 = node.part6; + if (value6.ready() && value6.enabled()) { + total += value6.amount; + } + var value7 = node.part7; + if (value7.ready() && value7.enabled()) { + total += value7.amount; + } + var value8 = node.part8; + if (value8.ready() && value8.enabled()) { + total += value8.amount; + } + return total; + } + + static int second_clone(Node entry) { + var total = 0; + var item1 = entry.part1; + if (item1.ready() && item1.enabled()) { + total += item1.amount; + } + var item2 = entry.part2; + if (item2.ready() && item2.enabled()) { + 
total += item2.amount; + } + var item3 = entry.part3; + if (item3.ready() && item3.enabled()) { + total += item3.amount; + } + var item4 = entry.part4; + if (item4.ready() && item4.enabled()) { + total += item4.amount; + } + var item5 = entry.part5; + if (item5.ready() && item5.enabled()) { + total += item5.amount; + } + var item6 = entry.part6; + if (item6.ready() && item6.enabled()) { + total += item6.amount; + } + var item7 = entry.part7; + if (item7.ready() && item7.enabled()) { + total += item7.amount; + } + var item8 = entry.part8; + if (item8.ready() && item8.enabled()) { + total += item8.amount; + } + return total; + } +} diff --git a/gems/decomplex/examples/csharp/function-lcom.cs b/gems/decomplex/examples/csharp/function-lcom.cs new file mode 100644 index 000000000..6528a671a --- /dev/null +++ b/gems/decomplex/examples/csharp/function-lcom.cs @@ -0,0 +1,13 @@ +class Example { static Result mixed(int price, int tax, Logger logger) { + var subtotal = price + tax; + var total = subtotal * 2; + var rounded = total.round(); + + var timestamp = now(); + var buffer = Buffer.init(); + var stamp = timestamp; + buffer.push(stamp); + logger.info(buffer); + + return Result.init(rounded, buffer); +} } diff --git a/gems/decomplex/examples/csharp/implicit-control-flow.cs b/gems/decomplex/examples/csharp/implicit-control-flow.cs new file mode 100644 index 000000000..f5e7319f7 --- /dev/null +++ b/gems/decomplex/examples/csharp/implicit-control-flow.cs @@ -0,0 +1 @@ +class FlowExample { int status; bool valid; bool done; void prepare() { this.status = 1; } void validate() { this.valid = this.status == 1; } void commit() { this.done = this.valid; } void ok1() { prepare(); validate(); commit(); } void ok2() { prepare(); validate(); commit(); } void ok3() { prepare(); validate(); commit(); } void ok4() { prepare(); validate(); commit(); } void drift() { validate(); prepare(); commit(); } } diff --git a/gems/decomplex/examples/csharp/inconsistent-rename-clone.cs 
b/gems/decomplex/examples/csharp/inconsistent-rename-clone.cs new file mode 100644 index 000000000..6ec491226 --- /dev/null +++ b/gems/decomplex/examples/csharp/inconsistent-rename-clone.cs @@ -0,0 +1 @@ +class Example { static void original() { var src = fetch(1); check(src); store(src); finalize(src); } static void pasted() { var dst = fetch(2); check(dst); store(src); finalize(dst); } } diff --git a/gems/decomplex/examples/csharp/local-flow.cs b/gems/decomplex/examples/csharp/local-flow.cs new file mode 100644 index 000000000..a5fa7f235 --- /dev/null +++ b/gems/decomplex/examples/csharp/local-flow.cs @@ -0,0 +1,9 @@ +class Example { static Result mixed(int price, int tax) { + var subtotal = price + tax; + var total = subtotal.round(); + + var timestamp = now(); + var buffer = Buffer.init(); + buffer.push(timestamp); + return Result.init(total, buffer); +} } diff --git a/gems/decomplex/examples/csharp/locality-drag.cs b/gems/decomplex/examples/csharp/locality-drag.cs new file mode 100644 index 000000000..8d31b0dcb --- /dev/null +++ b/gems/decomplex/examples/csharp/locality-drag.cs @@ -0,0 +1,29 @@ +class Example { + static void run(User user, Cart cart, Logger logger) { + var receipt_id = user.id; + + var total = cart.total; + if (total > 100) { + if (cart.discountable()) { + var discount = 10; + } + } + if (cart.taxable()) { + if (cart.region) { + var tax = total * 2; + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total); + } + } + if (cart.valid()) { + if (cart.ready()) { + var status = 1; + } + } + + emit(receipt_id); + } +} diff --git a/gems/decomplex/examples/csharp/miner.cs b/gems/decomplex/examples/csharp/miner.cs new file mode 100644 index 000000000..4d471d44c --- /dev/null +++ b/gems/decomplex/examples/csharp/miner.cs @@ -0,0 +1 @@ +class Example { static bool one(bool a, bool b, bool c) { return a && b && c; } static bool two(bool a, bool b, bool c) { return a && b && c; } static bool three(bool a, bool b, bool c) { return a && 
b && c; } static bool broken(bool a, bool b) { return a && b; } } diff --git a/gems/decomplex/examples/csharp/operational-discontinuity.cs b/gems/decomplex/examples/csharp/operational-discontinuity.cs new file mode 100644 index 000000000..d2dfba674 --- /dev/null +++ b/gems/decomplex/examples/csharp/operational-discontinuity.cs @@ -0,0 +1,9 @@ +class Example { static void phase_shift() { + var a = 1; + var b = 2; + + // Phase 2 + var x = 3; + var y = 4; + print(x); print(y); +} } diff --git a/gems/decomplex/examples/csharp/oversized-predicate.cs b/gems/decomplex/examples/csharp/oversized-predicate.cs new file mode 100644 index 000000000..917339ced --- /dev/null +++ b/gems/decomplex/examples/csharp/oversized-predicate.cs @@ -0,0 +1 @@ +class Example { static void complex_check(bool a, bool b, bool c, bool d) { if (a && b && c && d) { print("too big"); } } } diff --git a/gems/decomplex/examples/csharp/path-condition.cs b/gems/decomplex/examples/csharp/path-condition.cs new file mode 100644 index 000000000..941f756f7 --- /dev/null +++ b/gems/decomplex/examples/csharp/path-condition.cs @@ -0,0 +1 @@ +class Example { static void one(X x,Y y,Z z) { if (x.p() && y.q() && z.r()) { go(x); } } static void two(X x,Y y,Z z) { if (x.p() && y.q() && z.r()) { go(x); } } static void three(X x,Y y,Z z) { if (x.p() && y.q() && z.r()) { go(x); } } static void bug(X x,Y y,Z z) { if (x.p() && y.q()) { go(x); } } } diff --git a/gems/decomplex/examples/csharp/predicate-alias.cs b/gems/decomplex/examples/csharp/predicate-alias.cs new file mode 100644 index 000000000..c853e98a4 --- /dev/null +++ b/gems/decomplex/examples/csharp/predicate-alias.cs @@ -0,0 +1 @@ +class Example { static bool first() { return true; } static bool second() { return true; } static bool other() { return false; } } diff --git a/gems/decomplex/examples/csharp/redundant-nil-guard.cs b/gems/decomplex/examples/csharp/redundant-nil-guard.cs new file mode 100644 index 000000000..32f12a734 --- /dev/null +++ 
b/gems/decomplex/examples/csharp/redundant-nil-guard.cs @@ -0,0 +1 @@ +class Example { static void check(Value value) { if (value.isSome()) { value.isNull(); } } } diff --git a/gems/decomplex/examples/csharp/semantic-alias.cs b/gems/decomplex/examples/csharp/semantic-alias.cs new file mode 100644 index 000000000..75c330865 --- /dev/null +++ b/gems/decomplex/examples/csharp/semantic-alias.cs @@ -0,0 +1 @@ +class Example { static bool frame(Node node) { return node.provenance == FRAME; } static bool is_frame(Node node) { return provenance == FRAME; } static bool heap(Node node) { return node.provenance == HEAP; } static int somewhere(Node node) { if (node.provenance == FRAME) { return 1; } return 0; } } diff --git a/gems/decomplex/examples/csharp/sequence-mine.cs b/gems/decomplex/examples/csharp/sequence-mine.cs new file mode 100644 index 000000000..629558214 --- /dev/null +++ b/gems/decomplex/examples/csharp/sequence-mine.cs @@ -0,0 +1 @@ +class Example { static void one() { alloc_mark(x); body1(); cleanup(x); } static void two() { alloc_mark(y); body2(); cleanup(y); } static void three() { alloc_mark(z); body3(); cleanup(z); } static void four() { alloc_mark(w); body4(); cleanup(w); } static void leak() { alloc_mark(q); use_value(q); } } diff --git a/gems/decomplex/examples/csharp/state-branch-density.cs b/gems/decomplex/examples/csharp/state-branch-density.cs new file mode 100644 index 000000000..d4549f587 --- /dev/null +++ b/gems/decomplex/examples/csharp/state-branch-density.cs @@ -0,0 +1 @@ +class StateBranchChecker { bool checked; void check(User user) { if (user.admin) { this.checked = true; } if (this.checked && user.name == "admin") { print("hello"); } } } diff --git a/gems/decomplex/examples/csharp/state-mesh.cs b/gems/decomplex/examples/csharp/state-mesh.cs new file mode 100644 index 000000000..ed01f5ad9 --- /dev/null +++ b/gems/decomplex/examples/csharp/state-mesh.cs @@ -0,0 +1 @@ +class StateMeshExample { int a; int b; void initialize() { this.a = 1; 
this.b = 2; } void writer() { this.a = 3; } int reader() { return this.a + this.b; } int a_alias() { return this.a; } } diff --git a/gems/decomplex/examples/csharp/structural-topology.cs b/gems/decomplex/examples/csharp/structural-topology.cs new file mode 100644 index 000000000..1af56a088 --- /dev/null +++ b/gems/decomplex/examples/csharp/structural-topology.cs @@ -0,0 +1 @@ +class Worker { public void run(Items items) { prepare(); if (ready()) { validate(); } foreach (var item in items) { helper(item); } } private void prepare() {} private bool ready() { return true; } public void validate() {} private void helper(Item item) { item.use(); } } diff --git a/gems/decomplex/examples/csharp/temporal-ordering-pressure.cs b/gems/decomplex/examples/csharp/temporal-ordering-pressure.cs new file mode 100644 index 000000000..83a07d855 --- /dev/null +++ b/gems/decomplex/examples/csharp/temporal-ordering-pressure.cs @@ -0,0 +1 @@ +class TemporalOrderExample { int a; int b; public void one() { this.a = 1; } public void two() { this.a = 2; this.b = 3; } public void three() { this.b = 4; } public int reader() { return this.a; } } diff --git a/gems/decomplex/examples/csharp/weighted-inlined-complexity.cs b/gems/decomplex/examples/csharp/weighted-inlined-complexity.cs new file mode 100644 index 000000000..e78470393 --- /dev/null +++ b/gems/decomplex/examples/csharp/weighted-inlined-complexity.cs @@ -0,0 +1 @@ +class Example { static void checkout(User user, Cart cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart); } static bool validate_user(User user) { if (user.active() && !user.suspended()) { if (user.profile.complete()) { return true; } else { return false; } } else { return false; } } static int apply_discount(Cart cart) { if (cart.total > 100 && eligible()) { if (holiday()) { return 20; } else if (loyalty_month()) { return 15; } else { return 10; } } return 0; } static void process_payment(User user, Cart cart) { if 
(gateway.ready()) { if (cart.total > 0 && user.active()) { if (fraud_check(user)) { charge(user, cart); } else { decline(user); } } } } static void audit_cart(Cart cart) { foreach (var item in cart.items) { if (item.taxable()) { if (item.region && item.amount > 0) { record_tax(item); } } } } } diff --git a/gems/decomplex/examples/go/co-update.go b/gems/decomplex/examples/go/co-update.go new file mode 100644 index 000000000..7bc6047a7 --- /dev/null +++ b/gems/decomplex/examples/go/co-update.go @@ -0,0 +1,5 @@ +package main +func stable_one(node Node) { node.storage = 1; node.provenance = 1 } +func stable_two(node Node) { node.storage = 1; node.provenance = 1 } +func stable_three(node Node) { node.storage = 1; node.provenance = 1 } +func misses_provenance(node Node) { node.storage = 1 } diff --git a/gems/decomplex/examples/go/decision-pressure.go b/gems/decomplex/examples/go/decision-pressure.go new file mode 100644 index 000000000..84f811f68 --- /dev/null +++ b/gems/decomplex/examples/go/decision-pressure.go @@ -0,0 +1,2 @@ +package main +func scan(node Node) bool { value := node.symbol; return value.isNull() } diff --git a/gems/decomplex/examples/go/derived-state.go b/gems/decomplex/examples/go/derived-state.go new file mode 100644 index 000000000..1c6997b53 --- /dev/null +++ b/gems/decomplex/examples/go/derived-state.go @@ -0,0 +1,2 @@ +package main +func check(input int) { cached := input + 1; input = 2; print(cached) } diff --git a/gems/decomplex/examples/go/false-simplicity.go b/gems/decomplex/examples/go/false-simplicity.go new file mode 100644 index 000000000..3ce4c70a3 --- /dev/null +++ b/gems/decomplex/examples/go/false-simplicity.go @@ -0,0 +1,3 @@ +package main +type FalseSimplicityExample struct {} +func (self FalseSimplicityExample) hack() { print("hidden IO") } diff --git a/gems/decomplex/examples/go/fat-union.go b/gems/decomplex/examples/go/fat-union.go new file mode 100644 index 000000000..d88b1deee --- /dev/null +++ 
b/gems/decomplex/examples/go/fat-union.go @@ -0,0 +1,8 @@ +package main +func handle(node Node) { + switch node { + case AST.Call: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv() + case AST.Func: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name() + case AST.Lit: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value() + } +} diff --git a/gems/decomplex/examples/go/flay-similarity.go b/gems/decomplex/examples/go/flay-similarity.go new file mode 100644 index 000000000..e6fa6e4db --- /dev/null +++ b/gems/decomplex/examples/go/flay-similarity.go @@ -0,0 +1,3 @@ +package main +func first_clone(node Node) int { total := 0; value1 := node.part1; if value1.ready() && value1.enabled() { total += value1.amount } value2 := node.part2; if value2.ready() && value2.enabled() { total += value2.amount } value3 := node.part3; if value3.ready() && value3.enabled() { total += value3.amount } value4 := node.part4; if value4.ready() && value4.enabled() { total += value4.amount } value5 := node.part5; if value5.ready() && value5.enabled() { total += value5.amount } value6 := node.part6; if value6.ready() && value6.enabled() { total += value6.amount } value7 := node.part7; if value7.ready() && value7.enabled() { total += value7.amount } value8 := node.part8; if value8.ready() && value8.enabled() { total += value8.amount } return total } +func second_clone(entry Node) int { total := 0; item1 := entry.part1; if item1.ready() && item1.enabled() { total += item1.amount } item2 := entry.part2; if item2.ready() && item2.enabled() { total += item2.amount } item3 := entry.part3; if item3.ready() && item3.enabled() { total += item3.amount } item4 := entry.part4; if item4.ready() && item4.enabled() { total += item4.amount } item5 := entry.part5; if item5.ready() && item5.enabled() { total += item5.amount } item6 := entry.part6; if item6.ready() && item6.enabled() { total += item6.amount } item7 := entry.part7; if 
item7.ready() && item7.enabled() { total += item7.amount } item8 := entry.part8; if item8.ready() && item8.enabled() { total += item8.amount } return total } diff --git a/gems/decomplex/examples/go/function-lcom.go b/gems/decomplex/examples/go/function-lcom.go new file mode 100644 index 000000000..9b8d9a98f --- /dev/null +++ b/gems/decomplex/examples/go/function-lcom.go @@ -0,0 +1,14 @@ +package main +func mixed(price int, tax int, logger Logger) Result { + subtotal := price + tax + total := subtotal * 2 + rounded := total.round() + + timestamp := now() + buffer := Buffer_init() + stamp := timestamp + buffer.push(stamp) + logger.info(buffer) + + return Result_init(rounded, buffer) +} diff --git a/gems/decomplex/examples/go/implicit-control-flow.go b/gems/decomplex/examples/go/implicit-control-flow.go new file mode 100644 index 000000000..3aae75762 --- /dev/null +++ b/gems/decomplex/examples/go/implicit-control-flow.go @@ -0,0 +1,10 @@ +package main +type FlowExample struct { status int; valid bool; done bool } +func (self *FlowExample) prepare() { self.status = 1 } +func (self *FlowExample) validate() { self.valid = self.status == 1 } +func (self *FlowExample) commit() { self.done = self.valid } +func (self *FlowExample) ok1() { self.prepare(); self.validate(); self.commit() } +func (self *FlowExample) ok2() { self.prepare(); self.validate(); self.commit() } +func (self *FlowExample) ok3() { self.prepare(); self.validate(); self.commit() } +func (self *FlowExample) ok4() { self.prepare(); self.validate(); self.commit() } +func (self *FlowExample) drift() { self.validate(); self.prepare(); self.commit() } diff --git a/gems/decomplex/examples/go/inconsistent-rename-clone.go b/gems/decomplex/examples/go/inconsistent-rename-clone.go new file mode 100644 index 000000000..5c55f1b9b --- /dev/null +++ b/gems/decomplex/examples/go/inconsistent-rename-clone.go @@ -0,0 +1,3 @@ +package main +func original() { src := fetch(1); check(src); store(src); finalize(src) } +func 
pasted() { dst := fetch(2); check(dst); store(src); finalize(dst) } diff --git a/gems/decomplex/examples/go/local-flow.go b/gems/decomplex/examples/go/local-flow.go new file mode 100644 index 000000000..63d08007e --- /dev/null +++ b/gems/decomplex/examples/go/local-flow.go @@ -0,0 +1,10 @@ +package main +func mixed(price int, tax int) Result { + subtotal := price + tax + total := subtotal.round() + + timestamp := now() + buffer := Buffer_init() + buffer.push(timestamp) + return Result_init(total, buffer) +} diff --git a/gems/decomplex/examples/go/locality-drag.go b/gems/decomplex/examples/go/locality-drag.go new file mode 100644 index 000000000..0ea9ebeef --- /dev/null +++ b/gems/decomplex/examples/go/locality-drag.go @@ -0,0 +1,31 @@ +package main +func run(user User, cart Cart, logger Logger) { + receipt_id := user.id + + total := cart.total + if total > 100 { + if cart.discountable() { + discount := 10 + _ = discount + } + } + if cart.taxable() { + if cart.region { + tax := total * 2 + _ = tax + } + } + if logger.enabled() { + if logger.debug() { + logger.info(total) + } + } + if cart.valid() { + if cart.ready() { + status := 1 + _ = status + } + } + + emit(receipt_id) +} diff --git a/gems/decomplex/examples/go/miner.go b/gems/decomplex/examples/go/miner.go new file mode 100644 index 000000000..0613414c5 --- /dev/null +++ b/gems/decomplex/examples/go/miner.go @@ -0,0 +1,5 @@ +package main +func one(a bool,b bool,c bool) bool { return a && b && c } +func two(a bool,b bool,c bool) bool { return a && b && c } +func three(a bool,b bool,c bool) bool { return a && b && c } +func broken(a bool,b bool) bool { return a && b } diff --git a/gems/decomplex/examples/go/operational-discontinuity.go b/gems/decomplex/examples/go/operational-discontinuity.go new file mode 100644 index 000000000..f577d642c --- /dev/null +++ b/gems/decomplex/examples/go/operational-discontinuity.go @@ -0,0 +1,11 @@ +package main +func phase_shift() { + a := 1 + b := 2 + _ = a; _ = b + + // Phase 2 
+ x := 3 + y := 4 + print(x); print(y) +} diff --git a/gems/decomplex/examples/go/oversized-predicate.go b/gems/decomplex/examples/go/oversized-predicate.go new file mode 100644 index 000000000..98feb9fb3 --- /dev/null +++ b/gems/decomplex/examples/go/oversized-predicate.go @@ -0,0 +1,2 @@ +package main +func complex_check(a bool,b bool,c bool,d bool) { if a && b && c && d { print("too big") } } diff --git a/gems/decomplex/examples/go/path-condition.go b/gems/decomplex/examples/go/path-condition.go new file mode 100644 index 000000000..60ac5b8a1 --- /dev/null +++ b/gems/decomplex/examples/go/path-condition.go @@ -0,0 +1,5 @@ +package main +func one(x X,y Y,z Z) { if x.p() && y.q() && z.r() { go(x) } } +func two(x X,y Y,z Z) { if x.p() && y.q() && z.r() { go(x) } } +func three(x X,y Y,z Z) { if x.p() && y.q() && z.r() { go(x) } } +func bug(x X,y Y,z Z) { if x.p() && y.q() { go(x) } } diff --git a/gems/decomplex/examples/go/predicate-alias.go b/gems/decomplex/examples/go/predicate-alias.go new file mode 100644 index 000000000..86b2ce5eb --- /dev/null +++ b/gems/decomplex/examples/go/predicate-alias.go @@ -0,0 +1,4 @@ +package main +func first() bool { return true } +func second() bool { return true } +func other() bool { return false } diff --git a/gems/decomplex/examples/go/redundant-nil-guard.go b/gems/decomplex/examples/go/redundant-nil-guard.go new file mode 100644 index 000000000..939e32535 --- /dev/null +++ b/gems/decomplex/examples/go/redundant-nil-guard.go @@ -0,0 +1,2 @@ +package main +func check(value Value) { if value.isSome() { value.isNull() } } diff --git a/gems/decomplex/examples/go/semantic-alias.go b/gems/decomplex/examples/go/semantic-alias.go new file mode 100644 index 000000000..dc9d43838 --- /dev/null +++ b/gems/decomplex/examples/go/semantic-alias.go @@ -0,0 +1,5 @@ +package main +func frame(node Node) bool { return node.provenance == FRAME } +func is_frame(node Node) bool { return provenance == FRAME } +func heap(node Node) bool { return 
node.provenance == HEAP } +func somewhere(node Node) int { if node.provenance == FRAME { return 1 }; return 0 } diff --git a/gems/decomplex/examples/go/sequence-mine.go b/gems/decomplex/examples/go/sequence-mine.go new file mode 100644 index 000000000..6f2c7d83a --- /dev/null +++ b/gems/decomplex/examples/go/sequence-mine.go @@ -0,0 +1,6 @@ +package main +func one() { alloc_mark(x); body1(); cleanup(x) } +func two() { alloc_mark(y); body2(); cleanup(y) } +func three() { alloc_mark(z); body3(); cleanup(z) } +func four() { alloc_mark(w); body4(); cleanup(w) } +func leak() { alloc_mark(q); use_value(q) } diff --git a/gems/decomplex/examples/go/state-branch-density.go b/gems/decomplex/examples/go/state-branch-density.go new file mode 100644 index 000000000..615befc24 --- /dev/null +++ b/gems/decomplex/examples/go/state-branch-density.go @@ -0,0 +1,3 @@ +package main +type StateBranchChecker struct { checked bool } +func (self *StateBranchChecker) check(user User) { if user.admin { self.checked = true } if self.checked && user.name == "admin" { print("hello") } } diff --git a/gems/decomplex/examples/go/state-mesh.go b/gems/decomplex/examples/go/state-mesh.go new file mode 100644 index 000000000..14ede1069 --- /dev/null +++ b/gems/decomplex/examples/go/state-mesh.go @@ -0,0 +1,6 @@ +package main +type StateMeshExample struct { a int; b int } +func (self *StateMeshExample) initialize() { self.a = 1; self.b = 2 } +func (self *StateMeshExample) writer() { self.a = 3 } +func (self *StateMeshExample) reader() int { return self.a + self.b } +func (self *StateMeshExample) a_alias() int { return self.a } diff --git a/gems/decomplex/examples/go/structural-topology.go b/gems/decomplex/examples/go/structural-topology.go new file mode 100644 index 000000000..c3b5805f8 --- /dev/null +++ b/gems/decomplex/examples/go/structural-topology.go @@ -0,0 +1,7 @@ +package main +type Worker struct {} +func (self *Worker) run(items Items) { self.prepare(); if self.ready() { self.validate() }; 
for _, item := range items { self.helper(item) } } +func (self *Worker) prepare() {} +func (self *Worker) ready() bool { return true } +func (self *Worker) validate() {} +func (self *Worker) helper(item Item) { item.use() } diff --git a/gems/decomplex/examples/go/temporal-ordering-pressure.go b/gems/decomplex/examples/go/temporal-ordering-pressure.go new file mode 100644 index 000000000..5b29ea4e4 --- /dev/null +++ b/gems/decomplex/examples/go/temporal-ordering-pressure.go @@ -0,0 +1,6 @@ +package main +type TemporalOrderExample struct { a int; b int } +func (self *TemporalOrderExample) One() { self.a = 1 } +func (self *TemporalOrderExample) Two() { self.a = 2; self.b = 3 } +func (self *TemporalOrderExample) Three() { self.b = 4 } +func (self *TemporalOrderExample) Reader() int { return self.a } diff --git a/gems/decomplex/examples/go/weighted-inlined-complexity.go b/gems/decomplex/examples/go/weighted-inlined-complexity.go new file mode 100644 index 000000000..75881fb16 --- /dev/null +++ b/gems/decomplex/examples/go/weighted-inlined-complexity.go @@ -0,0 +1,6 @@ +package main +func checkout(user User, cart Cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart) } +func validate_user(user User) bool { if user.active() && !user.suspended() { if user.profile.complete() { return true } else { return false } } else { return false } } +func apply_discount(cart Cart) int { if cart.total > 100 && eligible() { if holiday() { return 20 } else if loyalty_month() { return 15 } else { return 10 } }; return 0 } +func process_payment(user User, cart Cart) { if gateway.ready() { if cart.total > 0 && user.active() { if fraud_check(user) { charge(user, cart) } else { decline(user) } } } } +func audit_cart(cart Cart) { for _, item := range cart.items { if item.taxable() { if item.region && item.amount > 0 { record_tax(item) } } } } diff --git a/gems/decomplex/examples/java/co-update.java b/gems/decomplex/examples/java/co-update.java new file 
mode 100644 index 000000000..093c07fbe --- /dev/null +++ b/gems/decomplex/examples/java/co-update.java @@ -0,0 +1,6 @@ +class Example { + static void stable_one(Node node) { node.storage = 1; node.provenance = 1; } + static void stable_two(Node node) { node.storage = 1; node.provenance = 1; } + static void stable_three(Node node) { node.storage = 1; node.provenance = 1; } + static void misses_provenance(Node node) { node.storage = 1; } +} diff --git a/gems/decomplex/examples/java/decision-pressure.java b/gems/decomplex/examples/java/decision-pressure.java new file mode 100644 index 000000000..f86ec54a1 --- /dev/null +++ b/gems/decomplex/examples/java/decision-pressure.java @@ -0,0 +1 @@ +class Example { static boolean scan(Node node) { Value value = node.symbol; return value.isNull(); } } diff --git a/gems/decomplex/examples/java/derived-state.java b/gems/decomplex/examples/java/derived-state.java new file mode 100644 index 000000000..a514b815a --- /dev/null +++ b/gems/decomplex/examples/java/derived-state.java @@ -0,0 +1 @@ +class Example { static void check(int input) { int cached = input + 1; input = 2; print(cached); } } diff --git a/gems/decomplex/examples/java/false-simplicity.java b/gems/decomplex/examples/java/false-simplicity.java new file mode 100644 index 000000000..78fb3897c --- /dev/null +++ b/gems/decomplex/examples/java/false-simplicity.java @@ -0,0 +1 @@ +class FalseSimplicityExample { void hack() { print("hidden IO"); } } diff --git a/gems/decomplex/examples/java/fat-union.java b/gems/decomplex/examples/java/fat-union.java new file mode 100644 index 000000000..ad280adb7 --- /dev/null +++ b/gems/decomplex/examples/java/fat-union.java @@ -0,0 +1 @@ +class Example { static void handle(Node node) { switch (node) { case AST.Call: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv(); break; case AST.Func: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name(); break; case AST.Lit: node.line(); node.col(); 
node.ty(); node.span(); node.parent(); node.value(); break; } } } diff --git a/gems/decomplex/examples/java/flay-similarity.java b/gems/decomplex/examples/java/flay-similarity.java new file mode 100644 index 000000000..a1f4beee8 --- /dev/null +++ b/gems/decomplex/examples/java/flay-similarity.java @@ -0,0 +1,75 @@ +class Example { + static int first_clone(Node node) { + int total = 0; + var value1 = node.part1; + if (value1.ready() && value1.enabled()) { + total += value1.amount; + } + var value2 = node.part2; + if (value2.ready() && value2.enabled()) { + total += value2.amount; + } + var value3 = node.part3; + if (value3.ready() && value3.enabled()) { + total += value3.amount; + } + var value4 = node.part4; + if (value4.ready() && value4.enabled()) { + total += value4.amount; + } + var value5 = node.part5; + if (value5.ready() && value5.enabled()) { + total += value5.amount; + } + var value6 = node.part6; + if (value6.ready() && value6.enabled()) { + total += value6.amount; + } + var value7 = node.part7; + if (value7.ready() && value7.enabled()) { + total += value7.amount; + } + var value8 = node.part8; + if (value8.ready() && value8.enabled()) { + total += value8.amount; + } + return total; + } + + static int second_clone(Node entry) { + int total = 0; + var item1 = entry.part1; + if (item1.ready() && item1.enabled()) { + total += item1.amount; + } + var item2 = entry.part2; + if (item2.ready() && item2.enabled()) { + total += item2.amount; + } + var item3 = entry.part3; + if (item3.ready() && item3.enabled()) { + total += item3.amount; + } + var item4 = entry.part4; + if (item4.ready() && item4.enabled()) { + total += item4.amount; + } + var item5 = entry.part5; + if (item5.ready() && item5.enabled()) { + total += item5.amount; + } + var item6 = entry.part6; + if (item6.ready() && item6.enabled()) { + total += item6.amount; + } + var item7 = entry.part7; + if (item7.ready() && item7.enabled()) { + total += item7.amount; + } + var item8 = entry.part8; + if 
(item8.ready() && item8.enabled()) { + total += item8.amount; + } + return total; + } +} diff --git a/gems/decomplex/examples/java/function-lcom.java b/gems/decomplex/examples/java/function-lcom.java new file mode 100644 index 000000000..6528a671a --- /dev/null +++ b/gems/decomplex/examples/java/function-lcom.java @@ -0,0 +1,13 @@ +class Example { static Result mixed(int price, int tax, Logger logger) { + var subtotal = price + tax; + var total = subtotal * 2; + var rounded = total.round(); + + var timestamp = now(); + var buffer = Buffer.init(); + var stamp = timestamp; + buffer.push(stamp); + logger.info(buffer); + + return Result.init(rounded, buffer); +} } diff --git a/gems/decomplex/examples/java/implicit-control-flow.java b/gems/decomplex/examples/java/implicit-control-flow.java new file mode 100644 index 000000000..4bdd60844 --- /dev/null +++ b/gems/decomplex/examples/java/implicit-control-flow.java @@ -0,0 +1 @@ +class FlowExample { int status; boolean valid; boolean done; void prepare() { this.status = 1; } void validate() { this.valid = this.status == 1; } void commit() { this.done = this.valid; } void ok1() { prepare(); validate(); commit(); } void ok2() { prepare(); validate(); commit(); } void ok3() { prepare(); validate(); commit(); } void ok4() { prepare(); validate(); commit(); } void drift() { validate(); prepare(); commit(); } } diff --git a/gems/decomplex/examples/java/inconsistent-rename-clone.java b/gems/decomplex/examples/java/inconsistent-rename-clone.java new file mode 100644 index 000000000..6ec491226 --- /dev/null +++ b/gems/decomplex/examples/java/inconsistent-rename-clone.java @@ -0,0 +1 @@ +class Example { static void original() { var src = fetch(1); check(src); store(src); finalize(src); } static void pasted() { var dst = fetch(2); check(dst); store(src); finalize(dst); } } diff --git a/gems/decomplex/examples/java/local-flow.java b/gems/decomplex/examples/java/local-flow.java new file mode 100644 index 000000000..a5fa7f235 --- 
/dev/null +++ b/gems/decomplex/examples/java/local-flow.java @@ -0,0 +1,9 @@ +class Example { static Result mixed(int price, int tax) { + var subtotal = price + tax; + var total = subtotal.round(); + + var timestamp = now(); + var buffer = Buffer.init(); + buffer.push(timestamp); + return Result.init(total, buffer); +} } diff --git a/gems/decomplex/examples/java/locality-drag.java b/gems/decomplex/examples/java/locality-drag.java new file mode 100644 index 000000000..8d31b0dcb --- /dev/null +++ b/gems/decomplex/examples/java/locality-drag.java @@ -0,0 +1,29 @@ +class Example { + static void run(User user, Cart cart, Logger logger) { + var receipt_id = user.id; + + var total = cart.total; + if (total > 100) { + if (cart.discountable()) { + var discount = 10; + } + } + if (cart.taxable()) { + if (cart.region) { + var tax = total * 2; + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total); + } + } + if (cart.valid()) { + if (cart.ready()) { + var status = 1; + } + } + + emit(receipt_id); + } +} diff --git a/gems/decomplex/examples/java/miner.java b/gems/decomplex/examples/java/miner.java new file mode 100644 index 000000000..749991f98 --- /dev/null +++ b/gems/decomplex/examples/java/miner.java @@ -0,0 +1 @@ +class Example { static boolean one(boolean a, boolean b, boolean c) { return a && b && c; } static boolean two(boolean a, boolean b, boolean c) { return a && b && c; } static boolean three(boolean a, boolean b, boolean c) { return a && b && c; } static boolean broken(boolean a, boolean b) { return a && b; } } diff --git a/gems/decomplex/examples/java/operational-discontinuity.java b/gems/decomplex/examples/java/operational-discontinuity.java new file mode 100644 index 000000000..d2dfba674 --- /dev/null +++ b/gems/decomplex/examples/java/operational-discontinuity.java @@ -0,0 +1,9 @@ +class Example { static void phase_shift() { + var a = 1; + var b = 2; + + // Phase 2 + var x = 3; + var y = 4; + print(x); print(y); +} } diff --git 
a/gems/decomplex/examples/java/oversized-predicate.java b/gems/decomplex/examples/java/oversized-predicate.java new file mode 100644 index 000000000..4a60c2466 --- /dev/null +++ b/gems/decomplex/examples/java/oversized-predicate.java @@ -0,0 +1 @@ +class Example { static void complex_check(boolean a, boolean b, boolean c, boolean d) { if (a && b && c && d) { print("too big"); } } } diff --git a/gems/decomplex/examples/java/path-condition.java b/gems/decomplex/examples/java/path-condition.java new file mode 100644 index 000000000..941f756f7 --- /dev/null +++ b/gems/decomplex/examples/java/path-condition.java @@ -0,0 +1 @@ +class Example { static void one(X x,Y y,Z z) { if (x.p() && y.q() && z.r()) { go(x); } } static void two(X x,Y y,Z z) { if (x.p() && y.q() && z.r()) { go(x); } } static void three(X x,Y y,Z z) { if (x.p() && y.q() && z.r()) { go(x); } } static void bug(X x,Y y,Z z) { if (x.p() && y.q()) { go(x); } } } diff --git a/gems/decomplex/examples/java/predicate-alias.java b/gems/decomplex/examples/java/predicate-alias.java new file mode 100644 index 000000000..891938723 --- /dev/null +++ b/gems/decomplex/examples/java/predicate-alias.java @@ -0,0 +1 @@ +class Example { static boolean first() { return true; } static boolean second() { return true; } static boolean other() { return false; } } diff --git a/gems/decomplex/examples/java/redundant-nil-guard.java b/gems/decomplex/examples/java/redundant-nil-guard.java new file mode 100644 index 000000000..32f12a734 --- /dev/null +++ b/gems/decomplex/examples/java/redundant-nil-guard.java @@ -0,0 +1 @@ +class Example { static void check(Value value) { if (value.isSome()) { value.isNull(); } } } diff --git a/gems/decomplex/examples/java/semantic-alias.java b/gems/decomplex/examples/java/semantic-alias.java new file mode 100644 index 000000000..29895156a --- /dev/null +++ b/gems/decomplex/examples/java/semantic-alias.java @@ -0,0 +1 @@ +class Example { static boolean frame(Node node) { return node.provenance == 
FRAME; } static boolean is_frame(Node node) { return provenance == FRAME; } static boolean heap(Node node) { return node.provenance == HEAP; } static int somewhere(Node node) { if (node.provenance == FRAME) { return 1; } return 0; } } diff --git a/gems/decomplex/examples/java/sequence-mine.java b/gems/decomplex/examples/java/sequence-mine.java new file mode 100644 index 000000000..629558214 --- /dev/null +++ b/gems/decomplex/examples/java/sequence-mine.java @@ -0,0 +1 @@ +class Example { static void one() { alloc_mark(x); body1(); cleanup(x); } static void two() { alloc_mark(y); body2(); cleanup(y); } static void three() { alloc_mark(z); body3(); cleanup(z); } static void four() { alloc_mark(w); body4(); cleanup(w); } static void leak() { alloc_mark(q); use_value(q); } } diff --git a/gems/decomplex/examples/java/state-branch-density.java b/gems/decomplex/examples/java/state-branch-density.java new file mode 100644 index 000000000..a022b8707 --- /dev/null +++ b/gems/decomplex/examples/java/state-branch-density.java @@ -0,0 +1 @@ +class StateBranchChecker { boolean checked; void check(User user) { if (user.admin) { this.checked = true; } if (this.checked && user.name == "admin") { print("hello"); } } } diff --git a/gems/decomplex/examples/java/state-mesh.java b/gems/decomplex/examples/java/state-mesh.java new file mode 100644 index 000000000..ed01f5ad9 --- /dev/null +++ b/gems/decomplex/examples/java/state-mesh.java @@ -0,0 +1 @@ +class StateMeshExample { int a; int b; void initialize() { this.a = 1; this.b = 2; } void writer() { this.a = 3; } int reader() { return this.a + this.b; } int a_alias() { return this.a; } } diff --git a/gems/decomplex/examples/java/structural-topology.java b/gems/decomplex/examples/java/structural-topology.java new file mode 100644 index 000000000..550e21e7f --- /dev/null +++ b/gems/decomplex/examples/java/structural-topology.java @@ -0,0 +1 @@ +class Worker { public void run(Items items) { prepare(); if (ready()) { validate(); } for (Item 
item : items) { helper(item); } } private void prepare() {} private boolean ready() { return true; } public void validate() {} private void helper(Item item) { item.use(); } } diff --git a/gems/decomplex/examples/java/temporal-ordering-pressure.java b/gems/decomplex/examples/java/temporal-ordering-pressure.java new file mode 100644 index 000000000..83a07d855 --- /dev/null +++ b/gems/decomplex/examples/java/temporal-ordering-pressure.java @@ -0,0 +1 @@ +class TemporalOrderExample { int a; int b; public void one() { this.a = 1; } public void two() { this.a = 2; this.b = 3; } public void three() { this.b = 4; } public int reader() { return this.a; } } diff --git a/gems/decomplex/examples/java/weighted-inlined-complexity.java b/gems/decomplex/examples/java/weighted-inlined-complexity.java new file mode 100644 index 000000000..9efea4524 --- /dev/null +++ b/gems/decomplex/examples/java/weighted-inlined-complexity.java @@ -0,0 +1 @@ +class Example { static void checkout(User user, Cart cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart); } static boolean validate_user(User user) { if (user.active() && !user.suspended()) { if (user.profile.complete()) { return true; } else { return false; } } else { return false; } } static int apply_discount(Cart cart) { if (cart.total > 100 && eligible()) { if (holiday()) { return 20; } else if (loyalty_month()) { return 15; } else { return 10; } } return 0; } static void process_payment(User user, Cart cart) { if (gateway.ready()) { if (cart.total > 0 && user.active()) { if (fraud_check(user)) { charge(user, cart); } else { decline(user); } } } } static void audit_cart(Cart cart) { for (Item item : cart.items) { if (item.taxable()) { if (item.region && item.amount > 0) { record_tax(item); } } } } } diff --git a/gems/decomplex/examples/javascript/co-update.js b/gems/decomplex/examples/javascript/co-update.js new file mode 100644 index 000000000..0fbb808be --- /dev/null +++ 
b/gems/decomplex/examples/javascript/co-update.js @@ -0,0 +1,4 @@ +function stable_one(node) { node.storage = 1; node.provenance = 1; } +function stable_two(node) { node.storage = 1; node.provenance = 1; } +function stable_three(node) { node.storage = 1; node.provenance = 1; } +function misses_provenance(node) { node.storage = 1; } diff --git a/gems/decomplex/examples/javascript/decision-pressure.js b/gems/decomplex/examples/javascript/decision-pressure.js new file mode 100644 index 000000000..41b0455cc --- /dev/null +++ b/gems/decomplex/examples/javascript/decision-pressure.js @@ -0,0 +1 @@ +function scan(node) { const value = node.symbol; return value.isNull(); } diff --git a/gems/decomplex/examples/javascript/derived-state.js b/gems/decomplex/examples/javascript/derived-state.js new file mode 100644 index 000000000..a5ec31614 --- /dev/null +++ b/gems/decomplex/examples/javascript/derived-state.js @@ -0,0 +1 @@ +function check(input) { const cached = input + 1; input = 2; print(cached); } diff --git a/gems/decomplex/examples/javascript/false-simplicity.js b/gems/decomplex/examples/javascript/false-simplicity.js new file mode 100644 index 000000000..4bd9d8e03 --- /dev/null +++ b/gems/decomplex/examples/javascript/false-simplicity.js @@ -0,0 +1 @@ +class FalseSimplicityExample { hack() { print("hidden IO"); } } diff --git a/gems/decomplex/examples/javascript/fat-union.js b/gems/decomplex/examples/javascript/fat-union.js new file mode 100644 index 000000000..5b52db663 --- /dev/null +++ b/gems/decomplex/examples/javascript/fat-union.js @@ -0,0 +1 @@ +function handle(node) { switch (node) { case AST.Call: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv(); break; case AST.Func: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name(); break; case AST.Lit: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value(); break; } } diff --git a/gems/decomplex/examples/javascript/flay-similarity.js 
b/gems/decomplex/examples/javascript/flay-similarity.js new file mode 100644 index 000000000..17ffc6b6a --- /dev/null +++ b/gems/decomplex/examples/javascript/flay-similarity.js @@ -0,0 +1,2 @@ +function first_clone(node) { let total = 0; const value1 = node.part1; if (value1.ready() && value1.enabled()) { total += value1.amount; } const value2 = node.part2; if (value2.ready() && value2.enabled()) { total += value2.amount; } const value3 = node.part3; if (value3.ready() && value3.enabled()) { total += value3.amount; } const value4 = node.part4; if (value4.ready() && value4.enabled()) { total += value4.amount; } const value5 = node.part5; if (value5.ready() && value5.enabled()) { total += value5.amount; } const value6 = node.part6; if (value6.ready() && value6.enabled()) { total += value6.amount; } const value7 = node.part7; if (value7.ready() && value7.enabled()) { total += value7.amount; } const value8 = node.part8; if (value8.ready() && value8.enabled()) { total += value8.amount; } return total; } +function second_clone(entry) { let total = 0; const item1 = entry.part1; if (item1.ready() && item1.enabled()) { total += item1.amount; } const item2 = entry.part2; if (item2.ready() && item2.enabled()) { total += item2.amount; } const item3 = entry.part3; if (item3.ready() && item3.enabled()) { total += item3.amount; } const item4 = entry.part4; if (item4.ready() && item4.enabled()) { total += item4.amount; } const item5 = entry.part5; if (item5.ready() && item5.enabled()) { total += item5.amount; } const item6 = entry.part6; if (item6.ready() && item6.enabled()) { total += item6.amount; } const item7 = entry.part7; if (item7.ready() && item7.enabled()) { total += item7.amount; } const item8 = entry.part8; if (item8.ready() && item8.enabled()) { total += item8.amount; } return total; } diff --git a/gems/decomplex/examples/javascript/function-lcom.js b/gems/decomplex/examples/javascript/function-lcom.js new file mode 100644 index 000000000..6e550446b --- /dev/null +++ 
b/gems/decomplex/examples/javascript/function-lcom.js @@ -0,0 +1,12 @@ +function mixed(price, tax, logger) { + const subtotal = price + tax; + const total = subtotal * 2; + const rounded = total.round(); + + const timestamp = now(); + const buffer = Buffer.init(); + buffer.push(timestamp); + logger.info(buffer); + + return Result.init(rounded, buffer); +} diff --git a/gems/decomplex/examples/javascript/implicit-control-flow.js b/gems/decomplex/examples/javascript/implicit-control-flow.js new file mode 100644 index 000000000..7878c7cf7 --- /dev/null +++ b/gems/decomplex/examples/javascript/implicit-control-flow.js @@ -0,0 +1 @@ +class FlowExample { prepare() { this.status = 1; } validate() { this.valid = this.status == 1; } commit() { this.done = this.valid; } ok1() { this.prepare(); this.validate(); this.commit(); } ok2() { this.prepare(); this.validate(); this.commit(); } ok3() { this.prepare(); this.validate(); this.commit(); } ok4() { this.prepare(); this.validate(); this.commit(); } drift() { this.validate(); this.prepare(); this.commit(); } } diff --git a/gems/decomplex/examples/javascript/inconsistent-rename-clone.js b/gems/decomplex/examples/javascript/inconsistent-rename-clone.js new file mode 100644 index 000000000..67430611b --- /dev/null +++ b/gems/decomplex/examples/javascript/inconsistent-rename-clone.js @@ -0,0 +1,2 @@ +function original() { const src = fetch(1); check(src); store(src); finalize(src); } +function pasted() { const dst = fetch(2); check(dst); store(src); finalize(dst); } diff --git a/gems/decomplex/examples/javascript/local-flow.js b/gems/decomplex/examples/javascript/local-flow.js new file mode 100644 index 000000000..28856eb43 --- /dev/null +++ b/gems/decomplex/examples/javascript/local-flow.js @@ -0,0 +1,9 @@ +function mixed(price, tax) { + const subtotal = price + tax; + const total = subtotal.round(); + + const timestamp = now(); + const buffer = Buffer.init(); + buffer.push(timestamp); + return Result.init(total, buffer); +} diff 
--git a/gems/decomplex/examples/javascript/locality-drag.js b/gems/decomplex/examples/javascript/locality-drag.js new file mode 100644 index 000000000..21de7e8ae --- /dev/null +++ b/gems/decomplex/examples/javascript/locality-drag.js @@ -0,0 +1,27 @@ +function run(user, cart, logger) { + const receipt_id = user.id; + + const total = cart.total; + if (total > 100) { + if (cart.discountable()) { + const discount = 10; + } + } + if (cart.taxable()) { + if (cart.region) { + const tax = total * 2; + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total); + } + } + if (cart.valid()) { + if (cart.ready()) { + const status = 1; + } + } + + emit(receipt_id); +} diff --git a/gems/decomplex/examples/javascript/miner.js b/gems/decomplex/examples/javascript/miner.js new file mode 100644 index 000000000..c7a3f8430 --- /dev/null +++ b/gems/decomplex/examples/javascript/miner.js @@ -0,0 +1,4 @@ +function one(a,b,c) { return a && b && c; } +function two(a,b,c) { return a && b && c; } +function three(a,b,c) { return a && b && c; } +function broken(a,b) { return a && b; } diff --git a/gems/decomplex/examples/javascript/operational-discontinuity.js b/gems/decomplex/examples/javascript/operational-discontinuity.js new file mode 100644 index 000000000..629c5e9a2 --- /dev/null +++ b/gems/decomplex/examples/javascript/operational-discontinuity.js @@ -0,0 +1,9 @@ +function phase_shift() { + const a = 1; + const b = 2; + + // Phase 2 + const x = 3; + const y = 4; + print(x); print(y); +} diff --git a/gems/decomplex/examples/javascript/oversized-predicate.js b/gems/decomplex/examples/javascript/oversized-predicate.js new file mode 100644 index 000000000..bb7a5f359 --- /dev/null +++ b/gems/decomplex/examples/javascript/oversized-predicate.js @@ -0,0 +1 @@ +function complex_check(a,b,c,d) { if (a && b && c && d) { print("too big"); } } diff --git a/gems/decomplex/examples/javascript/path-condition.js b/gems/decomplex/examples/javascript/path-condition.js new file mode 
100644 index 000000000..b717ad99a --- /dev/null +++ b/gems/decomplex/examples/javascript/path-condition.js @@ -0,0 +1,4 @@ +function one(x,y,z) { if (x.p() && y.q() && z.r()) { go(x); } } +function two(x,y,z) { if (x.p() && y.q() && z.r()) { go(x); } } +function three(x,y,z) { if (x.p() && y.q() && z.r()) { go(x); } } +function bug(x,y,z) { if (x.p() && y.q()) { go(x); } } diff --git a/gems/decomplex/examples/javascript/predicate-alias.js b/gems/decomplex/examples/javascript/predicate-alias.js new file mode 100644 index 000000000..62b2c9b57 --- /dev/null +++ b/gems/decomplex/examples/javascript/predicate-alias.js @@ -0,0 +1,3 @@ +function first() { return true; } +function second() { return true; } +function other() { return false; } diff --git a/gems/decomplex/examples/javascript/redundant-nil-guard.js b/gems/decomplex/examples/javascript/redundant-nil-guard.js new file mode 100644 index 000000000..95d1802fa --- /dev/null +++ b/gems/decomplex/examples/javascript/redundant-nil-guard.js @@ -0,0 +1 @@ +function check(value) { if (value.isSome()) { value.isNull(); } } diff --git a/gems/decomplex/examples/javascript/semantic-alias.js b/gems/decomplex/examples/javascript/semantic-alias.js new file mode 100644 index 000000000..8f3d0de18 --- /dev/null +++ b/gems/decomplex/examples/javascript/semantic-alias.js @@ -0,0 +1,4 @@ +function frame(node) { return node.provenance == FRAME; } +function is_frame(node) { return provenance == FRAME; } +function heap(node) { return node.provenance == HEAP; } +function somewhere(node) { if (node.provenance == FRAME) { return 1; } return 0; } diff --git a/gems/decomplex/examples/javascript/sequence-mine.js b/gems/decomplex/examples/javascript/sequence-mine.js new file mode 100644 index 000000000..72ee4959a --- /dev/null +++ b/gems/decomplex/examples/javascript/sequence-mine.js @@ -0,0 +1,5 @@ +function one() { alloc_mark(x); body1(); cleanup(x); } +function two() { alloc_mark(y); body2(); cleanup(y); } +function three() { alloc_mark(z); 
body3(); cleanup(z); } +function four() { alloc_mark(w); body4(); cleanup(w); } +function leak() { alloc_mark(q); use_value(q); } diff --git a/gems/decomplex/examples/javascript/state-branch-density.js b/gems/decomplex/examples/javascript/state-branch-density.js new file mode 100644 index 000000000..ebb16349d --- /dev/null +++ b/gems/decomplex/examples/javascript/state-branch-density.js @@ -0,0 +1 @@ +class StateBranchChecker { check(user) { if (user.admin) { this.checked = true; } if (this.checked && user.name == "admin") { print("hello"); } } } diff --git a/gems/decomplex/examples/javascript/state-mesh.js b/gems/decomplex/examples/javascript/state-mesh.js new file mode 100644 index 000000000..3e6271f94 --- /dev/null +++ b/gems/decomplex/examples/javascript/state-mesh.js @@ -0,0 +1 @@ +class StateMeshExample { initialize() { this.a = 1; this.b = 2; } writer() { this.a = 3; } reader() { return this.a + this.b; } a_alias() { return this.a; } } diff --git a/gems/decomplex/examples/javascript/structural-topology.js b/gems/decomplex/examples/javascript/structural-topology.js new file mode 100644 index 000000000..b7559a669 --- /dev/null +++ b/gems/decomplex/examples/javascript/structural-topology.js @@ -0,0 +1 @@ +class Worker { run(items) { this.prepare(); if (this.ready()) { this.validate(); } for (const item of items) { this.helper(item); } } prepare() {} ready() { return true; } validate() {} helper(item) { return item; } } diff --git a/gems/decomplex/examples/javascript/temporal-ordering-pressure.js b/gems/decomplex/examples/javascript/temporal-ordering-pressure.js new file mode 100644 index 000000000..0901e3c2e --- /dev/null +++ b/gems/decomplex/examples/javascript/temporal-ordering-pressure.js @@ -0,0 +1 @@ +class TemporalOrderExample { one() { this.a = 1; } two() { this.a = 2; this.b = 3; } three() { this.b = 4; } reader() { return this.a; } } diff --git a/gems/decomplex/examples/javascript/weighted-inlined-complexity.js 
b/gems/decomplex/examples/javascript/weighted-inlined-complexity.js new file mode 100644 index 000000000..e7ae5abc4 --- /dev/null +++ b/gems/decomplex/examples/javascript/weighted-inlined-complexity.js @@ -0,0 +1,5 @@ +function checkout(user, cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart); } +function validate_user(user) { if (user.active() && !user.suspended()) { if (user.profile.complete()) { return true; } else { return false; } } else { return false; } } +function apply_discount(cart) { if (cart.total > 100 && eligible()) { if (holiday()) { return 20; } else if (loyalty_month()) { return 15; } else { return 10; } } return 0; } +function process_payment(user, cart) { if (gateway.ready()) { if (cart.total > 0 && user.active()) { if (fraud_check(user)) { charge(user, cart); } else { decline(user); } } } } +function audit_cart(cart) { for (const item of cart.items) { if (item.taxable()) { if (item.region && item.amount > 0) { record_tax(item); } } } } diff --git a/gems/decomplex/examples/kotlin/co-update.kt b/gems/decomplex/examples/kotlin/co-update.kt new file mode 100644 index 000000000..7880a0e3c --- /dev/null +++ b/gems/decomplex/examples/kotlin/co-update.kt @@ -0,0 +1,4 @@ +fun stable_one(node: Node) { node.storage = 1; node.provenance = 1 } +fun stable_two(node: Node) { node.storage = 1; node.provenance = 1 } +fun stable_three(node: Node) { node.storage = 1; node.provenance = 1 } +fun misses_provenance(node: Node) { node.storage = 1 } diff --git a/gems/decomplex/examples/kotlin/decision-pressure.kt b/gems/decomplex/examples/kotlin/decision-pressure.kt new file mode 100644 index 000000000..320c10e64 --- /dev/null +++ b/gems/decomplex/examples/kotlin/decision-pressure.kt @@ -0,0 +1 @@ +fun scan(node: Node): Boolean { val value = node.symbol; return value.isNull() } diff --git a/gems/decomplex/examples/kotlin/derived-state.kt b/gems/decomplex/examples/kotlin/derived-state.kt new file mode 100644 index 
000000000..ee30f74e4 --- /dev/null +++ b/gems/decomplex/examples/kotlin/derived-state.kt @@ -0,0 +1 @@ +fun check(input_value: Int) { var input = input_value; val cached = input + 1; input = 2; print(cached) } diff --git a/gems/decomplex/examples/kotlin/false-simplicity.kt b/gems/decomplex/examples/kotlin/false-simplicity.kt new file mode 100644 index 000000000..f5be1a591 --- /dev/null +++ b/gems/decomplex/examples/kotlin/false-simplicity.kt @@ -0,0 +1 @@ +class FalseSimplicityExample { fun hack() { print("hidden IO") } } diff --git a/gems/decomplex/examples/kotlin/fat-union.kt b/gems/decomplex/examples/kotlin/fat-union.kt new file mode 100644 index 000000000..159f08ac5 --- /dev/null +++ b/gems/decomplex/examples/kotlin/fat-union.kt @@ -0,0 +1,7 @@ +fun handle(node: Node) { + when (node) { + AST.Call -> { node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv() } + AST.Func -> { node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name() } + AST.Lit -> { node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value() } + } +} diff --git a/gems/decomplex/examples/kotlin/flay-similarity.kt b/gems/decomplex/examples/kotlin/flay-similarity.kt new file mode 100644 index 000000000..63ff60cda --- /dev/null +++ b/gems/decomplex/examples/kotlin/flay-similarity.kt @@ -0,0 +1,73 @@ +fun first_clone(node: Node): Int { + var total = 0 + val value1 = node.part1 + if (value1.ready() && value1.enabled()) { + total += value1.amount + } + val value2 = node.part2 + if (value2.ready() && value2.enabled()) { + total += value2.amount + } + val value3 = node.part3 + if (value3.ready() && value3.enabled()) { + total += value3.amount + } + val value4 = node.part4 + if (value4.ready() && value4.enabled()) { + total += value4.amount + } + val value5 = node.part5 + if (value5.ready() && value5.enabled()) { + total += value5.amount + } + val value6 = node.part6 + if (value6.ready() && value6.enabled()) { + total += value6.amount + } + val value7 
= node.part7 + if (value7.ready() && value7.enabled()) { + total += value7.amount + } + val value8 = node.part8 + if (value8.ready() && value8.enabled()) { + total += value8.amount + } + return total +} + +fun second_clone(entry: Node): Int { + var total = 0 + val item1 = entry.part1 + if (item1.ready() && item1.enabled()) { + total += item1.amount + } + val item2 = entry.part2 + if (item2.ready() && item2.enabled()) { + total += item2.amount + } + val item3 = entry.part3 + if (item3.ready() && item3.enabled()) { + total += item3.amount + } + val item4 = entry.part4 + if (item4.ready() && item4.enabled()) { + total += item4.amount + } + val item5 = entry.part5 + if (item5.ready() && item5.enabled()) { + total += item5.amount + } + val item6 = entry.part6 + if (item6.ready() && item6.enabled()) { + total += item6.amount + } + val item7 = entry.part7 + if (item7.ready() && item7.enabled()) { + total += item7.amount + } + val item8 = entry.part8 + if (item8.ready() && item8.enabled()) { + total += item8.amount + } + return total +} diff --git a/gems/decomplex/examples/kotlin/function-lcom.kt b/gems/decomplex/examples/kotlin/function-lcom.kt new file mode 100644 index 000000000..ece21d20b --- /dev/null +++ b/gems/decomplex/examples/kotlin/function-lcom.kt @@ -0,0 +1,13 @@ +fun mixed(price: Int, tax: Int, logger: Logger): Result { + val subtotal = price + tax + val total = subtotal * 2 + val rounded = total.round() + + val timestamp = now() + val buffer = Buffer.init() + val stamp = timestamp + buffer.push(stamp) + logger.info(buffer) + + return Result.init(rounded, buffer) +} diff --git a/gems/decomplex/examples/kotlin/implicit-control-flow.kt b/gems/decomplex/examples/kotlin/implicit-control-flow.kt new file mode 100644 index 000000000..8ba4b59ac --- /dev/null +++ b/gems/decomplex/examples/kotlin/implicit-control-flow.kt @@ -0,0 +1,13 @@ +class FlowExample { + var status = 0 + var valid = false + var done = false + fun prepare() { this.status = 1 } + fun validate() { 
this.valid = this.status == 1 } + fun commit() { this.done = this.valid } + fun ok1() { this.prepare(); this.validate(); this.commit() } + fun ok2() { this.prepare(); this.validate(); this.commit() } + fun ok3() { this.prepare(); this.validate(); this.commit() } + fun ok4() { this.prepare(); this.validate(); this.commit() } + fun drift() { this.validate(); this.prepare(); this.commit() } +} diff --git a/gems/decomplex/examples/kotlin/inconsistent-rename-clone.kt b/gems/decomplex/examples/kotlin/inconsistent-rename-clone.kt new file mode 100644 index 000000000..df533ba97 --- /dev/null +++ b/gems/decomplex/examples/kotlin/inconsistent-rename-clone.kt @@ -0,0 +1,2 @@ +fun original() { val src = fetch(1); check(src); store(src); finalize(src) } +fun pasted() { val dst = fetch(2); check(dst); store(src); finalize(dst) } diff --git a/gems/decomplex/examples/kotlin/local-flow.kt b/gems/decomplex/examples/kotlin/local-flow.kt new file mode 100644 index 000000000..3b1c13723 --- /dev/null +++ b/gems/decomplex/examples/kotlin/local-flow.kt @@ -0,0 +1,9 @@ +fun mixed(price: Int, tax: Int): Result { + val subtotal = price + tax + val total = subtotal.round() + + val timestamp = now() + val buffer = Buffer.init() + buffer.push(timestamp) + return Result.init(total, buffer) +} diff --git a/gems/decomplex/examples/kotlin/locality-drag.kt b/gems/decomplex/examples/kotlin/locality-drag.kt new file mode 100644 index 000000000..7bc0d1e1a --- /dev/null +++ b/gems/decomplex/examples/kotlin/locality-drag.kt @@ -0,0 +1,27 @@ +fun run(user: User, cart: Cart, logger: Logger) { + val receipt_id = user.id + + val total = cart.total + if (total > 100) { + if (cart.discountable()) { + val discount = 10 + } + } + if (cart.taxable()) { + if (cart.region) { + val tax = total * 2 + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total) + } + } + if (cart.valid()) { + if (cart.ready()) { + val status = 1 + } + } + + emit(receipt_id) +} diff --git 
a/gems/decomplex/examples/kotlin/miner.kt b/gems/decomplex/examples/kotlin/miner.kt new file mode 100644 index 000000000..12ba782f7 --- /dev/null +++ b/gems/decomplex/examples/kotlin/miner.kt @@ -0,0 +1,4 @@ +fun one(a: Boolean,b: Boolean,c: Boolean): Boolean { return a && b && c } +fun two(a: Boolean,b: Boolean,c: Boolean): Boolean { return a && b && c } +fun three(a: Boolean,b: Boolean,c: Boolean): Boolean { return a && b && c } +fun broken(a: Boolean,b: Boolean): Boolean { return a && b } diff --git a/gems/decomplex/examples/kotlin/operational-discontinuity.kt b/gems/decomplex/examples/kotlin/operational-discontinuity.kt new file mode 100644 index 000000000..7ddfb33e1 --- /dev/null +++ b/gems/decomplex/examples/kotlin/operational-discontinuity.kt @@ -0,0 +1,9 @@ +fun phase_shift() { + val a = 1 + val b = 2 + + // Phase 2 + val x = 3 + val y = 4 + print(x); print(y) +} diff --git a/gems/decomplex/examples/kotlin/oversized-predicate.kt b/gems/decomplex/examples/kotlin/oversized-predicate.kt new file mode 100644 index 000000000..948ea9889 --- /dev/null +++ b/gems/decomplex/examples/kotlin/oversized-predicate.kt @@ -0,0 +1 @@ +fun complex_check(a: Boolean,b: Boolean,c: Boolean,d: Boolean) { if (a && b && c && d) { print("too big") } } diff --git a/gems/decomplex/examples/kotlin/path-condition.kt b/gems/decomplex/examples/kotlin/path-condition.kt new file mode 100644 index 000000000..4037d0a9d --- /dev/null +++ b/gems/decomplex/examples/kotlin/path-condition.kt @@ -0,0 +1,4 @@ +fun one(x: X,y: Y,z: Z) { if (x.p() && y.q() && z.r()) { go(x) } } +fun two(x: X,y: Y,z: Z) { if (x.p() && y.q() && z.r()) { go(x) } } +fun three(x: X,y: Y,z: Z) { if (x.p() && y.q() && z.r()) { go(x) } } +fun bug(x: X,y: Y,z: Z) { if (x.p() && y.q()) { go(x) } } diff --git a/gems/decomplex/examples/kotlin/predicate-alias.kt b/gems/decomplex/examples/kotlin/predicate-alias.kt new file mode 100644 index 000000000..e6b3081cd --- /dev/null +++ b/gems/decomplex/examples/kotlin/predicate-alias.kt 
@@ -0,0 +1,3 @@ +fun first(): Boolean { return true } +fun second(): Boolean { return true } +fun other(): Boolean { return false } diff --git a/gems/decomplex/examples/kotlin/redundant-nil-guard.kt b/gems/decomplex/examples/kotlin/redundant-nil-guard.kt new file mode 100644 index 000000000..d9659325f --- /dev/null +++ b/gems/decomplex/examples/kotlin/redundant-nil-guard.kt @@ -0,0 +1 @@ +fun check(value: Value) { if (value.isSome()) { value.isNull() } } diff --git a/gems/decomplex/examples/kotlin/semantic-alias.kt b/gems/decomplex/examples/kotlin/semantic-alias.kt new file mode 100644 index 000000000..576612c58 --- /dev/null +++ b/gems/decomplex/examples/kotlin/semantic-alias.kt @@ -0,0 +1,4 @@ +fun frame(node: Node): Boolean { return node.provenance == FRAME } +fun is_frame(node: Node): Boolean { return provenance == FRAME } +fun heap(node: Node): Boolean { return node.provenance == HEAP } +fun somewhere(node: Node): Int { if (node.provenance == FRAME) { return 1 }; return 0 } diff --git a/gems/decomplex/examples/kotlin/sequence-mine.kt b/gems/decomplex/examples/kotlin/sequence-mine.kt new file mode 100644 index 000000000..fe2c778cd --- /dev/null +++ b/gems/decomplex/examples/kotlin/sequence-mine.kt @@ -0,0 +1,5 @@ +fun one() { alloc_mark(x); body1(); cleanup(x) } +fun two() { alloc_mark(y); body2(); cleanup(y) } +fun three() { alloc_mark(z); body3(); cleanup(z) } +fun four() { alloc_mark(w); body4(); cleanup(w) } +fun leak() { alloc_mark(q); use_value(q) } diff --git a/gems/decomplex/examples/kotlin/state-branch-density.kt b/gems/decomplex/examples/kotlin/state-branch-density.kt new file mode 100644 index 000000000..ef04608c6 --- /dev/null +++ b/gems/decomplex/examples/kotlin/state-branch-density.kt @@ -0,0 +1 @@ +class StateBranchChecker { var checked = false; fun check(user: User) { if (user.admin) { this.checked = true } if (this.checked && user.name == "admin") { print("hello") } } } diff --git a/gems/decomplex/examples/kotlin/state-mesh.kt 
b/gems/decomplex/examples/kotlin/state-mesh.kt new file mode 100644 index 000000000..8f1ca6f93 --- /dev/null +++ b/gems/decomplex/examples/kotlin/state-mesh.kt @@ -0,0 +1 @@ +class StateMeshExample { var a = 0; var b = 0; fun initialize() { this.a = 1; this.b = 2 } fun writer() { this.a = 3 } fun reader(): Int { return this.a + this.b } fun a_alias(): Int { return this.a } } diff --git a/gems/decomplex/examples/kotlin/structural-topology.kt b/gems/decomplex/examples/kotlin/structural-topology.kt new file mode 100644 index 000000000..3284753d7 --- /dev/null +++ b/gems/decomplex/examples/kotlin/structural-topology.kt @@ -0,0 +1 @@ +class Worker { fun run(items: Items) { this.prepare(); if (this.ready()) { this.validate() }; for (item in items) { this.helper(item) } } private fun prepare() {} private fun ready(): Boolean { return true } fun validate() {} private fun helper(item: Item) { item.use() } } diff --git a/gems/decomplex/examples/kotlin/temporal-ordering-pressure.kt b/gems/decomplex/examples/kotlin/temporal-ordering-pressure.kt new file mode 100644 index 000000000..6b94a5132 --- /dev/null +++ b/gems/decomplex/examples/kotlin/temporal-ordering-pressure.kt @@ -0,0 +1,21 @@ +class TemporalOrderExample { + var a = 0 + var b = 0 + + fun one() { + this.a = 1 + } + + fun two() { + this.a = 2 + this.b = 3 + } + + fun three() { + this.b = 4 + } + + fun reader(): Int { + return this.a + } +} diff --git a/gems/decomplex/examples/kotlin/weighted-inlined-complexity.kt b/gems/decomplex/examples/kotlin/weighted-inlined-complexity.kt new file mode 100644 index 000000000..00c5543e7 --- /dev/null +++ b/gems/decomplex/examples/kotlin/weighted-inlined-complexity.kt @@ -0,0 +1,5 @@ +fun checkout(user: User, cart: Cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart) } +fun validate_user(user: User): Boolean { if (user.active() && !user.suspended()) { if (user.profile.complete()) { return true } else { return false } } else { return false 
} } +fun apply_discount(cart: Cart): Int { if (cart.total > 100 && eligible()) { if (holiday()) { return 20 } else if (loyalty_month()) { return 15 } else { return 10 } }; return 0 } +fun process_payment(user: User, cart: Cart) { if (gateway.ready()) { if (cart.total > 0 && user.active()) { if (fraud_check(user)) { charge(user, cart) } else { decline(user) } } } } +fun audit_cart(cart: Cart) { for (item in cart.items) { if (item.taxable()) { if (item.region && item.amount > 0) { record_tax(item) } } } } diff --git a/gems/decomplex/examples/lua/co-update.lua b/gems/decomplex/examples/lua/co-update.lua new file mode 100644 index 000000000..1497b28e7 --- /dev/null +++ b/gems/decomplex/examples/lua/co-update.lua @@ -0,0 +1,15 @@ +function stable_one(node) + node.storage = 1 + node.provenance = 1 +end +function stable_two(node) + node.storage = 1 + node.provenance = 1 +end +function stable_three(node) + node.storage = 1 + node.provenance = 1 +end +function misses_provenance(node) + node.storage = 1 +end diff --git a/gems/decomplex/examples/lua/decision-pressure.lua b/gems/decomplex/examples/lua/decision-pressure.lua new file mode 100644 index 000000000..206ffbcea --- /dev/null +++ b/gems/decomplex/examples/lua/decision-pressure.lua @@ -0,0 +1,4 @@ +function scan(node) + local value = node.symbol + return value.isNull() +end diff --git a/gems/decomplex/examples/lua/derived-state.lua b/gems/decomplex/examples/lua/derived-state.lua new file mode 100644 index 000000000..aaee214a8 --- /dev/null +++ b/gems/decomplex/examples/lua/derived-state.lua @@ -0,0 +1 @@ +function check(input) local cached = input + 1; input = 2; print(cached) end diff --git a/gems/decomplex/examples/lua/false-simplicity.lua b/gems/decomplex/examples/lua/false-simplicity.lua new file mode 100644 index 000000000..e222554dc --- /dev/null +++ b/gems/decomplex/examples/lua/false-simplicity.lua @@ -0,0 +1,2 @@ +FalseSimplicityExample = {} +function FalseSimplicityExample:hack() print("hidden IO") end diff 
--git a/gems/decomplex/examples/lua/fat-union.lua b/gems/decomplex/examples/lua/fat-union.lua new file mode 100644 index 000000000..7a87c60de --- /dev/null +++ b/gems/decomplex/examples/lua/fat-union.lua @@ -0,0 +1,5 @@ +function handle(node) + if node == AST.Call then node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv() end + if node == AST.Func then node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name() end + if node == AST.Lit then node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value() end +end diff --git a/gems/decomplex/examples/lua/flay-similarity.lua b/gems/decomplex/examples/lua/flay-similarity.lua new file mode 100644 index 000000000..d64d5c2c4 --- /dev/null +++ b/gems/decomplex/examples/lua/flay-similarity.lua @@ -0,0 +1,2 @@ +function first_clone(node) local total = 0; local value1 = node.part1; if value1.ready() and value1.enabled() then total = total + value1.amount end local value2 = node.part2; if value2.ready() and value2.enabled() then total = total + value2.amount end local value3 = node.part3; if value3.ready() and value3.enabled() then total = total + value3.amount end local value4 = node.part4; if value4.ready() and value4.enabled() then total = total + value4.amount end local value5 = node.part5; if value5.ready() and value5.enabled() then total = total + value5.amount end local value6 = node.part6; if value6.ready() and value6.enabled() then total = total + value6.amount end local value7 = node.part7; if value7.ready() and value7.enabled() then total = total + value7.amount end local value8 = node.part8; if value8.ready() and value8.enabled() then total = total + value8.amount end return total end +function second_clone(entry) local total = 0; local item1 = entry.part1; if item1.ready() and item1.enabled() then total = total + item1.amount end local item2 = entry.part2; if item2.ready() and item2.enabled() then total = total + item2.amount end local item3 = entry.part3; if 
item3.ready() and item3.enabled() then total = total + item3.amount end local item4 = entry.part4; if item4.ready() and item4.enabled() then total = total + item4.amount end local item5 = entry.part5; if item5.ready() and item5.enabled() then total = total + item5.amount end local item6 = entry.part6; if item6.ready() and item6.enabled() then total = total + item6.amount end local item7 = entry.part7; if item7.ready() and item7.enabled() then total = total + item7.amount end local item8 = entry.part8; if item8.ready() and item8.enabled() then total = total + item8.amount end return total end diff --git a/gems/decomplex/examples/lua/function-lcom.lua b/gems/decomplex/examples/lua/function-lcom.lua new file mode 100644 index 000000000..bcad2d5bd --- /dev/null +++ b/gems/decomplex/examples/lua/function-lcom.lua @@ -0,0 +1,13 @@ +function mixed(price, tax, logger) + local subtotal = price + tax + local total = subtotal * 2 + local rounded = total.round() + + local timestamp = now() + local buffer = Buffer.init() + local stamp = timestamp + buffer.push(stamp) + logger.info(buffer) + + return Result.init(rounded, buffer) +end diff --git a/gems/decomplex/examples/lua/implicit-control-flow.lua b/gems/decomplex/examples/lua/implicit-control-flow.lua new file mode 100644 index 000000000..603b2d6d4 --- /dev/null +++ b/gems/decomplex/examples/lua/implicit-control-flow.lua @@ -0,0 +1,9 @@ +FlowExample = {} +function FlowExample:prepare() self.status = 1 end +function FlowExample:validate() self.valid = self.status == 1 end +function FlowExample:commit() self.done = self.valid end +function FlowExample:ok1() self.prepare(); self.validate(); self.commit() end +function FlowExample:ok2() self.prepare(); self.validate(); self.commit() end +function FlowExample:ok3() self.prepare(); self.validate(); self.commit() end +function FlowExample:ok4() self.prepare(); self.validate(); self.commit() end +function FlowExample:drift() self.validate(); self.prepare(); self.commit() end diff 
--git a/gems/decomplex/examples/lua/inconsistent-rename-clone.lua b/gems/decomplex/examples/lua/inconsistent-rename-clone.lua new file mode 100644 index 000000000..f7c681c86 --- /dev/null +++ b/gems/decomplex/examples/lua/inconsistent-rename-clone.lua @@ -0,0 +1,2 @@ +function original() local src = fetch(1); check(src); store(src); finalize(src) end +function pasted() local dst = fetch(2); check(dst); store(src); finalize(dst) end diff --git a/gems/decomplex/examples/lua/local-flow.lua b/gems/decomplex/examples/lua/local-flow.lua new file mode 100644 index 000000000..3627024e2 --- /dev/null +++ b/gems/decomplex/examples/lua/local-flow.lua @@ -0,0 +1,9 @@ +function mixed(price, tax) + local subtotal = price + tax + local total = subtotal.round() + + local timestamp = now() + local buffer = Buffer.init() + buffer.push(timestamp) + return Result.init(total, buffer) +end diff --git a/gems/decomplex/examples/lua/locality-drag.lua b/gems/decomplex/examples/lua/locality-drag.lua new file mode 100644 index 000000000..1cca002ab --- /dev/null +++ b/gems/decomplex/examples/lua/locality-drag.lua @@ -0,0 +1,27 @@ +function run(user, cart, logger) + local receipt_id = user.id + + local total = cart.total + if total > 100 then + if cart.discountable() then + local discount = 10 + end + end + if cart.taxable() then + if cart.region then + local tax = total * 2 + end + end + if logger.enabled() then + if logger.debug() then + logger.info(total) + end + end + if cart.valid() then + if cart.ready() then + local status = 1 + end + end + + emit(receipt_id) +end diff --git a/gems/decomplex/examples/lua/miner.lua b/gems/decomplex/examples/lua/miner.lua new file mode 100644 index 000000000..e188986dc --- /dev/null +++ b/gems/decomplex/examples/lua/miner.lua @@ -0,0 +1,4 @@ +function one(a,b,c) return a and b and c end +function two(a,b,c) return a and b and c end +function three(a,b,c) return a and b and c end +function broken(a,b) return a and b end diff --git 
a/gems/decomplex/examples/lua/operational-discontinuity.lua b/gems/decomplex/examples/lua/operational-discontinuity.lua new file mode 100644 index 000000000..af2e143f7 --- /dev/null +++ b/gems/decomplex/examples/lua/operational-discontinuity.lua @@ -0,0 +1,9 @@ +function phase_shift() + local a = 1 + local b = 2 + + -- Phase 2 + local x = 3 + local y = 4 + print(x); print(y) +end diff --git a/gems/decomplex/examples/lua/oversized-predicate.lua b/gems/decomplex/examples/lua/oversized-predicate.lua new file mode 100644 index 000000000..28e204f1b --- /dev/null +++ b/gems/decomplex/examples/lua/oversized-predicate.lua @@ -0,0 +1 @@ +function complex_check(a,b,c,d) if a and b and c and d then print("too big") end end diff --git a/gems/decomplex/examples/lua/path-condition.lua b/gems/decomplex/examples/lua/path-condition.lua new file mode 100644 index 000000000..fb94f2cf9 --- /dev/null +++ b/gems/decomplex/examples/lua/path-condition.lua @@ -0,0 +1,4 @@ +function one(x,y,z) if x.p() and y.q() and z.r() then go(x) end end +function two(x,y,z) if x.p() and y.q() and z.r() then go(x) end end +function three(x,y,z) if x.p() and y.q() and z.r() then go(x) end end +function bug(x,y,z) if x.p() and y.q() then go(x) end end diff --git a/gems/decomplex/examples/lua/predicate-alias.lua b/gems/decomplex/examples/lua/predicate-alias.lua new file mode 100644 index 000000000..3acd4b64d --- /dev/null +++ b/gems/decomplex/examples/lua/predicate-alias.lua @@ -0,0 +1,3 @@ +function first() return true end +function second() return true end +function other() return false end diff --git a/gems/decomplex/examples/lua/redundant-nil-guard.lua b/gems/decomplex/examples/lua/redundant-nil-guard.lua new file mode 100644 index 000000000..9aeb9b3d1 --- /dev/null +++ b/gems/decomplex/examples/lua/redundant-nil-guard.lua @@ -0,0 +1 @@ +function check(value) if value.isSome() then value.isNull() end end diff --git a/gems/decomplex/examples/lua/semantic-alias.lua 
b/gems/decomplex/examples/lua/semantic-alias.lua new file mode 100644 index 000000000..7206ed542 --- /dev/null +++ b/gems/decomplex/examples/lua/semantic-alias.lua @@ -0,0 +1,4 @@ +function frame(node) return node.provenance == FRAME end +function is_frame(node) return provenance == FRAME end +function heap(node) return node.provenance == HEAP end +function somewhere(node) if node.provenance == FRAME then return 1 end return 0 end diff --git a/gems/decomplex/examples/lua/sequence-mine.lua b/gems/decomplex/examples/lua/sequence-mine.lua new file mode 100644 index 000000000..c17077f23 --- /dev/null +++ b/gems/decomplex/examples/lua/sequence-mine.lua @@ -0,0 +1,5 @@ +function one() alloc_mark(x); body1(); cleanup(x) end +function two() alloc_mark(y); body2(); cleanup(y) end +function three() alloc_mark(z); body3(); cleanup(z) end +function four() alloc_mark(w); body4(); cleanup(w) end +function leak() alloc_mark(q); use_value(q) end diff --git a/gems/decomplex/examples/lua/state-branch-density.lua b/gems/decomplex/examples/lua/state-branch-density.lua new file mode 100644 index 000000000..5fa066c60 --- /dev/null +++ b/gems/decomplex/examples/lua/state-branch-density.lua @@ -0,0 +1,2 @@ +StateBranchChecker = {} +function StateBranchChecker:check(user) if user.admin then self.checked = true end if self.checked and user.name == "admin" then print("hello") end end diff --git a/gems/decomplex/examples/lua/state-mesh.lua b/gems/decomplex/examples/lua/state-mesh.lua new file mode 100644 index 000000000..6725a32df --- /dev/null +++ b/gems/decomplex/examples/lua/state-mesh.lua @@ -0,0 +1,5 @@ +StateMeshExample = {} +function StateMeshExample:initialize() self.a = 1; self.b = 2 end +function StateMeshExample:writer() self.a = 3 end +function StateMeshExample:reader() return self.a + self.b end +function StateMeshExample:a_alias() return self.a end diff --git a/gems/decomplex/examples/lua/structural-topology.lua b/gems/decomplex/examples/lua/structural-topology.lua new file mode 
100644 index 000000000..ee4cd50f3 --- /dev/null +++ b/gems/decomplex/examples/lua/structural-topology.lua @@ -0,0 +1,6 @@ +Worker = {} +function Worker:run(items) self.prepare(); if self.ready() then self.validate() end for item in items do self.helper(item) end end +function Worker:prepare() end +function Worker:ready() return true end +function Worker:validate() end +function Worker:helper(item) return item end diff --git a/gems/decomplex/examples/lua/temporal-ordering-pressure.lua b/gems/decomplex/examples/lua/temporal-ordering-pressure.lua new file mode 100644 index 000000000..5a7eb8295 --- /dev/null +++ b/gems/decomplex/examples/lua/temporal-ordering-pressure.lua @@ -0,0 +1,5 @@ +TemporalOrderExample = {} +function TemporalOrderExample:one() self.a = 1 end +function TemporalOrderExample:two() self.a = 2; self.b = 3 end +function TemporalOrderExample:three() self.b = 4 end +function TemporalOrderExample:reader() return self.a end diff --git a/gems/decomplex/examples/lua/weighted-inlined-complexity.lua b/gems/decomplex/examples/lua/weighted-inlined-complexity.lua new file mode 100644 index 000000000..e2e454bee --- /dev/null +++ b/gems/decomplex/examples/lua/weighted-inlined-complexity.lua @@ -0,0 +1,5 @@ +function checkout(user, cart) validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart) end +function validate_user(user) if user.active() and not user.suspended() then if user.profile.complete() then return true else return false end else return false end end +function apply_discount(cart) if cart.total > 100 and eligible() then if holiday() then return 20 elseif loyalty_month() then return 15 else return 10 end end return 0 end +function process_payment(user, cart) if gateway.ready() then if cart.total > 0 and user.active() then if fraud_check(user) then charge(user, cart) else decline(user) end end end end +function audit_cart(cart) for item in cart.items do if item.taxable() then if item.region and item.amount > 0 then 
record_tax(item) end end end end diff --git a/gems/decomplex/examples/oracles/co-update.json b/gems/decomplex/examples/oracles/co-update.json new file mode 100644 index 000000000..1ebd374bd --- /dev/null +++ b/gems/decomplex/examples/oracles/co-update.json @@ -0,0 +1,28 @@ +{ + "detector": "co-update", + "engine": "ruby", + "options": { + }, + "expected": { + "co_written_pairs": [ + { + "pair": [ + "provenance", + "storage" + ], + "support": 3 + } + ], + "neglected_updates": [ + { + "pair": [ + "provenance", + "storage" + ], + "support": 3, + "has": "storage", + "missing": "provenance" + } + ] + } +} diff --git a/gems/decomplex/examples/oracles/decision-pressure.json b/gems/decomplex/examples/oracles/decision-pressure.json new file mode 100644 index 000000000..ce2920db3 --- /dev/null +++ b/gems/decomplex/examples/oracles/decision-pressure.json @@ -0,0 +1,11 @@ +{ + "detector": "decision-pressure", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "present": true + } + ] +} diff --git a/gems/decomplex/examples/oracles/derived-state.json b/gems/decomplex/examples/oracles/derived-state.json new file mode 100644 index 000000000..3ca4bc743 --- /dev/null +++ b/gems/decomplex/examples/oracles/derived-state.json @@ -0,0 +1,12 @@ +{ + "detector": "derived-state", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "derived": "cached", + "source": "input" + } + ] +} diff --git a/gems/decomplex/examples/oracles/false-simplicity.json b/gems/decomplex/examples/oracles/false-simplicity.json new file mode 100644 index 000000000..eede2341d --- /dev/null +++ b/gems/decomplex/examples/oracles/false-simplicity.json @@ -0,0 +1,11 @@ +{ + "detector": "false-simplicity", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "kind": "hidden_io" + } + ] +} diff --git a/gems/decomplex/examples/oracles/fat-union.json b/gems/decomplex/examples/oracles/fat-union.json new file mode 100644 index 000000000..c82ab01cf --- /dev/null +++ 
b/gems/decomplex/examples/oracles/fat-union.json @@ -0,0 +1,11 @@ +{ + "detector": "fat-union", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "present": true + } + ] +} diff --git a/gems/decomplex/examples/oracles/flay-similarity.json b/gems/decomplex/examples/oracles/flay-similarity.json new file mode 100644 index 000000000..d3540e736 --- /dev/null +++ b/gems/decomplex/examples/oracles/flay-similarity.json @@ -0,0 +1,13 @@ +{ + "detector": "flay-similarity", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "clone_type": "type2", + "node": "defn", + "site_count": 2 + } + ] +} diff --git a/gems/decomplex/examples/oracles/function-lcom.json b/gems/decomplex/examples/oracles/function-lcom.json new file mode 100644 index 000000000..7c46a6af4 --- /dev/null +++ b/gems/decomplex/examples/oracles/function-lcom.json @@ -0,0 +1,11 @@ +{ + "detector": "function-lcom", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "present": true + } + ] +} diff --git a/gems/decomplex/examples/oracles/implicit-control-flow.json b/gems/decomplex/examples/oracles/implicit-control-flow.json new file mode 100644 index 000000000..0e101cfd6 --- /dev/null +++ b/gems/decomplex/examples/oracles/implicit-control-flow.json @@ -0,0 +1,18 @@ +{ + "detector": "implicit-control-flow", + "engine": "ruby", + "options": { + }, + "expected": { + "ordered_protocols": [ + { + "present": true + } + ], + "order_drift": [ + { + "present": true + } + ] + } +} diff --git a/gems/decomplex/examples/oracles/inconsistent-rename-clone.json b/gems/decomplex/examples/oracles/inconsistent-rename-clone.json new file mode 100644 index 000000000..ee66f85d2 --- /dev/null +++ b/gems/decomplex/examples/oracles/inconsistent-rename-clone.json @@ -0,0 +1,12 @@ +{ + "detector": "inconsistent-rename-clone", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "ref_name": "src", + "divergent_count": 2 + } + ] +} diff --git a/gems/decomplex/examples/oracles/local-flow.json 
b/gems/decomplex/examples/oracles/local-flow.json new file mode 100644 index 000000000..fec5d2c17 --- /dev/null +++ b/gems/decomplex/examples/oracles/local-flow.json @@ -0,0 +1,12 @@ +{ + "detector": "local-flow", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "statement_count": 6, + "boundary_count": 1 + } + ] +} diff --git a/gems/decomplex/examples/oracles/locality-drag.json b/gems/decomplex/examples/oracles/locality-drag.json new file mode 100644 index 000000000..51f7d4683 --- /dev/null +++ b/gems/decomplex/examples/oracles/locality-drag.json @@ -0,0 +1,11 @@ +{ + "detector": "locality-drag", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "variable": "receipt_id" + } + ] +} diff --git a/gems/decomplex/examples/oracles/miner.json b/gems/decomplex/examples/oracles/miner.json new file mode 100644 index 000000000..adce8efd1 --- /dev/null +++ b/gems/decomplex/examples/oracles/miner.json @@ -0,0 +1,13 @@ +{ + "detector": "miner", + "engine": "ruby", + "options": { + }, + "expected": { + "missing_abstractions": [ + { + "present": true + } + ] + } +} diff --git a/gems/decomplex/examples/oracles/operational-discontinuity.json b/gems/decomplex/examples/oracles/operational-discontinuity.json new file mode 100644 index 000000000..3db9915de --- /dev/null +++ b/gems/decomplex/examples/oracles/operational-discontinuity.json @@ -0,0 +1,12 @@ +{ + "detector": "operational-discontinuity", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "resets": 1, + "confidence": "high" + } + ] +} diff --git a/gems/decomplex/examples/oracles/oversized-predicate.json b/gems/decomplex/examples/oracles/oversized-predicate.json new file mode 100644 index 000000000..6fe219147 --- /dev/null +++ b/gems/decomplex/examples/oracles/oversized-predicate.json @@ -0,0 +1,12 @@ +{ + "detector": "oversized-predicate", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "count": 4, + "atom_count": 4 + } + ] +} diff --git 
a/gems/decomplex/examples/oracles/path-condition.json b/gems/decomplex/examples/oracles/path-condition.json new file mode 100644 index 000000000..7e16ab337 --- /dev/null +++ b/gems/decomplex/examples/oracles/path-condition.json @@ -0,0 +1,11 @@ +{ + "detector": "path-condition", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "present": true + } + ] +} diff --git a/gems/decomplex/examples/oracles/predicate-alias.json b/gems/decomplex/examples/oracles/predicate-alias.json new file mode 100644 index 000000000..2cd893067 --- /dev/null +++ b/gems/decomplex/examples/oracles/predicate-alias.json @@ -0,0 +1,13 @@ +{ + "detector": "predicate-alias", + "engine": "ruby", + "options": { + }, + "expected": { + "alias_clusters": [ + { + "name_count": 2 + } + ] + } +} diff --git a/gems/decomplex/examples/oracles/redundant-nil-guard.json b/gems/decomplex/examples/oracles/redundant-nil-guard.json new file mode 100644 index 000000000..28dc8d6e7 --- /dev/null +++ b/gems/decomplex/examples/oracles/redundant-nil-guard.json @@ -0,0 +1,11 @@ +{ + "detector": "redundant-nil-guard", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "local": "value" + } + ] +} diff --git a/gems/decomplex/examples/oracles/semantic-alias.json b/gems/decomplex/examples/oracles/semantic-alias.json new file mode 100644 index 000000000..25ad9ce3f --- /dev/null +++ b/gems/decomplex/examples/oracles/semantic-alias.json @@ -0,0 +1,13 @@ +{ + "detector": "semantic-alias", + "engine": "ruby", + "options": { + }, + "expected": { + "alias_clusters": [ + { + "name_count": 2 + } + ] + } +} diff --git a/gems/decomplex/examples/oracles/sequence-mine.json b/gems/decomplex/examples/oracles/sequence-mine.json new file mode 100644 index 000000000..58921b6de --- /dev/null +++ b/gems/decomplex/examples/oracles/sequence-mine.json @@ -0,0 +1,17 @@ +{ + "detector": "sequence-mine", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "pair": [ + "alloc_mark", + "cleanup" + ], + "support": 4, + 
"has": "alloc_mark", + "missing": "cleanup" + } + ] +} diff --git a/gems/decomplex/examples/oracles/state-branch-density.json b/gems/decomplex/examples/oracles/state-branch-density.json new file mode 100644 index 000000000..e46cc034a --- /dev/null +++ b/gems/decomplex/examples/oracles/state-branch-density.json @@ -0,0 +1,11 @@ +{ + "detector": "state-branch-density", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "present": true + } + ] +} diff --git a/gems/decomplex/examples/oracles/state-mesh.json b/gems/decomplex/examples/oracles/state-mesh.json new file mode 100644 index 000000000..e5d212598 --- /dev/null +++ b/gems/decomplex/examples/oracles/state-mesh.json @@ -0,0 +1,11 @@ +{ + "detector": "state-mesh", + "engine": "ruby", + "options": { + }, + "expected": { + "state_mesh": { + "present": true + } + } +} diff --git a/gems/decomplex/examples/oracles/structural-topology.json b/gems/decomplex/examples/oracles/structural-topology.json new file mode 100644 index 000000000..97f827883 --- /dev/null +++ b/gems/decomplex/examples/oracles/structural-topology.json @@ -0,0 +1,9 @@ +{ + "detector": "structural-topology", + "engine": "ruby", + "options": { + }, + "expected": { + "present": true + } +} diff --git a/gems/decomplex/examples/oracles/temporal-ordering-pressure.json b/gems/decomplex/examples/oracles/temporal-ordering-pressure.json new file mode 100644 index 000000000..ac90e3e0b --- /dev/null +++ b/gems/decomplex/examples/oracles/temporal-ordering-pressure.json @@ -0,0 +1,11 @@ +{ + "detector": "temporal-ordering-pressure", + "engine": "ruby", + "options": { + }, + "expected": [ + { + "present": true + } + ] +} diff --git a/gems/decomplex/examples/oracles/weighted-inlined-complexity.json b/gems/decomplex/examples/oracles/weighted-inlined-complexity.json new file mode 100644 index 000000000..fdf8fe941 --- /dev/null +++ b/gems/decomplex/examples/oracles/weighted-inlined-complexity.json @@ -0,0 +1,13 @@ +{ + "detector": "weighted-inlined-complexity", 
+ "engine": "ruby", + "options": { + }, + "expected": [ + { + "method": "checkout", + "depth": 1, + "callee_count": 4 + } + ] +} diff --git a/gems/decomplex/examples/python/co-update.py b/gems/decomplex/examples/python/co-update.py new file mode 100644 index 000000000..450322d79 --- /dev/null +++ b/gems/decomplex/examples/python/co-update.py @@ -0,0 +1,14 @@ +def stable_one(node): + node.storage = 1 + node.provenance = 1 + +def stable_two(node): + node.storage = 1 + node.provenance = 1 + +def stable_three(node): + node.storage = 1 + node.provenance = 1 + +def misses_provenance(node): + node.storage = 1 diff --git a/gems/decomplex/examples/python/decision-pressure.py b/gems/decomplex/examples/python/decision-pressure.py new file mode 100644 index 000000000..637b935de --- /dev/null +++ b/gems/decomplex/examples/python/decision-pressure.py @@ -0,0 +1,3 @@ +def scan(node): + value = node.symbol + return value.isNull() diff --git a/gems/decomplex/examples/python/derived-state.py b/gems/decomplex/examples/python/derived-state.py new file mode 100644 index 000000000..99799bd0e --- /dev/null +++ b/gems/decomplex/examples/python/derived-state.py @@ -0,0 +1,4 @@ +def check(input): + cached = input + 1 + input = 2 + print(cached) diff --git a/gems/decomplex/examples/python/false-simplicity.py b/gems/decomplex/examples/python/false-simplicity.py new file mode 100644 index 000000000..a8601cbb2 --- /dev/null +++ b/gems/decomplex/examples/python/false-simplicity.py @@ -0,0 +1,3 @@ +class FalseSimplicityExample: + def hack(self): + print("hidden IO") diff --git a/gems/decomplex/examples/python/fat-union.py b/gems/decomplex/examples/python/fat-union.py new file mode 100644 index 000000000..990a35126 --- /dev/null +++ b/gems/decomplex/examples/python/fat-union.py @@ -0,0 +1,8 @@ +def handle(node): + match node: + case AST.Call: + node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv() + case AST.Func: + node.line(); node.col(); node.ty(); node.span(); 
node.parent(); node.name() + case AST.Lit: + node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value() diff --git a/gems/decomplex/examples/python/flay-similarity.py b/gems/decomplex/examples/python/flay-similarity.py new file mode 100644 index 000000000..81a886050 --- /dev/null +++ b/gems/decomplex/examples/python/flay-similarity.py @@ -0,0 +1,55 @@ +def first_clone(node): + total = 0 + value1 = node.part1 + if value1.ready() and value1.enabled(): + total += value1.amount + value2 = node.part2 + if value2.ready() and value2.enabled(): + total += value2.amount + value3 = node.part3 + if value3.ready() and value3.enabled(): + total += value3.amount + value4 = node.part4 + if value4.ready() and value4.enabled(): + total += value4.amount + value5 = node.part5 + if value5.ready() and value5.enabled(): + total += value5.amount + value6 = node.part6 + if value6.ready() and value6.enabled(): + total += value6.amount + value7 = node.part7 + if value7.ready() and value7.enabled(): + total += value7.amount + value8 = node.part8 + if value8.ready() and value8.enabled(): + total += value8.amount + return total + +def second_clone(entry): + total = 0 + item1 = entry.part1 + if item1.ready() and item1.enabled(): + total += item1.amount + item2 = entry.part2 + if item2.ready() and item2.enabled(): + total += item2.amount + item3 = entry.part3 + if item3.ready() and item3.enabled(): + total += item3.amount + item4 = entry.part4 + if item4.ready() and item4.enabled(): + total += item4.amount + item5 = entry.part5 + if item5.ready() and item5.enabled(): + total += item5.amount + item6 = entry.part6 + if item6.ready() and item6.enabled(): + total += item6.amount + item7 = entry.part7 + if item7.ready() and item7.enabled(): + total += item7.amount + item8 = entry.part8 + if item8.ready() and item8.enabled(): + total += item8.amount + return total diff --git a/gems/decomplex/examples/python/function-lcom.py b/gems/decomplex/examples/python/function-lcom.py new file 
mode 100644 index 000000000..cf1fe2c61 --- /dev/null +++ b/gems/decomplex/examples/python/function-lcom.py @@ -0,0 +1,11 @@ +def mixed(price, tax, logger): + subtotal = price + tax + total = subtotal * 2 + rounded = total.round() + + timestamp = now() + buffer = Buffer() + buffer.push(timestamp) + logger.info(buffer) + + return Result(rounded, buffer) diff --git a/gems/decomplex/examples/python/implicit-control-flow.py b/gems/decomplex/examples/python/implicit-control-flow.py new file mode 100644 index 000000000..2fef8d307 --- /dev/null +++ b/gems/decomplex/examples/python/implicit-control-flow.py @@ -0,0 +1,9 @@ +class FlowExample: + def prepare(self): self.status = 1 + def validate(self): self.valid = self.status == 1 + def commit(self): self.done = self.valid + def ok1(self): self.prepare(); self.validate(); self.commit() + def ok2(self): self.prepare(); self.validate(); self.commit() + def ok3(self): self.prepare(); self.validate(); self.commit() + def ok4(self): self.prepare(); self.validate(); self.commit() + def drift(self): self.validate(); self.prepare(); self.commit() diff --git a/gems/decomplex/examples/python/inconsistent-rename-clone.py b/gems/decomplex/examples/python/inconsistent-rename-clone.py new file mode 100644 index 000000000..1b9a28c9f --- /dev/null +++ b/gems/decomplex/examples/python/inconsistent-rename-clone.py @@ -0,0 +1,11 @@ +def original(): + src = fetch(1) + check(src) + store(src) + finalize(src) + +def pasted(): + dst = fetch(2) + check(dst) + store(src) + finalize(dst) diff --git a/gems/decomplex/examples/python/local-flow.py b/gems/decomplex/examples/python/local-flow.py new file mode 100644 index 000000000..8862fc626 --- /dev/null +++ b/gems/decomplex/examples/python/local-flow.py @@ -0,0 +1,8 @@ +def mixed(price, tax): + subtotal = price + tax + total = subtotal.round() + + timestamp = now() + buffer = Buffer() + buffer.push(timestamp) + return Result(total, buffer) diff --git a/gems/decomplex/examples/python/locality-drag.py 
b/gems/decomplex/examples/python/locality-drag.py new file mode 100644 index 000000000..9ae444fc9 --- /dev/null +++ b/gems/decomplex/examples/python/locality-drag.py @@ -0,0 +1,14 @@ +def run(user, cart, logger): + receipt_id = user.id + + total = cart.total + if total > 100: + if cart.discountable(): discount = 10 + if cart.taxable(): + if cart.region: tax = total * 2 + if logger.enabled(): + if logger.debug(): logger.info(total) + if cart.valid(): + if cart.ready(): status = 1 + + emit(receipt_id) diff --git a/gems/decomplex/examples/python/miner.py b/gems/decomplex/examples/python/miner.py new file mode 100644 index 000000000..298e46ddd --- /dev/null +++ b/gems/decomplex/examples/python/miner.py @@ -0,0 +1,4 @@ +def one(a,b,c): return a and b and c +def two(a,b,c): return a and b and c +def three(a,b,c): return a and b and c +def broken(a,b): return a and b diff --git a/gems/decomplex/examples/python/operational-discontinuity.py b/gems/decomplex/examples/python/operational-discontinuity.py new file mode 100644 index 000000000..2ae5319b7 --- /dev/null +++ b/gems/decomplex/examples/python/operational-discontinuity.py @@ -0,0 +1,8 @@ +def phase_shift(): + a = 1 + b = 2 + + # Phase 2 + x = 3 + y = 4 + print(x, y) diff --git a/gems/decomplex/examples/python/oversized-predicate.py b/gems/decomplex/examples/python/oversized-predicate.py new file mode 100644 index 000000000..886c6d71c --- /dev/null +++ b/gems/decomplex/examples/python/oversized-predicate.py @@ -0,0 +1,3 @@ +def complex_check(a,b,c,d): + if a and b and c and d: + print("too big") diff --git a/gems/decomplex/examples/python/path-condition.py b/gems/decomplex/examples/python/path-condition.py new file mode 100644 index 000000000..9749a124c --- /dev/null +++ b/gems/decomplex/examples/python/path-condition.py @@ -0,0 +1,8 @@ +def one(x,y,z): + if x.p() and y.q() and z.r(): go(x) +def two(x,y,z): + if x.p() and y.q() and z.r(): go(x) +def three(x,y,z): + if x.p() and y.q() and z.r(): go(x) +def bug(x,y,z): + 
if x.p() and y.q(): go(x) diff --git a/gems/decomplex/examples/python/predicate-alias.py b/gems/decomplex/examples/python/predicate-alias.py new file mode 100644 index 000000000..42320b895 --- /dev/null +++ b/gems/decomplex/examples/python/predicate-alias.py @@ -0,0 +1,3 @@ +def first(): return True +def second(): return True +def other(): return False diff --git a/gems/decomplex/examples/python/redundant-nil-guard.py b/gems/decomplex/examples/python/redundant-nil-guard.py new file mode 100644 index 000000000..8fe538750 --- /dev/null +++ b/gems/decomplex/examples/python/redundant-nil-guard.py @@ -0,0 +1,3 @@ +def check(value): + if value.isSome(): + value.isNull() diff --git a/gems/decomplex/examples/python/semantic-alias.py b/gems/decomplex/examples/python/semantic-alias.py new file mode 100644 index 000000000..3c093f443 --- /dev/null +++ b/gems/decomplex/examples/python/semantic-alias.py @@ -0,0 +1,7 @@ +def frame(node): return node.provenance == FRAME +def is_frame(node): return provenance == FRAME +def heap(node): return node.provenance == HEAP +def somewhere(node): + if node.provenance == FRAME: + return 1 + return 0 diff --git a/gems/decomplex/examples/python/sequence-mine.py b/gems/decomplex/examples/python/sequence-mine.py new file mode 100644 index 000000000..9e3bdd379 --- /dev/null +++ b/gems/decomplex/examples/python/sequence-mine.py @@ -0,0 +1,5 @@ +def one(): alloc_mark(x); body1(); cleanup(x) +def two(): alloc_mark(y); body2(); cleanup(y) +def three(): alloc_mark(z); body3(); cleanup(z) +def four(): alloc_mark(w); body4(); cleanup(w) +def leak(): alloc_mark(q); use_value(q) diff --git a/gems/decomplex/examples/python/state-branch-density.py b/gems/decomplex/examples/python/state-branch-density.py new file mode 100644 index 000000000..90ec12429 --- /dev/null +++ b/gems/decomplex/examples/python/state-branch-density.py @@ -0,0 +1,6 @@ +class StateBranchChecker: + def check(self, user): + if user.admin: + self.checked = True + if self.checked and 
user.name == "admin": + print("hello") diff --git a/gems/decomplex/examples/python/state-mesh.py b/gems/decomplex/examples/python/state-mesh.py new file mode 100644 index 000000000..85396e9fd --- /dev/null +++ b/gems/decomplex/examples/python/state-mesh.py @@ -0,0 +1,10 @@ +class StateMeshExample: + def initialize(self): + self.a = 1 + self.b = 2 + def writer(self): + self.a = 3 + def reader(self): + return self.a + self.b + def a_alias(self): + return self.a diff --git a/gems/decomplex/examples/python/structural-topology.py b/gems/decomplex/examples/python/structural-topology.py new file mode 100644 index 000000000..6652682c0 --- /dev/null +++ b/gems/decomplex/examples/python/structural-topology.py @@ -0,0 +1,11 @@ +class Worker: + def run(self, items): + self.prepare() + if self.ready(): + self.validate() + for item in items: + self.helper(item) + def prepare(self): pass + def ready(self): return True + def validate(self): pass + def helper(self, item): return item diff --git a/gems/decomplex/examples/python/temporal-ordering-pressure.py b/gems/decomplex/examples/python/temporal-ordering-pressure.py new file mode 100644 index 000000000..b26e60e63 --- /dev/null +++ b/gems/decomplex/examples/python/temporal-ordering-pressure.py @@ -0,0 +1,5 @@ +class TemporalOrderExample: + def one(self): self.a = 1 + def two(self): self.a = 2; self.b = 3 + def three(self): self.b = 4 + def reader(self): return self.a diff --git a/gems/decomplex/examples/python/weighted-inlined-complexity.py b/gems/decomplex/examples/python/weighted-inlined-complexity.py new file mode 100644 index 000000000..25aa61456 --- /dev/null +++ b/gems/decomplex/examples/python/weighted-inlined-complexity.py @@ -0,0 +1,30 @@ +def checkout(user, cart): + validate_user(user) + apply_discount(cart) + process_payment(user, cart) + audit_cart(cart) + +def validate_user(user): + if user.active() and not user.suspended(): + if user.profile.complete(): return True + else: return False + else: return False + +def 
apply_discount(cart): + if cart.total > 100 and eligible(): + if holiday(): return 20 + elif loyalty_month(): return 15 + else: return 10 + return 0 + +def process_payment(user, cart): + if gateway.ready(): + if cart.total > 0 and user.active(): + if fraud_check(user): charge(user, cart) + else: decline(user) + +def audit_cart(cart): + for item in cart.items: + if item.taxable(): + if item.region and item.amount > 0: + record_tax(item) diff --git a/gems/decomplex/examples/ruby/co-update.rb b/gems/decomplex/examples/ruby/co-update.rb new file mode 100644 index 000000000..a7b2a38bb --- /dev/null +++ b/gems/decomplex/examples/ruby/co-update.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +def stable_one(node) + node.storage = :heap + node.provenance = :heap +end + +def stable_two(node) + node.storage = :heap + node.provenance = :heap +end + +def stable_three(node) + node.storage = :heap + node.provenance = :heap +end + +def misses_provenance(node) + node.storage = :heap +end diff --git a/gems/decomplex/examples/ruby/decision-pressure.rb b/gems/decomplex/examples/ruby/decision-pressure.rb new file mode 100644 index 000000000..193ffe609 --- /dev/null +++ b/gems/decomplex/examples/ruby/decision-pressure.rb @@ -0,0 +1,6 @@ +# frozen_string_literal: true + +def scan(node) + value = node.symbol + value.nil? 
+end diff --git a/gems/decomplex/examples/ruby/derived-state.rb b/gems/decomplex/examples/ruby/derived-state.rb new file mode 100644 index 000000000..f1a9e1cda --- /dev/null +++ b/gems/decomplex/examples/ruby/derived-state.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +def check(input) + cached = input + 1 + input = 2 + puts cached +end diff --git a/gems/decomplex/examples/ruby/false-simplicity.rb b/gems/decomplex/examples/ruby/false-simplicity.rb new file mode 100644 index 000000000..2b709e756 --- /dev/null +++ b/gems/decomplex/examples/ruby/false-simplicity.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +class FalseSimplicityExample + def hack + print "hidden IO" + end +end diff --git a/gems/decomplex/examples/ruby/fat-union.rb b/gems/decomplex/examples/ruby/fat-union.rb new file mode 100644 index 000000000..7fd78ebd6 --- /dev/null +++ b/gems/decomplex/examples/ruby/fat-union.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +def handle(node) + case node + when AST::Call + node.line + node.col + node.ty + node.span + node.parent + node.recv + when AST::Func + node.line + node.col + node.ty + node.span + node.parent + node.name + when AST::Lit + node.line + node.col + node.ty + node.span + node.parent + node.value + end +end diff --git a/gems/decomplex/examples/ruby/flay-similarity.rb b/gems/decomplex/examples/ruby/flay-similarity.rb new file mode 100644 index 000000000..e3515ad97 --- /dev/null +++ b/gems/decomplex/examples/ruby/flay-similarity.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +def first_clone(node) + total = 0 + value1 = node.part1 + if value1.ready? && value1.enabled? + total += value1.amount + end + value2 = node.part2 + if value2.ready? && value2.enabled? + total += value2.amount + end + value3 = node.part3 + if value3.ready? && value3.enabled? + total += value3.amount + end + value4 = node.part4 + if value4.ready? && value4.enabled? + total += value4.amount + end + value5 = node.part5 + if value5.ready? && value5.enabled? 
+ total += value5.amount + end + value6 = node.part6 + if value6.ready? && value6.enabled? + total += value6.amount + end + value7 = node.part7 + if value7.ready? && value7.enabled? + total += value7.amount + end + value8 = node.part8 + if value8.ready? && value8.enabled? + total += value8.amount + end + total +end + +def second_clone(entry) + total = 0 + item1 = entry.part1 + if item1.ready? && item1.enabled? + total += item1.amount + end + item2 = entry.part2 + if item2.ready? && item2.enabled? + total += item2.amount + end + item3 = entry.part3 + if item3.ready? && item3.enabled? + total += item3.amount + end + item4 = entry.part4 + if item4.ready? && item4.enabled? + total += item4.amount + end + item5 = entry.part5 + if item5.ready? && item5.enabled? + total += item5.amount + end + item6 = entry.part6 + if item6.ready? && item6.enabled? + total += item6.amount + end + item7 = entry.part7 + if item7.ready? && item7.enabled? + total += item7.amount + end + item8 = entry.part8 + if item8.ready? && item8.enabled? 
+ total += item8.amount + end + total +end diff --git a/gems/decomplex/examples/ruby/function-lcom.rb b/gems/decomplex/examples/ruby/function-lcom.rb new file mode 100644 index 000000000..722342983 --- /dev/null +++ b/gems/decomplex/examples/ruby/function-lcom.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +class Billing + def mixed(price, tax, logger) + subtotal = price + tax + total = subtotal * 2 + rounded = total.round + + timestamp = Time.now + buffer = [] + buffer << timestamp + logger.info(buffer) + + [rounded, buffer] + end +end diff --git a/gems/decomplex/examples/ruby/implicit-control-flow.rb b/gems/decomplex/examples/ruby/implicit-control-flow.rb new file mode 100644 index 000000000..edb727b4f --- /dev/null +++ b/gems/decomplex/examples/ruby/implicit-control-flow.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +class FlowExample + def prepare; self.status = :ready; end + def validate; @valid = status == :ready; end + def commit; self.done = @valid; end + + def ok1; prepare; validate; commit; end + def ok2; prepare; validate; commit; end + def ok3; prepare; validate; commit; end + def ok4; prepare; validate; commit; end + def drift; validate; prepare; commit; end +end diff --git a/gems/decomplex/examples/ruby/inconsistent-rename-clone.rb b/gems/decomplex/examples/ruby/inconsistent-rename-clone.rb new file mode 100644 index 000000000..9409249b7 --- /dev/null +++ b/gems/decomplex/examples/ruby/inconsistent-rename-clone.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +def original + src = fetch(1) + check(src) + store(src) + finalize(src) +end + +def pasted + dst = fetch(2) + check(dst) + store(src) + finalize(dst) +end diff --git a/gems/decomplex/examples/ruby/local-flow.rb b/gems/decomplex/examples/ruby/local-flow.rb new file mode 100644 index 000000000..f4c3de6d5 --- /dev/null +++ b/gems/decomplex/examples/ruby/local-flow.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +class Billing + def mixed(price, tax) + subtotal = price + tax + 
total = subtotal.round + + timestamp = Time.now + buffer = [] + buffer << timestamp + [total, buffer] + end +end diff --git a/gems/decomplex/examples/ruby/locality-drag.rb b/gems/decomplex/examples/ruby/locality-drag.rb new file mode 100644 index 000000000..1b87ae6ba --- /dev/null +++ b/gems/decomplex/examples/ruby/locality-drag.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +class Importer + def run(user, cart, logger) + receipt_id = user.id + + total = cart.total + if total > 100 + if cart.discountable? + discount = 10 + end + end + if cart.taxable? + if cart.region + tax = total * 0.2 + end + end + if logger.enabled? + if logger.debug? + logger.info(total) + end + end + if cart.valid? + if cart.ready? + status = :ready + end + end + + emit(receipt_id) + end +end diff --git a/gems/decomplex/examples/ruby/miner.rb b/gems/decomplex/examples/ruby/miner.rb new file mode 100644 index 000000000..4213623bc --- /dev/null +++ b/gems/decomplex/examples/ruby/miner.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +def one(a, b, c) + a && b && c +end + +def two(a, b, c) + a && b && c +end + +def three(a, b, c) + a && b && c +end + +def broken(a, b) + a && b +end diff --git a/gems/decomplex/examples/ruby/operational-discontinuity.rb b/gems/decomplex/examples/ruby/operational-discontinuity.rb new file mode 100644 index 000000000..3979ac0ba --- /dev/null +++ b/gems/decomplex/examples/ruby/operational-discontinuity.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +def phase_shift + a = 1 + b = 2 + + # Phase 2 + x = 3 + y = 4 + puts x, y +end diff --git a/gems/decomplex/examples/ruby/oversized-predicate.rb b/gems/decomplex/examples/ruby/oversized-predicate.rb new file mode 100644 index 000000000..b1d8e005c --- /dev/null +++ b/gems/decomplex/examples/ruby/oversized-predicate.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +def complex_check + if a && b && c && d + puts "too big" + end +end diff --git a/gems/decomplex/examples/ruby/path-condition.rb 
b/gems/decomplex/examples/ruby/path-condition.rb new file mode 100644 index 000000000..3dde8c231 --- /dev/null +++ b/gems/decomplex/examples/ruby/path-condition.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +def one(x, y, z) + go(x) if x.p? && y.q? && z.r? +end + +def two(x, y, z) + go(x) if x.p? && y.q? && z.r? +end + +def three(x, y, z) + go(x) if x.p? && y.q? && z.r? +end + +def bug(x, y, z) + go(x) if x.p? && y.q? +end diff --git a/gems/decomplex/examples/ruby/predicate-alias.rb b/gems/decomplex/examples/ruby/predicate-alias.rb new file mode 100644 index 000000000..e187f130f --- /dev/null +++ b/gems/decomplex/examples/ruby/predicate-alias.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +def first + true +end + +def second + true +end + +def other + false +end diff --git a/gems/decomplex/examples/ruby/redundant-nil-guard.rb b/gems/decomplex/examples/ruby/redundant-nil-guard.rb new file mode 100644 index 000000000..5a66bac58 --- /dev/null +++ b/gems/decomplex/examples/ruby/redundant-nil-guard.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +def check(value) + if value + value.nil? 
+ end +end diff --git a/gems/decomplex/examples/ruby/semantic-alias.rb b/gems/decomplex/examples/ruby/semantic-alias.rb new file mode 100644 index 000000000..cdbe2734c --- /dev/null +++ b/gems/decomplex/examples/ruby/semantic-alias.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +def frame?; @provenance == :frame; end +def is_frame?; provenance == :frame; end +def heap?; @provenance == :heap; end + +def somewhere(node) + return 1 if node.provenance == :frame +end diff --git a/gems/decomplex/examples/ruby/sequence-mine.rb b/gems/decomplex/examples/ruby/sequence-mine.rb new file mode 100644 index 000000000..eb43e797a --- /dev/null +++ b/gems/decomplex/examples/ruby/sequence-mine.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +def one; alloc_mark(x); body1; cleanup(x); end +def two; alloc_mark(y); body2; cleanup(y); end +def three; alloc_mark(z); body3; cleanup(z); end +def four; alloc_mark(w); body4; cleanup(w); end +def leak; alloc_mark(q); use(q); end diff --git a/gems/decomplex/examples/ruby/state-branch-density.rb b/gems/decomplex/examples/ruby/state-branch-density.rb new file mode 100644 index 000000000..af05b1135 --- /dev/null +++ b/gems/decomplex/examples/ruby/state-branch-density.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +class StateBranchUser < T::Struct + const :name, String + const :admin, T::Boolean +end + +class StateBranchChecker + sig { params(user: StateBranchUser).void } + def check(user) + if user.admin + @checked = true + end + + if @checked && user.name == "admin" + puts "hello" + end + end +end diff --git a/gems/decomplex/examples/ruby/state-mesh.rb b/gems/decomplex/examples/ruby/state-mesh.rb new file mode 100644 index 000000000..9efd86ef2 --- /dev/null +++ b/gems/decomplex/examples/ruby/state-mesh.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +class StateMeshExample + def initialize + @a = 1 + @b = 2 + end + + def writer + @a = 3 + end + + def reader + @a + @b + end + + def a_alias + @a + end +end diff --git 
a/gems/decomplex/examples/ruby/structural-topology.rb b/gems/decomplex/examples/ruby/structural-topology.rb new file mode 100644 index 000000000..5100898cb --- /dev/null +++ b/gems/decomplex/examples/ruby/structural-topology.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +class Worker + def run(items) + prepare + if ready + validate + end + items.each do |item| + helper(item) + end + end + + private + + def prepare; end + def ready; true; end + def validate; end + def helper(item); item; end + + public :validate +end diff --git a/gems/decomplex/examples/ruby/temporal-ordering-pressure.rb b/gems/decomplex/examples/ruby/temporal-ordering-pressure.rb new file mode 100644 index 000000000..afa9d0d1c --- /dev/null +++ b/gems/decomplex/examples/ruby/temporal-ordering-pressure.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +class TemporalOrderExample + def one + @a = 1 + end + + def two + @a = 2 + @b = 3 + end + + def three + @b = 4 + end + + def reader + @a + end +end diff --git a/gems/decomplex/examples/ruby/weighted-inlined-complexity.rb b/gems/decomplex/examples/ruby/weighted-inlined-complexity.rb new file mode 100644 index 000000000..c9f82e75a --- /dev/null +++ b/gems/decomplex/examples/ruby/weighted-inlined-complexity.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +class WeightedInlineExample + def checkout(user, cart) + validate_user(user) + apply_discount(cart) + process_payment(user, cart) + audit_cart(cart) + end + + private + + def validate_user(user) + return false unless user + if user.active? && !user.suspended? + if user.profile.complete? + true + else + false + end + else + false + end + end + + def apply_discount(cart) + if cart.total > 100 && eligible? + if holiday? + 20 + elsif loyalty_month? + 15 + else + 10 + end + end + end + + def process_payment(user, cart) + if gateway.ready? + if cart.total > 0 && user.active? 
+ if fraud_check(user) + charge(user, cart) + else + decline(user) + end + end + end + end + + def audit_cart(cart) + cart.items.each do |item| + if item.taxable? + if item.region && item.amount > 0 + record_tax(item) + end + end + end + end +end diff --git a/gems/decomplex/examples/rust/co-update.rs b/gems/decomplex/examples/rust/co-update.rs new file mode 100644 index 000000000..6afaea6b6 --- /dev/null +++ b/gems/decomplex/examples/rust/co-update.rs @@ -0,0 +1,23 @@ +struct Node { + storage: i32, + provenance: i32, +} + +fn stable_one(mut node: Node) { + node.storage = 1; + node.provenance = 1; +} + +fn stable_two(mut node: Node) { + node.storage = 1; + node.provenance = 1; +} + +fn stable_three(mut node: Node) { + node.storage = 1; + node.provenance = 1; +} + +fn misses_provenance(mut node: Node) { + node.storage = 1; +} diff --git a/gems/decomplex/examples/rust/decision-pressure.rs b/gems/decomplex/examples/rust/decision-pressure.rs new file mode 100644 index 000000000..3a8401f04 --- /dev/null +++ b/gems/decomplex/examples/rust/decision-pressure.rs @@ -0,0 +1,3 @@ +fn scan(node: Node) -> bool { + node.symbol.is_none() +} diff --git a/gems/decomplex/examples/rust/derived-state.rs b/gems/decomplex/examples/rust/derived-state.rs new file mode 100644 index 000000000..aebef251b --- /dev/null +++ b/gems/decomplex/examples/rust/derived-state.rs @@ -0,0 +1,5 @@ +fn check(mut input: i32) { + let cached = input + 1; + input = 2; + print(cached); +} diff --git a/gems/decomplex/examples/rust/false-simplicity.rs b/gems/decomplex/examples/rust/false-simplicity.rs new file mode 100644 index 000000000..fb7937db7 --- /dev/null +++ b/gems/decomplex/examples/rust/false-simplicity.rs @@ -0,0 +1,7 @@ +struct FalseSimplicityExample; + +impl FalseSimplicityExample { + fn hack(&self) { + print("hidden IO"); + } +} diff --git a/gems/decomplex/examples/rust/fat-union.rs b/gems/decomplex/examples/rust/fat-union.rs new file mode 100644 index 000000000..aca570ca6 --- /dev/null +++ 
b/gems/decomplex/examples/rust/fat-union.rs @@ -0,0 +1,28 @@ +fn handle(node: Ast) { + match node { + AST::Call => { + node.line(); + node.col(); + node.ty(); + node.span(); + node.parent(); + node.recv(); + } + AST::Func => { + node.line(); + node.col(); + node.ty(); + node.span(); + node.parent(); + node.name(); + } + AST::Lit => { + node.line(); + node.col(); + node.ty(); + node.span(); + node.parent(); + node.value(); + } + } +} diff --git a/gems/decomplex/examples/rust/flay-similarity.rs b/gems/decomplex/examples/rust/flay-similarity.rs new file mode 100644 index 000000000..ab464e914 --- /dev/null +++ b/gems/decomplex/examples/rust/flay-similarity.rs @@ -0,0 +1,41 @@ +fn first_clone(node: Node) -> i32 { + let mut total = 0; + let value1 = node.part1; + if value1.ready() && value1.enabled() { total += value1.amount; } + let value2 = node.part2; + if value2.ready() && value2.enabled() { total += value2.amount; } + let value3 = node.part3; + if value3.ready() && value3.enabled() { total += value3.amount; } + let value4 = node.part4; + if value4.ready() && value4.enabled() { total += value4.amount; } + let value5 = node.part5; + if value5.ready() && value5.enabled() { total += value5.amount; } + let value6 = node.part6; + if value6.ready() && value6.enabled() { total += value6.amount; } + let value7 = node.part7; + if value7.ready() && value7.enabled() { total += value7.amount; } + let value8 = node.part8; + if value8.ready() && value8.enabled() { total += value8.amount; } + total +} + +fn second_clone(entry: Node) -> i32 { + let mut total = 0; + let item1 = entry.part1; + if item1.ready() && item1.enabled() { total += item1.amount; } + let item2 = entry.part2; + if item2.ready() && item2.enabled() { total += item2.amount; } + let item3 = entry.part3; + if item3.ready() && item3.enabled() { total += item3.amount; } + let item4 = entry.part4; + if item4.ready() && item4.enabled() { total += item4.amount; } + let item5 = entry.part5; + if item5.ready() && 
item5.enabled() { total += item5.amount; } + let item6 = entry.part6; + if item6.ready() && item6.enabled() { total += item6.amount; } + let item7 = entry.part7; + if item7.ready() && item7.enabled() { total += item7.amount; } + let item8 = entry.part8; + if item8.ready() && item8.enabled() { total += item8.amount; } + total +} diff --git a/gems/decomplex/examples/rust/function-lcom.rs b/gems/decomplex/examples/rust/function-lcom.rs new file mode 100644 index 000000000..ed4a5cb70 --- /dev/null +++ b/gems/decomplex/examples/rust/function-lcom.rs @@ -0,0 +1,12 @@ +fn mixed(price: i32, tax: i32, logger: Logger) -> (i32, Buffer) { + let subtotal = price + tax; + let total = subtotal * 2; + let rounded = total.round(); + + let timestamp = now(); + let mut buffer = Buffer::new(); + buffer.push(timestamp); + logger.info(buffer); + + (rounded, buffer) +} diff --git a/gems/decomplex/examples/rust/implicit-control-flow.rs b/gems/decomplex/examples/rust/implicit-control-flow.rs new file mode 100644 index 000000000..f8a00efe4 --- /dev/null +++ b/gems/decomplex/examples/rust/implicit-control-flow.rs @@ -0,0 +1,17 @@ +struct FlowExample { + status: i32, + valid: bool, + done: bool, +} + +impl FlowExample { + fn prepare(&mut self) { self.status = 1; } + fn validate(&mut self) { self.valid = self.status == 1; } + fn commit(&mut self) { self.done = self.valid; } + + fn ok1(&mut self) { self.prepare(); self.validate(); self.commit(); } + fn ok2(&mut self) { self.prepare(); self.validate(); self.commit(); } + fn ok3(&mut self) { self.prepare(); self.validate(); self.commit(); } + fn ok4(&mut self) { self.prepare(); self.validate(); self.commit(); } + fn drift(&mut self) { self.validate(); self.prepare(); self.commit(); } +} diff --git a/gems/decomplex/examples/rust/inconsistent-rename-clone.rs b/gems/decomplex/examples/rust/inconsistent-rename-clone.rs new file mode 100644 index 000000000..8e8a3f819 --- /dev/null +++ b/gems/decomplex/examples/rust/inconsistent-rename-clone.rs @@ -0,0 
+1,13 @@ +fn original() { + let src = fetch(1); + check(src); + store(src); + finalize(src); +} + +fn pasted() { + let dst = fetch(2); + check(dst); + store(src); + finalize(dst); +} diff --git a/gems/decomplex/examples/rust/local-flow.rs b/gems/decomplex/examples/rust/local-flow.rs new file mode 100644 index 000000000..d027c1e92 --- /dev/null +++ b/gems/decomplex/examples/rust/local-flow.rs @@ -0,0 +1,9 @@ +fn mixed(price: i32, tax: i32) -> (i32, Buffer) { + let subtotal = price + tax; + let total = subtotal.round(); + + let timestamp = now(); + let mut buffer = Buffer::new(); + buffer.push(timestamp); + (total, buffer) +} diff --git a/gems/decomplex/examples/rust/locality-drag.rs b/gems/decomplex/examples/rust/locality-drag.rs new file mode 100644 index 000000000..1d71f299f --- /dev/null +++ b/gems/decomplex/examples/rust/locality-drag.rs @@ -0,0 +1,27 @@ +fn run(user: User, cart: Cart, logger: Logger) { + let receipt_id = user.id; + + let total = cart.total; + if total > 100 { + if cart.discountable() { + let discount = 10; + } + } + if cart.taxable() { + if cart.region { + let tax = total * 2; + } + } + if logger.enabled() { + if logger.debug() { + logger.info(total); + } + } + if cart.valid() { + if cart.ready() { + let status = 1; + } + } + + emit(receipt_id); +} diff --git a/gems/decomplex/examples/rust/miner.rs b/gems/decomplex/examples/rust/miner.rs new file mode 100644 index 000000000..4f5b7fcab --- /dev/null +++ b/gems/decomplex/examples/rust/miner.rs @@ -0,0 +1,15 @@ +fn one(a: bool, b: bool, c: bool) -> bool { + a && b && c +} + +fn two(a: bool, b: bool, c: bool) -> bool { + a && b && c +} + +fn three(a: bool, b: bool, c: bool) -> bool { + a && b && c +} + +fn broken(a: bool, b: bool) -> bool { + a && b +} diff --git a/gems/decomplex/examples/rust/operational-discontinuity.rs b/gems/decomplex/examples/rust/operational-discontinuity.rs new file mode 100644 index 000000000..401589871 --- /dev/null +++ 
b/gems/decomplex/examples/rust/operational-discontinuity.rs @@ -0,0 +1,10 @@ +fn phase_shift() { + let a = 1; + let b = 2; + + // Phase 2 + let x = 3; + let y = 4; + print(x); + print(y); +} diff --git a/gems/decomplex/examples/rust/oversized-predicate.rs b/gems/decomplex/examples/rust/oversized-predicate.rs new file mode 100644 index 000000000..40129fdde --- /dev/null +++ b/gems/decomplex/examples/rust/oversized-predicate.rs @@ -0,0 +1,5 @@ +fn complex_check(a: bool, b: bool, c: bool, d: bool) { + if a && b && c && d { + print("too big"); + } +} diff --git a/gems/decomplex/examples/rust/path-condition.rs b/gems/decomplex/examples/rust/path-condition.rs new file mode 100644 index 000000000..343d2af0b --- /dev/null +++ b/gems/decomplex/examples/rust/path-condition.rs @@ -0,0 +1,15 @@ +fn one(x: X, y: Y, z: Z) { + if x.p() && y.q() && z.r() { go(x); } +} + +fn two(x: X, y: Y, z: Z) { + if x.p() && y.q() && z.r() { go(x); } +} + +fn three(x: X, y: Y, z: Z) { + if x.p() && y.q() && z.r() { go(x); } +} + +fn bug(x: X, y: Y, z: Z) { + if x.p() && y.q() { go(x); } +} diff --git a/gems/decomplex/examples/rust/predicate-alias.rs b/gems/decomplex/examples/rust/predicate-alias.rs new file mode 100644 index 000000000..5d70e6588 --- /dev/null +++ b/gems/decomplex/examples/rust/predicate-alias.rs @@ -0,0 +1,11 @@ +fn first() -> bool { + true +} + +fn second() -> bool { + true +} + +fn other() -> bool { + false +} diff --git a/gems/decomplex/examples/rust/redundant-nil-guard.rs b/gems/decomplex/examples/rust/redundant-nil-guard.rs new file mode 100644 index 000000000..33e2f1184 --- /dev/null +++ b/gems/decomplex/examples/rust/redundant-nil-guard.rs @@ -0,0 +1,5 @@ +fn check(value: Option) { + if value.is_some() { + value.is_none(); + } +} diff --git a/gems/decomplex/examples/rust/semantic-alias.rs b/gems/decomplex/examples/rust/semantic-alias.rs new file mode 100644 index 000000000..2b707286d --- /dev/null +++ b/gems/decomplex/examples/rust/semantic-alias.rs @@ -0,0 +1,8 @@ +fn 
frame(node: Node) -> bool { node.provenance == FRAME } +fn is_frame(node: Node) -> bool { provenance == FRAME } +fn heap(node: Node) -> bool { node.provenance == HEAP } + +fn somewhere(node: Node) -> i32 { + if node.provenance == FRAME { return 1; } + 0 +} diff --git a/gems/decomplex/examples/rust/sequence-mine.rs b/gems/decomplex/examples/rust/sequence-mine.rs new file mode 100644 index 000000000..412047b5d --- /dev/null +++ b/gems/decomplex/examples/rust/sequence-mine.rs @@ -0,0 +1,5 @@ +fn one() { alloc_mark(x); body1(); cleanup(x); } +fn two() { alloc_mark(y); body2(); cleanup(y); } +fn three() { alloc_mark(z); body3(); cleanup(z); } +fn four() { alloc_mark(w); body4(); cleanup(w); } +fn leak() { alloc_mark(q); use_value(q); } diff --git a/gems/decomplex/examples/rust/state-branch-density.rs b/gems/decomplex/examples/rust/state-branch-density.rs new file mode 100644 index 000000000..d08344808 --- /dev/null +++ b/gems/decomplex/examples/rust/state-branch-density.rs @@ -0,0 +1,15 @@ +struct StateBranchChecker { + checked: bool, +} + +impl StateBranchChecker { + fn check(&mut self, user: User) { + if user.admin { + self.checked = true; + } + + if self.checked && user.name == "admin" { + print("hello"); + } + } +} diff --git a/gems/decomplex/examples/rust/state-mesh.rs b/gems/decomplex/examples/rust/state-mesh.rs new file mode 100644 index 000000000..a7d28e1d1 --- /dev/null +++ b/gems/decomplex/examples/rust/state-mesh.rs @@ -0,0 +1,23 @@ +struct StateMeshExample { + a: i32, + b: i32, +} + +impl StateMeshExample { + fn initialize(&mut self) { + self.a = 1; + self.b = 2; + } + + fn writer(&mut self) { + self.a = 3; + } + + fn reader(&self) -> i32 { + self.a + self.b + } + + fn a_alias(&self) -> i32 { + self.a + } +} diff --git a/gems/decomplex/examples/rust/structural-topology.rs b/gems/decomplex/examples/rust/structural-topology.rs new file mode 100644 index 000000000..bd29439db --- /dev/null +++ b/gems/decomplex/examples/rust/structural-topology.rs @@ -0,0 +1,18 
@@ +struct Worker; + +impl Worker { + pub fn run(&self, items: Items) { + self.prepare(); + if true { + self.validate(); + } + for item in items { + self.helper(item); + } + } + + fn prepare(&self) {} + fn ready(&self) -> bool { true } + pub fn validate(&self) {} + fn helper(&self, item: Item) { item; } +} diff --git a/gems/decomplex/examples/rust/temporal-ordering-pressure.rs b/gems/decomplex/examples/rust/temporal-ordering-pressure.rs new file mode 100644 index 000000000..cb845cfe9 --- /dev/null +++ b/gems/decomplex/examples/rust/temporal-ordering-pressure.rs @@ -0,0 +1,23 @@ +pub struct TemporalOrderExample { + a: i32, + b: i32, +} + +impl TemporalOrderExample { + pub fn one(&mut self) { + self.a = 1; + } + + pub fn two(&mut self) { + self.a = 2; + self.b = 3; + } + + pub fn three(&mut self) { + self.b = 4; + } + + pub fn reader(&self) -> i32 { + self.a + } +} diff --git a/gems/decomplex/examples/rust/weighted-inlined-complexity.rs b/gems/decomplex/examples/rust/weighted-inlined-complexity.rs new file mode 100644 index 000000000..8cc8c9e8e --- /dev/null +++ b/gems/decomplex/examples/rust/weighted-inlined-complexity.rs @@ -0,0 +1,44 @@ +struct WeightedInlineExample; + +impl WeightedInlineExample { + fn checkout(&self, user: User, cart: Cart) { + self.validate_user(user); + self.apply_discount(cart); + self.process_payment(user, cart); + self.audit_cart(cart); + } + + fn validate_user(&self, user: User) -> bool { + if user.active() && !user.suspended() { + if user.profile.complete() { true } else { false } + } else { + false + } + } + + fn apply_discount(&self, cart: Cart) -> i32 { + if cart.total > 100 && eligible() { + if holiday() { 20 } else if loyalty_month() { 15 } else { 10 } + } else { + 0 + } + } + + fn process_payment(&self, user: User, cart: Cart) { + if gateway.ready() { + if cart.total > 0 && user.active() { + if fraud_check(user) { charge(user, cart); } else { decline(user); } + } + } + } + + fn audit_cart(&self, cart: Cart) { + for item in 
cart.items { + if item.taxable() { + if item.region && item.amount > 0 { + record_tax(item); + } + } + } + } +} diff --git a/gems/decomplex/examples/swift/co-update.swift b/gems/decomplex/examples/swift/co-update.swift new file mode 100644 index 000000000..389d85727 --- /dev/null +++ b/gems/decomplex/examples/swift/co-update.swift @@ -0,0 +1,4 @@ +func stable_one(node: Node) { node.storage = 1; node.provenance = 1 } +func stable_two(node: Node) { node.storage = 1; node.provenance = 1 } +func stable_three(node: Node) { node.storage = 1; node.provenance = 1 } +func misses_provenance(node: Node) { node.storage = 1 } diff --git a/gems/decomplex/examples/swift/decision-pressure.swift b/gems/decomplex/examples/swift/decision-pressure.swift new file mode 100644 index 000000000..2edc2cfda --- /dev/null +++ b/gems/decomplex/examples/swift/decision-pressure.swift @@ -0,0 +1 @@ +func scan(node: Node) -> Bool { let value = node.symbol; return value.isNull() } diff --git a/gems/decomplex/examples/swift/derived-state.swift b/gems/decomplex/examples/swift/derived-state.swift new file mode 100644 index 000000000..53abac1f0 --- /dev/null +++ b/gems/decomplex/examples/swift/derived-state.swift @@ -0,0 +1 @@ +func check(inputValue: Int) { var input = inputValue; let cached = input + 1; input = 2; print(cached) } diff --git a/gems/decomplex/examples/swift/false-simplicity.swift b/gems/decomplex/examples/swift/false-simplicity.swift new file mode 100644 index 000000000..7afa201cf --- /dev/null +++ b/gems/decomplex/examples/swift/false-simplicity.swift @@ -0,0 +1 @@ +class FalseSimplicityExample { func hack() { print("hidden IO") } } diff --git a/gems/decomplex/examples/swift/fat-union.swift b/gems/decomplex/examples/swift/fat-union.swift new file mode 100644 index 000000000..c55414822 --- /dev/null +++ b/gems/decomplex/examples/swift/fat-union.swift @@ -0,0 +1,7 @@ +func handle(node: Node) { + switch node { + case AST.Call: node.line(); node.col(); node.ty(); node.span(); 
node.parent(); node.recv() + case AST.Func: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name() + case AST.Lit: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value() + } +} diff --git a/gems/decomplex/examples/swift/flay-similarity.swift b/gems/decomplex/examples/swift/flay-similarity.swift new file mode 100644 index 000000000..e731d0d82 --- /dev/null +++ b/gems/decomplex/examples/swift/flay-similarity.swift @@ -0,0 +1,73 @@ +func first_clone(node: Node) -> Int { + var total = 0 + let value1 = node.part1 + if value1.ready() && value1.enabled() { + total += value1.amount + } + let value2 = node.part2 + if value2.ready() && value2.enabled() { + total += value2.amount + } + let value3 = node.part3 + if value3.ready() && value3.enabled() { + total += value3.amount + } + let value4 = node.part4 + if value4.ready() && value4.enabled() { + total += value4.amount + } + let value5 = node.part5 + if value5.ready() && value5.enabled() { + total += value5.amount + } + let value6 = node.part6 + if value6.ready() && value6.enabled() { + total += value6.amount + } + let value7 = node.part7 + if value7.ready() && value7.enabled() { + total += value7.amount + } + let value8 = node.part8 + if value8.ready() && value8.enabled() { + total += value8.amount + } + return total +} + +func second_clone(entry: Node) -> Int { + var total = 0 + let item1 = entry.part1 + if item1.ready() && item1.enabled() { + total += item1.amount + } + let item2 = entry.part2 + if item2.ready() && item2.enabled() { + total += item2.amount + } + let item3 = entry.part3 + if item3.ready() && item3.enabled() { + total += item3.amount + } + let item4 = entry.part4 + if item4.ready() && item4.enabled() { + total += item4.amount + } + let item5 = entry.part5 + if item5.ready() && item5.enabled() { + total += item5.amount + } + let item6 = entry.part6 + if item6.ready() && item6.enabled() { + total += item6.amount + } + let item7 = entry.part7 + if item7.ready() && 
item7.enabled() { + total += item7.amount + } + let item8 = entry.part8 + if item8.ready() && item8.enabled() { + total += item8.amount + } + return total +} diff --git a/gems/decomplex/examples/swift/function-lcom.swift b/gems/decomplex/examples/swift/function-lcom.swift new file mode 100644 index 000000000..12197c331 --- /dev/null +++ b/gems/decomplex/examples/swift/function-lcom.swift @@ -0,0 +1,13 @@ +func mixed(price: Int, tax: Int, logger: Logger) -> Result { + let subtotal = price + tax + let total = subtotal * 2 + let rounded = total.round() + + let timestamp = now() + let buffer = Buffer.init() + let stamp = timestamp + buffer.push(stamp) + logger.info(buffer) + + return Result.init(rounded, buffer) +} diff --git a/gems/decomplex/examples/swift/implicit-control-flow.swift b/gems/decomplex/examples/swift/implicit-control-flow.swift new file mode 100644 index 000000000..fcfb6ba52 --- /dev/null +++ b/gems/decomplex/examples/swift/implicit-control-flow.swift @@ -0,0 +1,13 @@ +class FlowExample { + var status = 0 + var valid = false + var done = false + func prepare() { self.status = 1 } + func validate() { self.valid = self.status == 1 } + func commit() { self.done = self.valid } + func ok1() { self.prepare(); self.validate(); self.commit() } + func ok2() { self.prepare(); self.validate(); self.commit() } + func ok3() { self.prepare(); self.validate(); self.commit() } + func ok4() { self.prepare(); self.validate(); self.commit() } + func drift() { self.validate(); self.prepare(); self.commit() } +} diff --git a/gems/decomplex/examples/swift/inconsistent-rename-clone.swift b/gems/decomplex/examples/swift/inconsistent-rename-clone.swift new file mode 100644 index 000000000..696feba3a --- /dev/null +++ b/gems/decomplex/examples/swift/inconsistent-rename-clone.swift @@ -0,0 +1,2 @@ +func original() { let src = fetch(1); check(src); store(src); finalize(src) } +func pasted() { let dst = fetch(2); check(dst); store(src); finalize(dst) } diff --git 
a/gems/decomplex/examples/swift/local-flow.swift b/gems/decomplex/examples/swift/local-flow.swift new file mode 100644 index 000000000..375727f9d --- /dev/null +++ b/gems/decomplex/examples/swift/local-flow.swift @@ -0,0 +1,9 @@ +func mixed(price: Int, tax: Int) -> Result { + let subtotal = price + tax + let total = subtotal.round() + + let timestamp = now() + let buffer = Buffer.init() + buffer.push(timestamp) + return Result.init(total, buffer) +} diff --git a/gems/decomplex/examples/swift/locality-drag.swift b/gems/decomplex/examples/swift/locality-drag.swift new file mode 100644 index 000000000..d73827fe1 --- /dev/null +++ b/gems/decomplex/examples/swift/locality-drag.swift @@ -0,0 +1,27 @@ +func run(user: User, cart: Cart, logger: Logger) { + let receipt_id = user.id + + let total = cart.total + if total > 100 { + if cart.discountable() { + let discount = 10 + } + } + if cart.taxable() { + if cart.region { + let tax = total * 2 + } + } + if logger.enabled() { + if logger.debug() { + logger.info(total) + } + } + if cart.valid() { + if cart.ready() { + let status = 1 + } + } + + emit(receipt_id) +} diff --git a/gems/decomplex/examples/swift/miner.swift b/gems/decomplex/examples/swift/miner.swift new file mode 100644 index 000000000..2c4f863ad --- /dev/null +++ b/gems/decomplex/examples/swift/miner.swift @@ -0,0 +1,4 @@ +func one(a: Bool,b: Bool,c: Bool) -> Bool { return a && b && c } +func two(a: Bool,b: Bool,c: Bool) -> Bool { return a && b && c } +func three(a: Bool,b: Bool,c: Bool) -> Bool { return a && b && c } +func broken(a: Bool,b: Bool) -> Bool { return a && b } diff --git a/gems/decomplex/examples/swift/operational-discontinuity.swift b/gems/decomplex/examples/swift/operational-discontinuity.swift new file mode 100644 index 000000000..837d4b336 --- /dev/null +++ b/gems/decomplex/examples/swift/operational-discontinuity.swift @@ -0,0 +1,9 @@ +func phase_shift() { + let a = 1 + let b = 2 + + // Phase 2 + let x = 3 + let y = 4 + print(x); print(y) +} diff 
--git a/gems/decomplex/examples/swift/oversized-predicate.swift b/gems/decomplex/examples/swift/oversized-predicate.swift new file mode 100644 index 000000000..e0128fcc0 --- /dev/null +++ b/gems/decomplex/examples/swift/oversized-predicate.swift @@ -0,0 +1 @@ +func complex_check(a: Bool,b: Bool,c: Bool,d: Bool) { if a && b && c && d { print("too big") } } diff --git a/gems/decomplex/examples/swift/path-condition.swift b/gems/decomplex/examples/swift/path-condition.swift new file mode 100644 index 000000000..4f0e26ef0 --- /dev/null +++ b/gems/decomplex/examples/swift/path-condition.swift @@ -0,0 +1,4 @@ +func one(x: X,y: Y,z: Z) { if x.p() && y.q() && z.r() { go(x) } } +func two(x: X,y: Y,z: Z) { if x.p() && y.q() && z.r() { go(x) } } +func three(x: X,y: Y,z: Z) { if x.p() && y.q() && z.r() { go(x) } } +func bug(x: X,y: Y,z: Z) { if x.p() && y.q() { go(x) } } diff --git a/gems/decomplex/examples/swift/predicate-alias.swift b/gems/decomplex/examples/swift/predicate-alias.swift new file mode 100644 index 000000000..d00c8d1dd --- /dev/null +++ b/gems/decomplex/examples/swift/predicate-alias.swift @@ -0,0 +1,3 @@ +func first() -> Bool { return true } +func second() -> Bool { return true } +func other() -> Bool { return false } diff --git a/gems/decomplex/examples/swift/redundant-nil-guard.swift b/gems/decomplex/examples/swift/redundant-nil-guard.swift new file mode 100644 index 000000000..b288620f3 --- /dev/null +++ b/gems/decomplex/examples/swift/redundant-nil-guard.swift @@ -0,0 +1 @@ +func check(value: Value) { if value.isSome() { value.isNull() } } diff --git a/gems/decomplex/examples/swift/semantic-alias.swift b/gems/decomplex/examples/swift/semantic-alias.swift new file mode 100644 index 000000000..178244e76 --- /dev/null +++ b/gems/decomplex/examples/swift/semantic-alias.swift @@ -0,0 +1,4 @@ +func frame(node: Node) -> Bool { return node.provenance == FRAME } +func is_frame(node: Node) -> Bool { return provenance == FRAME } +func heap(node: Node) -> Bool { return 
node.provenance == HEAP } +func somewhere(node: Node) -> Int { if node.provenance == FRAME { return 1 }; return 0 } diff --git a/gems/decomplex/examples/swift/sequence-mine.swift b/gems/decomplex/examples/swift/sequence-mine.swift new file mode 100644 index 000000000..bd72413da --- /dev/null +++ b/gems/decomplex/examples/swift/sequence-mine.swift @@ -0,0 +1,5 @@ +func one() { alloc_mark(x); body1(); cleanup(x) } +func two() { alloc_mark(y); body2(); cleanup(y) } +func three() { alloc_mark(z); body3(); cleanup(z) } +func four() { alloc_mark(w); body4(); cleanup(w) } +func leak() { alloc_mark(q); use_value(q) } diff --git a/gems/decomplex/examples/swift/state-branch-density.swift b/gems/decomplex/examples/swift/state-branch-density.swift new file mode 100644 index 000000000..bbcf5c6c0 --- /dev/null +++ b/gems/decomplex/examples/swift/state-branch-density.swift @@ -0,0 +1 @@ +class StateBranchChecker { var checked = false; func check(user: User) { if user.admin { self.checked = true } if self.checked && user.name == "admin" { print("hello") } } } diff --git a/gems/decomplex/examples/swift/state-mesh.swift b/gems/decomplex/examples/swift/state-mesh.swift new file mode 100644 index 000000000..0faaf547a --- /dev/null +++ b/gems/decomplex/examples/swift/state-mesh.swift @@ -0,0 +1 @@ +class StateMeshExample { var a = 0; var b = 0; func initialize() { self.a = 1; self.b = 2 } func writer() { self.a = 3 } func reader() -> Int { return self.a + self.b } func a_alias() -> Int { return self.a } } diff --git a/gems/decomplex/examples/swift/structural-topology.swift b/gems/decomplex/examples/swift/structural-topology.swift new file mode 100644 index 000000000..1becdb230 --- /dev/null +++ b/gems/decomplex/examples/swift/structural-topology.swift @@ -0,0 +1 @@ +class Worker { func run(items: Items) { self.prepare(); if self.ready() { self.validate() }; for item in items { self.helper(item: item) } } private func prepare() {} private func ready() -> Bool { return true } func 
validate() {} private func helper(item: Item) { item.use() } } diff --git a/gems/decomplex/examples/swift/temporal-ordering-pressure.swift b/gems/decomplex/examples/swift/temporal-ordering-pressure.swift new file mode 100644 index 000000000..775acd72d --- /dev/null +++ b/gems/decomplex/examples/swift/temporal-ordering-pressure.swift @@ -0,0 +1 @@ +class TemporalOrderExample { var a = 0; var b = 0; func one() { self.a = 1 } func two() { self.a = 2; self.b = 3 } func three() { self.b = 4 } func reader() -> Int { return self.a } } diff --git a/gems/decomplex/examples/swift/weighted-inlined-complexity.swift b/gems/decomplex/examples/swift/weighted-inlined-complexity.swift new file mode 100644 index 000000000..98e6579b2 --- /dev/null +++ b/gems/decomplex/examples/swift/weighted-inlined-complexity.swift @@ -0,0 +1,5 @@ +func checkout(user: User, cart: Cart) { validate_user(user: user); apply_discount(cart: cart); process_payment(user: user, cart: cart); audit_cart(cart: cart) } +func validate_user(user: User) -> Bool { if user.active() && !user.suspended() { if user.profile.complete() { return true } else { return false } } else { return false } } +func apply_discount(cart: Cart) -> Int { if cart.total > 100 && eligible() { if holiday() { return 20 } else if loyalty_month() { return 15 } else { return 10 } }; return 0 } +func process_payment(user: User, cart: Cart) { if gateway.ready() { if cart.total > 0 && user.active() { if fraud_check(user) { charge(user, cart) } else { decline(user) } } } } +func audit_cart(cart: Cart) { for item in cart.items { if item.taxable() { if item.region && item.amount > 0 { record_tax(item) } } } } diff --git a/gems/decomplex/examples/typescript/co-update.ts b/gems/decomplex/examples/typescript/co-update.ts new file mode 100644 index 000000000..7ced4120f --- /dev/null +++ b/gems/decomplex/examples/typescript/co-update.ts @@ -0,0 +1,4 @@ +function stable_one(node: Node) { node.storage = 1; node.provenance = 1; } +function stable_two(node: 
Node) { node.storage = 1; node.provenance = 1; } +function stable_three(node: Node) { node.storage = 1; node.provenance = 1; } +function misses_provenance(node: Node) { node.storage = 1; } diff --git a/gems/decomplex/examples/typescript/decision-pressure.ts b/gems/decomplex/examples/typescript/decision-pressure.ts new file mode 100644 index 000000000..3356de7d5 --- /dev/null +++ b/gems/decomplex/examples/typescript/decision-pressure.ts @@ -0,0 +1 @@ +function scan(node: Node): boolean { const value = node.symbol; return value.isNull(); } diff --git a/gems/decomplex/examples/typescript/derived-state.ts b/gems/decomplex/examples/typescript/derived-state.ts new file mode 100644 index 000000000..de02aa6d3 --- /dev/null +++ b/gems/decomplex/examples/typescript/derived-state.ts @@ -0,0 +1 @@ +function check(input: number) { const cached = input + 1; input = 2; print(cached); } diff --git a/gems/decomplex/examples/typescript/false-simplicity.ts b/gems/decomplex/examples/typescript/false-simplicity.ts new file mode 100644 index 000000000..4bd9d8e03 --- /dev/null +++ b/gems/decomplex/examples/typescript/false-simplicity.ts @@ -0,0 +1 @@ +class FalseSimplicityExample { hack() { print("hidden IO"); } } diff --git a/gems/decomplex/examples/typescript/fat-union.ts b/gems/decomplex/examples/typescript/fat-union.ts new file mode 100644 index 000000000..68ee3bff9 --- /dev/null +++ b/gems/decomplex/examples/typescript/fat-union.ts @@ -0,0 +1 @@ +function handle(node: Node) { switch (node) { case AST.Call: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv(); break; case AST.Func: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name(); break; case AST.Lit: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value(); break; } } diff --git a/gems/decomplex/examples/typescript/flay-similarity.ts b/gems/decomplex/examples/typescript/flay-similarity.ts new file mode 100644 index 000000000..b838287c8 --- /dev/null +++ 
b/gems/decomplex/examples/typescript/flay-similarity.ts @@ -0,0 +1,2 @@ +function first_clone(node: Node): number { let total = 0; const value1 = node.part1; if (value1.ready() && value1.enabled()) { total += value1.amount; } const value2 = node.part2; if (value2.ready() && value2.enabled()) { total += value2.amount; } const value3 = node.part3; if (value3.ready() && value3.enabled()) { total += value3.amount; } const value4 = node.part4; if (value4.ready() && value4.enabled()) { total += value4.amount; } const value5 = node.part5; if (value5.ready() && value5.enabled()) { total += value5.amount; } const value6 = node.part6; if (value6.ready() && value6.enabled()) { total += value6.amount; } const value7 = node.part7; if (value7.ready() && value7.enabled()) { total += value7.amount; } const value8 = node.part8; if (value8.ready() && value8.enabled()) { total += value8.amount; } return total; } +function second_clone(entry: Node): number { let total = 0; const item1 = entry.part1; if (item1.ready() && item1.enabled()) { total += item1.amount; } const item2 = entry.part2; if (item2.ready() && item2.enabled()) { total += item2.amount; } const item3 = entry.part3; if (item3.ready() && item3.enabled()) { total += item3.amount; } const item4 = entry.part4; if (item4.ready() && item4.enabled()) { total += item4.amount; } const item5 = entry.part5; if (item5.ready() && item5.enabled()) { total += item5.amount; } const item6 = entry.part6; if (item6.ready() && item6.enabled()) { total += item6.amount; } const item7 = entry.part7; if (item7.ready() && item7.enabled()) { total += item7.amount; } const item8 = entry.part8; if (item8.ready() && item8.enabled()) { total += item8.amount; } return total; } diff --git a/gems/decomplex/examples/typescript/function-lcom.ts b/gems/decomplex/examples/typescript/function-lcom.ts new file mode 100644 index 000000000..42c2f141e --- /dev/null +++ b/gems/decomplex/examples/typescript/function-lcom.ts @@ -0,0 +1,12 @@ +function mixed(price: 
number, tax: number, logger: Logger) { + const subtotal = price + tax; + const total = subtotal * 2; + const rounded = total.round(); + + const timestamp = now(); + const buffer = Buffer.init(); + buffer.push(timestamp); + logger.info(buffer); + + return Result.init(rounded, buffer); +} diff --git a/gems/decomplex/examples/typescript/implicit-control-flow.ts b/gems/decomplex/examples/typescript/implicit-control-flow.ts new file mode 100644 index 000000000..7878c7cf7 --- /dev/null +++ b/gems/decomplex/examples/typescript/implicit-control-flow.ts @@ -0,0 +1 @@ +class FlowExample { prepare() { this.status = 1; } validate() { this.valid = this.status == 1; } commit() { this.done = this.valid; } ok1() { this.prepare(); this.validate(); this.commit(); } ok2() { this.prepare(); this.validate(); this.commit(); } ok3() { this.prepare(); this.validate(); this.commit(); } ok4() { this.prepare(); this.validate(); this.commit(); } drift() { this.validate(); this.prepare(); this.commit(); } } diff --git a/gems/decomplex/examples/typescript/inconsistent-rename-clone.ts b/gems/decomplex/examples/typescript/inconsistent-rename-clone.ts new file mode 100644 index 000000000..67430611b --- /dev/null +++ b/gems/decomplex/examples/typescript/inconsistent-rename-clone.ts @@ -0,0 +1,2 @@ +function original() { const src = fetch(1); check(src); store(src); finalize(src); } +function pasted() { const dst = fetch(2); check(dst); store(src); finalize(dst); } diff --git a/gems/decomplex/examples/typescript/local-flow.ts b/gems/decomplex/examples/typescript/local-flow.ts new file mode 100644 index 000000000..65c38cad3 --- /dev/null +++ b/gems/decomplex/examples/typescript/local-flow.ts @@ -0,0 +1,9 @@ +function mixed(price: number, tax: number) { + const subtotal = price + tax; + const total = subtotal.round(); + + const timestamp = now(); + const buffer = Buffer.init(); + buffer.push(timestamp); + return Result.init(total, buffer); +} diff --git 
a/gems/decomplex/examples/typescript/locality-drag.ts b/gems/decomplex/examples/typescript/locality-drag.ts new file mode 100644 index 000000000..581fc1db1 --- /dev/null +++ b/gems/decomplex/examples/typescript/locality-drag.ts @@ -0,0 +1,27 @@ +function run(user: User, cart: Cart, logger: Logger) { + const receipt_id = user.id; + + const total = cart.total; + if (total > 100) { + if (cart.discountable()) { + const discount = 10; + } + } + if (cart.taxable()) { + if (cart.region) { + const tax = total * 2; + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total); + } + } + if (cart.valid()) { + if (cart.ready()) { + const status = 1; + } + } + + emit(receipt_id); +} diff --git a/gems/decomplex/examples/typescript/miner.ts b/gems/decomplex/examples/typescript/miner.ts new file mode 100644 index 000000000..384e6c1ba --- /dev/null +++ b/gems/decomplex/examples/typescript/miner.ts @@ -0,0 +1,4 @@ +function one(a: boolean,b: boolean,c: boolean): boolean { return a && b && c; } +function two(a: boolean,b: boolean,c: boolean): boolean { return a && b && c; } +function three(a: boolean,b: boolean,c: boolean): boolean { return a && b && c; } +function broken(a: boolean,b: boolean): boolean { return a && b; } diff --git a/gems/decomplex/examples/typescript/operational-discontinuity.ts b/gems/decomplex/examples/typescript/operational-discontinuity.ts new file mode 100644 index 000000000..629c5e9a2 --- /dev/null +++ b/gems/decomplex/examples/typescript/operational-discontinuity.ts @@ -0,0 +1,9 @@ +function phase_shift() { + const a = 1; + const b = 2; + + // Phase 2 + const x = 3; + const y = 4; + print(x); print(y); +} diff --git a/gems/decomplex/examples/typescript/oversized-predicate.ts b/gems/decomplex/examples/typescript/oversized-predicate.ts new file mode 100644 index 000000000..0563b6ef1 --- /dev/null +++ b/gems/decomplex/examples/typescript/oversized-predicate.ts @@ -0,0 +1 @@ +function complex_check(a: boolean,b: boolean,c: boolean,d: boolean) { 
if (a && b && c && d) { print("too big"); } } diff --git a/gems/decomplex/examples/typescript/path-condition.ts b/gems/decomplex/examples/typescript/path-condition.ts new file mode 100644 index 000000000..72a739b13 --- /dev/null +++ b/gems/decomplex/examples/typescript/path-condition.ts @@ -0,0 +1,4 @@ +function one(x: X,y: Y,z: Z) { if (x.p() && y.q() && z.r()) { go(x); } } +function two(x: X,y: Y,z: Z) { if (x.p() && y.q() && z.r()) { go(x); } } +function three(x: X,y: Y,z: Z) { if (x.p() && y.q() && z.r()) { go(x); } } +function bug(x: X,y: Y,z: Z) { if (x.p() && y.q()) { go(x); } } diff --git a/gems/decomplex/examples/typescript/predicate-alias.ts b/gems/decomplex/examples/typescript/predicate-alias.ts new file mode 100644 index 000000000..6d32d956a --- /dev/null +++ b/gems/decomplex/examples/typescript/predicate-alias.ts @@ -0,0 +1,3 @@ +function first(): boolean { return true; } +function second(): boolean { return true; } +function other(): boolean { return false; } diff --git a/gems/decomplex/examples/typescript/redundant-nil-guard.ts b/gems/decomplex/examples/typescript/redundant-nil-guard.ts new file mode 100644 index 000000000..ab1fcae19 --- /dev/null +++ b/gems/decomplex/examples/typescript/redundant-nil-guard.ts @@ -0,0 +1 @@ +function check(value: Value) { if (value.isSome()) { value.isNull(); } } diff --git a/gems/decomplex/examples/typescript/semantic-alias.ts b/gems/decomplex/examples/typescript/semantic-alias.ts new file mode 100644 index 000000000..9161d9f3c --- /dev/null +++ b/gems/decomplex/examples/typescript/semantic-alias.ts @@ -0,0 +1,4 @@ +function frame(node: Node): boolean { return node.provenance == FRAME; } +function is_frame(node: Node): boolean { return provenance == FRAME; } +function heap(node: Node): boolean { return node.provenance == HEAP; } +function somewhere(node: Node): number { if (node.provenance == FRAME) { return 1; } return 0; } diff --git a/gems/decomplex/examples/typescript/sequence-mine.ts 
b/gems/decomplex/examples/typescript/sequence-mine.ts new file mode 100644 index 000000000..72ee4959a --- /dev/null +++ b/gems/decomplex/examples/typescript/sequence-mine.ts @@ -0,0 +1,5 @@ +function one() { alloc_mark(x); body1(); cleanup(x); } +function two() { alloc_mark(y); body2(); cleanup(y); } +function three() { alloc_mark(z); body3(); cleanup(z); } +function four() { alloc_mark(w); body4(); cleanup(w); } +function leak() { alloc_mark(q); use_value(q); } diff --git a/gems/decomplex/examples/typescript/state-branch-density.ts b/gems/decomplex/examples/typescript/state-branch-density.ts new file mode 100644 index 000000000..ebb16349d --- /dev/null +++ b/gems/decomplex/examples/typescript/state-branch-density.ts @@ -0,0 +1 @@ +class StateBranchChecker { check(user) { if (user.admin) { this.checked = true; } if (this.checked && user.name == "admin") { print("hello"); } } } diff --git a/gems/decomplex/examples/typescript/state-mesh.ts b/gems/decomplex/examples/typescript/state-mesh.ts new file mode 100644 index 000000000..3e6271f94 --- /dev/null +++ b/gems/decomplex/examples/typescript/state-mesh.ts @@ -0,0 +1 @@ +class StateMeshExample { initialize() { this.a = 1; this.b = 2; } writer() { this.a = 3; } reader() { return this.a + this.b; } a_alias() { return this.a; } } diff --git a/gems/decomplex/examples/typescript/structural-topology.ts b/gems/decomplex/examples/typescript/structural-topology.ts new file mode 100644 index 000000000..b7559a669 --- /dev/null +++ b/gems/decomplex/examples/typescript/structural-topology.ts @@ -0,0 +1 @@ +class Worker { run(items) { this.prepare(); if (this.ready()) { this.validate(); } for (const item of items) { this.helper(item); } } prepare() {} ready() { return true; } validate() {} helper(item) { return item; } } diff --git a/gems/decomplex/examples/typescript/temporal-ordering-pressure.ts b/gems/decomplex/examples/typescript/temporal-ordering-pressure.ts new file mode 100644 index 000000000..0901e3c2e --- /dev/null +++ 
b/gems/decomplex/examples/typescript/temporal-ordering-pressure.ts @@ -0,0 +1 @@ +class TemporalOrderExample { one() { this.a = 1; } two() { this.a = 2; this.b = 3; } three() { this.b = 4; } reader() { return this.a; } } diff --git a/gems/decomplex/examples/typescript/weighted-inlined-complexity.ts b/gems/decomplex/examples/typescript/weighted-inlined-complexity.ts new file mode 100644 index 000000000..8dde50019 --- /dev/null +++ b/gems/decomplex/examples/typescript/weighted-inlined-complexity.ts @@ -0,0 +1,5 @@ +function checkout(user: User, cart: Cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart); } +function validate_user(user: User) { if (user.active() && !user.suspended()) { if (user.profile.complete()) { return true; } else { return false; } } else { return false; } } +function apply_discount(cart: Cart) { if (cart.total > 100 && eligible()) { if (holiday()) { return 20; } else if (loyalty_month()) { return 15; } else { return 10; } } return 0; } +function process_payment(user: User, cart: Cart) { if (gateway.ready()) { if (cart.total > 0 && user.active()) { if (fraud_check(user)) { charge(user, cart); } else { decline(user); } } } } +function audit_cart(cart: Cart) { for (const item of cart.items) { if (item.taxable()) { if (item.region && item.amount > 0) { record_tax(item); } } } } diff --git a/gems/decomplex/examples/zig/co-update.zig b/gems/decomplex/examples/zig/co-update.zig new file mode 100644 index 000000000..2170aaa99 --- /dev/null +++ b/gems/decomplex/examples/zig/co-update.zig @@ -0,0 +1,23 @@ +const Node = struct { + storage: i32, + provenance: i32, +}; + +pub fn stable_one(node: *Node) void { + node.storage = 1; + node.provenance = 1; +} + +pub fn stable_two(node: *Node) void { + node.storage = 1; + node.provenance = 1; +} + +pub fn stable_three(node: *Node) void { + node.storage = 1; + node.provenance = 1; +} + +pub fn misses_provenance(node: *Node) void { + node.storage = 1; +} diff --git 
a/gems/decomplex/examples/zig/decision-pressure.zig b/gems/decomplex/examples/zig/decision-pressure.zig new file mode 100644 index 000000000..c7bfb6725 --- /dev/null +++ b/gems/decomplex/examples/zig/decision-pressure.zig @@ -0,0 +1,3 @@ +pub fn scan(node: Node) bool { + return node.symbol.isNull(); +} diff --git a/gems/decomplex/examples/zig/derived-state.zig b/gems/decomplex/examples/zig/derived-state.zig new file mode 100644 index 000000000..47904c4ad --- /dev/null +++ b/gems/decomplex/examples/zig/derived-state.zig @@ -0,0 +1,6 @@ +pub fn check(input_value: i32) void { + var input = input_value; + const cached = input + 1; + input = 2; + print(cached); +} diff --git a/gems/decomplex/examples/zig/false-simplicity.zig b/gems/decomplex/examples/zig/false-simplicity.zig new file mode 100644 index 000000000..dc9f857b0 --- /dev/null +++ b/gems/decomplex/examples/zig/false-simplicity.zig @@ -0,0 +1,6 @@ +const FalseSimplicityExample = struct { + pub fn hack(self: *FalseSimplicityExample) void { + _ = self; + print("hidden IO"); + } +}; diff --git a/gems/decomplex/examples/zig/fat-union.zig b/gems/decomplex/examples/zig/fat-union.zig new file mode 100644 index 000000000..7ac30e8a2 --- /dev/null +++ b/gems/decomplex/examples/zig/fat-union.zig @@ -0,0 +1,28 @@ +pub fn handle(node: Ast) void { + switch (node) { + AST.Call => { + node.line(); + node.col(); + node.ty(); + node.span(); + node.parent(); + node.recv(); + }, + AST.Func => { + node.line(); + node.col(); + node.ty(); + node.span(); + node.parent(); + node.name(); + }, + AST.Lit => { + node.line(); + node.col(); + node.ty(); + node.span(); + node.parent(); + node.value(); + }, + } +} diff --git a/gems/decomplex/examples/zig/flay-similarity.zig b/gems/decomplex/examples/zig/flay-similarity.zig new file mode 100644 index 000000000..72a75ff2e --- /dev/null +++ b/gems/decomplex/examples/zig/flay-similarity.zig @@ -0,0 +1,41 @@ +pub fn first_clone(node: Node) i32 { + var total = 0; + const value1 = node.part1; + if 
(value1.ready() and value1.enabled()) { total += value1.amount; } + const value2 = node.part2; + if (value2.ready() and value2.enabled()) { total += value2.amount; } + const value3 = node.part3; + if (value3.ready() and value3.enabled()) { total += value3.amount; } + const value4 = node.part4; + if (value4.ready() and value4.enabled()) { total += value4.amount; } + const value5 = node.part5; + if (value5.ready() and value5.enabled()) { total += value5.amount; } + const value6 = node.part6; + if (value6.ready() and value6.enabled()) { total += value6.amount; } + const value7 = node.part7; + if (value7.ready() and value7.enabled()) { total += value7.amount; } + const value8 = node.part8; + if (value8.ready() and value8.enabled()) { total += value8.amount; } + return total; +} + +pub fn second_clone(entry: Node) i32 { + var total = 0; + const item1 = entry.part1; + if (item1.ready() and item1.enabled()) { total += item1.amount; } + const item2 = entry.part2; + if (item2.ready() and item2.enabled()) { total += item2.amount; } + const item3 = entry.part3; + if (item3.ready() and item3.enabled()) { total += item3.amount; } + const item4 = entry.part4; + if (item4.ready() and item4.enabled()) { total += item4.amount; } + const item5 = entry.part5; + if (item5.ready() and item5.enabled()) { total += item5.amount; } + const item6 = entry.part6; + if (item6.ready() and item6.enabled()) { total += item6.amount; } + const item7 = entry.part7; + if (item7.ready() and item7.enabled()) { total += item7.amount; } + const item8 = entry.part8; + if (item8.ready() and item8.enabled()) { total += item8.amount; } + return total; +} diff --git a/gems/decomplex/examples/zig/function-lcom.zig b/gems/decomplex/examples/zig/function-lcom.zig new file mode 100644 index 000000000..48bd1c086 --- /dev/null +++ b/gems/decomplex/examples/zig/function-lcom.zig @@ -0,0 +1,12 @@ +pub fn mixed(price: i32, tax: i32, logger: Logger) Result { + const subtotal = price + tax; + const total = subtotal * 2; 
+ const rounded = total.round(); + + const timestamp = now(); + var buffer = Buffer.init(); + buffer.push(timestamp); + logger.info(buffer); + + return Result.init(rounded, buffer); +} diff --git a/gems/decomplex/examples/zig/implicit-control-flow.zig b/gems/decomplex/examples/zig/implicit-control-flow.zig new file mode 100644 index 000000000..3fc7dab36 --- /dev/null +++ b/gems/decomplex/examples/zig/implicit-control-flow.zig @@ -0,0 +1,15 @@ +const FlowExample = struct { + status: i32, + valid: bool, + done: bool, + + pub fn prepare(self: *FlowExample) void { self.status = 1; } + pub fn validate(self: *FlowExample) void { self.valid = self.status == 1; } + pub fn commit(self: *FlowExample) void { self.done = self.valid; } + + pub fn ok1(self: *FlowExample) void { self.prepare(); self.validate(); self.commit(); } + pub fn ok2(self: *FlowExample) void { self.prepare(); self.validate(); self.commit(); } + pub fn ok3(self: *FlowExample) void { self.prepare(); self.validate(); self.commit(); } + pub fn ok4(self: *FlowExample) void { self.prepare(); self.validate(); self.commit(); } + pub fn drift(self: *FlowExample) void { self.validate(); self.prepare(); self.commit(); } +}; diff --git a/gems/decomplex/examples/zig/inconsistent-rename-clone.zig b/gems/decomplex/examples/zig/inconsistent-rename-clone.zig new file mode 100644 index 000000000..e482bdd60 --- /dev/null +++ b/gems/decomplex/examples/zig/inconsistent-rename-clone.zig @@ -0,0 +1,13 @@ +pub fn original() void { + const src = fetch(1); + check(src); + store(src); + finalize(src); +} + +pub fn pasted() void { + const dst = fetch(2); + check(dst); + store(src); + finalize(dst); +} diff --git a/gems/decomplex/examples/zig/local-flow.zig b/gems/decomplex/examples/zig/local-flow.zig new file mode 100644 index 000000000..a0eb2b536 --- /dev/null +++ b/gems/decomplex/examples/zig/local-flow.zig @@ -0,0 +1,9 @@ +pub fn mixed(price: i32, tax: i32) Result { + const subtotal = price + tax; + const total = subtotal.round(); 
+ + const timestamp = now(); + var buffer = Buffer.init(); + buffer.push(timestamp); + return Result.init(total, buffer); +} diff --git a/gems/decomplex/examples/zig/locality-drag.zig b/gems/decomplex/examples/zig/locality-drag.zig new file mode 100644 index 000000000..67276fa64 --- /dev/null +++ b/gems/decomplex/examples/zig/locality-drag.zig @@ -0,0 +1,30 @@ +pub fn run(user: User, cart: Cart, logger: Logger) void { + const receipt_id = user.id; + + const total = cart.total; + if (total > 100) { + if (cart.discountable()) { + const discount = 10; + _ = discount; + } + } + if (cart.taxable()) { + if (cart.region) { + const tax = total * 2; + _ = tax; + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total); + } + } + if (cart.valid()) { + if (cart.ready()) { + const status = 1; + _ = status; + } + } + + emit(receipt_id); +} diff --git a/gems/decomplex/examples/zig/miner.zig b/gems/decomplex/examples/zig/miner.zig new file mode 100644 index 000000000..e89739c2f --- /dev/null +++ b/gems/decomplex/examples/zig/miner.zig @@ -0,0 +1,15 @@ +pub fn one(a: bool, b: bool, c: bool) bool { + return a and b and c; +} + +pub fn two(a: bool, b: bool, c: bool) bool { + return a and b and c; +} + +pub fn three(a: bool, b: bool, c: bool) bool { + return a and b and c; +} + +pub fn broken(a: bool, b: bool) bool { + return a and b; +} diff --git a/gems/decomplex/examples/zig/operational-discontinuity.zig b/gems/decomplex/examples/zig/operational-discontinuity.zig new file mode 100644 index 000000000..c63ac551d --- /dev/null +++ b/gems/decomplex/examples/zig/operational-discontinuity.zig @@ -0,0 +1,12 @@ +pub fn phase_shift() void { + const a = 1; + const b = 2; + _ = a; + _ = b; + + // Phase 2 + const x = 3; + const y = 4; + print(x); + print(y); +} diff --git a/gems/decomplex/examples/zig/oversized-predicate.zig b/gems/decomplex/examples/zig/oversized-predicate.zig new file mode 100644 index 000000000..d9afdb4f2 --- /dev/null +++ 
b/gems/decomplex/examples/zig/oversized-predicate.zig @@ -0,0 +1,5 @@ +pub fn complex_check(a: bool, b: bool, c: bool, d: bool) void { + if (a and b and c and d) { + print("too big"); + } +} diff --git a/gems/decomplex/examples/zig/path-condition.zig b/gems/decomplex/examples/zig/path-condition.zig new file mode 100644 index 000000000..df71c9ca2 --- /dev/null +++ b/gems/decomplex/examples/zig/path-condition.zig @@ -0,0 +1,15 @@ +pub fn one(x: X, y: Y, z: Z) void { + if (x.p() and y.q() and z.r()) { go(x); } +} + +pub fn two(x: X, y: Y, z: Z) void { + if (x.p() and y.q() and z.r()) { go(x); } +} + +pub fn three(x: X, y: Y, z: Z) void { + if (x.p() and y.q() and z.r()) { go(x); } +} + +pub fn bug(x: X, y: Y, z: Z) void { + if (x.p() and y.q()) { go(x); } +} diff --git a/gems/decomplex/examples/zig/predicate-alias.zig b/gems/decomplex/examples/zig/predicate-alias.zig new file mode 100644 index 000000000..db6b4bd96 --- /dev/null +++ b/gems/decomplex/examples/zig/predicate-alias.zig @@ -0,0 +1,11 @@ +pub fn first() bool { + return true; +} + +pub fn second() bool { + return true; +} + +pub fn other() bool { + return false; +} diff --git a/gems/decomplex/examples/zig/redundant-nil-guard.zig b/gems/decomplex/examples/zig/redundant-nil-guard.zig new file mode 100644 index 000000000..7b247b668 --- /dev/null +++ b/gems/decomplex/examples/zig/redundant-nil-guard.zig @@ -0,0 +1,5 @@ +pub fn check(value: OptionalItem) void { + if (value.isSome()) { + value.isNull(); + } +} diff --git a/gems/decomplex/examples/zig/semantic-alias.zig b/gems/decomplex/examples/zig/semantic-alias.zig new file mode 100644 index 000000000..f0256494c --- /dev/null +++ b/gems/decomplex/examples/zig/semantic-alias.zig @@ -0,0 +1,8 @@ +pub fn frame(node: Node) bool { return node.provenance == FRAME; } +pub fn is_frame(node: Node) bool { return provenance == FRAME; } +pub fn heap(node: Node) bool { return node.provenance == HEAP; } + +pub fn somewhere(node: Node) i32 { + if (node.provenance == FRAME) { 
return 1; } + return 0; +} diff --git a/gems/decomplex/examples/zig/sequence-mine.zig b/gems/decomplex/examples/zig/sequence-mine.zig new file mode 100644 index 000000000..3b0b73680 --- /dev/null +++ b/gems/decomplex/examples/zig/sequence-mine.zig @@ -0,0 +1,5 @@ +pub fn one() void { alloc_mark(x); body1(); cleanup(x); } +pub fn two() void { alloc_mark(y); body2(); cleanup(y); } +pub fn three() void { alloc_mark(z); body3(); cleanup(z); } +pub fn four() void { alloc_mark(w); body4(); cleanup(w); } +pub fn leak() void { alloc_mark(q); use_value(q); } diff --git a/gems/decomplex/examples/zig/state-branch-density.zig b/gems/decomplex/examples/zig/state-branch-density.zig new file mode 100644 index 000000000..282cae925 --- /dev/null +++ b/gems/decomplex/examples/zig/state-branch-density.zig @@ -0,0 +1,13 @@ +const StateBranchChecker = struct { + checked: bool, + + pub fn check(self: *StateBranchChecker, user: User) void { + if (user.admin) { + self.checked = true; + } + + if (self.checked and user.name == "admin") { + print("hello"); + } + } +}; diff --git a/gems/decomplex/examples/zig/state-mesh.zig b/gems/decomplex/examples/zig/state-mesh.zig new file mode 100644 index 000000000..b8118c136 --- /dev/null +++ b/gems/decomplex/examples/zig/state-mesh.zig @@ -0,0 +1,21 @@ +const StateMeshExample = struct { + a: i32, + b: i32, + + pub fn initialize(self: *StateMeshExample) void { + self.a = 1; + self.b = 2; + } + + pub fn writer(self: *StateMeshExample) void { + self.a = 3; + } + + pub fn reader(self: *StateMeshExample) i32 { + return self.a + self.b; + } + + pub fn a_alias(self: *StateMeshExample) i32 { + return self.a; + } +}; diff --git a/gems/decomplex/examples/zig/structural-topology.zig b/gems/decomplex/examples/zig/structural-topology.zig new file mode 100644 index 000000000..743477099 --- /dev/null +++ b/gems/decomplex/examples/zig/structural-topology.zig @@ -0,0 +1,16 @@ +const Worker = struct { + pub fn run(self: *Worker, items: Items) void { + self.prepare(); + 
if (true) { + self.validate(); + } + for (items) |item| { + self.helper(item); + } + } + + fn prepare(self: *Worker) void { _ = self; } + fn ready(self: *Worker) bool { _ = self; return true; } + pub fn validate(self: *Worker) void { _ = self; } + fn helper(self: *Worker, item: Item) void { _ = self; _ = item; } +}; diff --git a/gems/decomplex/examples/zig/temporal-ordering-pressure.zig b/gems/decomplex/examples/zig/temporal-ordering-pressure.zig new file mode 100644 index 000000000..858b7ffd2 --- /dev/null +++ b/gems/decomplex/examples/zig/temporal-ordering-pressure.zig @@ -0,0 +1,21 @@ +const TemporalOrderExample = struct { + a: i32, + b: i32, + + pub fn one(self: *TemporalOrderExample) void { + self.a = 1; + } + + pub fn two(self: *TemporalOrderExample) void { + self.a = 2; + self.b = 3; + } + + pub fn three(self: *TemporalOrderExample) void { + self.b = 4; + } + + pub fn reader(self: *TemporalOrderExample) i32 { + return self.a; + } +}; diff --git a/gems/decomplex/examples/zig/weighted-inlined-complexity.zig b/gems/decomplex/examples/zig/weighted-inlined-complexity.zig new file mode 100644 index 000000000..56087431e --- /dev/null +++ b/gems/decomplex/examples/zig/weighted-inlined-complexity.zig @@ -0,0 +1,45 @@ +const WeightedInlineExample = struct { + pub fn checkout(self: *WeightedInlineExample, user: User, cart: Cart) void { + self.validate_user(user); + self.apply_discount(cart); + self.process_payment(user, cart); + self.audit_cart(cart); + } + + fn validate_user(self: *WeightedInlineExample, user: User) bool { + _ = self; + if (user.active() and !user.suspended()) { + if (user.profile.complete()) { return true; } else { return false; } + } else { + return false; + } + } + + fn apply_discount(self: *WeightedInlineExample, cart: Cart) i32 { + _ = self; + if (cart.total > 100 and eligible()) { + if (holiday()) { return 20; } else if (loyalty_month()) { return 15; } else { return 10; } + } + return 0; + } + + fn process_payment(self: *WeightedInlineExample, 
user: User, cart: Cart) void { + _ = self; + if (gateway.ready()) { + if (cart.total > 0 and user.active()) { + if (fraud_check(user)) { charge(user, cart); } else { decline(user); } + } + } + } + + fn audit_cart(self: *WeightedInlineExample, cart: Cart) void { + _ = self; + for (cart.items) |item| { + if (item.taxable()) { + if (item.region and item.amount > 0) { + record_tax(item); + } + } + } + } +}; diff --git a/gems/decomplex/lib/decomplex/ast.rb b/gems/decomplex/lib/decomplex/ast.rb index 44f72eee1..24715b753 100644 --- a/gems/decomplex/lib/decomplex/ast.rb +++ b/gems/decomplex/lib/decomplex/ast.rb @@ -1,6 +1,11 @@ # frozen_string_literal: true require "set" +require_relative "ast/node" +require_relative "ast/cache" +require_relative "ast/source_map" +require_relative "ast/semantic_node" +require_relative "ast/semantic_normalizer" module Decomplex # Shared AST primitives for the v1 detectors. Kept separate from the @@ -8,12 +13,6 @@ module Decomplex # so adding it cannot destabilise them (design principle 3); they # will be migrated onto this once it has proven itself. module Ast - Node = Struct.new( - :type, :children, :first_lineno, :first_column, :last_lineno, :last_column, - :text, - keyword_init: true - ) - module_function def parse(file) @@ -25,3961 +24,16 @@ def parse(file) end end - def normalized_cache - @normalized_cache ||= {} - end - - def node?(n) - n.is_a?(Node) - end - - # Exact source text of a node, trivial formatting normalised. - def slice(node, _lines) - return "" unless node?(node) - - node.text.to_s.strip.gsub(/\s+/, " ") - end - - # Language-specific syntax-shape decisions live here, before nodes - # are converted into Decomplex's shared AST vocabulary. 
- class TreeSitterNormalizationAdapter - BINARY_WRAPPER_KINDS = %w[ - binary binary_expression binary_operator boolean_operator comparison_operator - ].freeze - CLASS_KINDS = %w[class class_definition class_declaration class_specifier].freeze - COMMON_ASSIGNMENT_OPERATORS = %w[= += -= *= /= %=].freeze - RUBY_ASSIGNMENT_OPERATORS = (COMMON_ASSIGNMENT_OPERATORS + %w[**= &&= ||= &= |= ^= <<= >>=]).freeze - PYTHON_ASSIGNMENT_OPERATORS = (COMMON_ASSIGNMENT_OPERATORS + %w[//= **= @= &= |= ^= <<= >>= :=]).freeze - LUA_ASSIGNMENT_OPERATORS = %w[=].freeze - TYPESCRIPT_ASSIGNMENT_OPERATORS = ( - COMMON_ASSIGNMENT_OPERATORS + %w[**= <<= >>= >>>= &= |= ^= &&= ||= ??=] - ).freeze - OPERATOR_CALL_OPERATORS = %w[+ - * / % ** | & ^ << >> =~ !~].freeze - BOOLEAN_EXPRESSION_KINDS = %w[binary binary_expression boolean_operator].freeze - COMPARISON_EXPRESSION_KINDS = %w[binary binary_expression comparison_operator].freeze - DOTTED_EXPRESSION_WRAPPER_KINDS = %w[body_statement block_body statement argument_list].freeze - PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS = (DOTTED_EXPRESSION_WRAPPER_KINDS + %w[expression_statement]).freeze - LITERAL_CONTAINER_KINDS = %w[string delimited_symbol regex regex_literal].freeze - LITERAL_FRAGMENT_KINDS = %w[string_content escape_sequence interpolation string_fragment].freeze - CASE_ARGUMENT_WHEN_KINDS = %w[ - when switch_case case_clause expression_case case_statement switch_section - switch_block_statement_group switch_entry when_entry match_arm - ].freeze - CASE_ELSE_KINDS = %w[else switch_default].freeze - CASE_DEFAULT_PATTERN_KINDS = %w[case_pattern match_pattern pattern].freeze - ADAPTER_FUNCTION_KINDS = %w[ - method function_definition function_declaration method_definition - method_declaration function_item singleton_method - ].freeze - STATEMENT_BLOCK_PARENT_KINDS = %w[ - method_declaration constructor_declaration function_declaration function_body - if_statement while_statement for_statement enhanced_for_statement try_statement - catch_clause 
finally_clause do_statement lambda_expression - ].freeze - IDENTIFIER_KINDS = %w[ - identifier simple_identifier property_identifier field_identifier shorthand_property_identifier - ].freeze - LEADING_FUNCTION_WRAPPER_KINDS = %w[body_statement statement].freeze - PYTHON_LEADING_FUNCTION_WRAPPER_KINDS = %w[block].freeze - LUA_LEADING_FUNCTION_WRAPPER_KINDS = %w[block].freeze - OWNER_STATEMENT_NESTED_KIND = %w[class class_definition class_declaration module].freeze - LEADING_OWNER_WRAPPER_KINDS = %w[body_statement statement].freeze - PYTHON_LEADING_OWNER_WRAPPER_KINDS = %w[block].freeze - IF_NODE_KINDS = %w[if if_statement if_modifier unless unless_modifier if_expression conditional].freeze - LEADING_IF_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze - PYTHON_LEADING_IF_WRAPPER_KINDS = %w[block].freeze - LUA_LEADING_IF_WRAPPER_KINDS = %w[block].freeze - LEADING_CASE_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze - LEADING_LOOP_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze - RESCUE_BODY_WRAPPER_KINDS = %w[body_statement block_body statement].freeze - ENSURE_BODY_WRAPPER_KINDS = %w[body_statement block_body statement].freeze - ARRAY_LITERAL_WRAPPER_KINDS = %w[ - body_statement block block_body statement argument_list expression_statement - ].freeze - ARRAY_LITERAL_NODE_KINDS = %w[array list].freeze - ELEMENT_REFERENCE_WRAPPER_KINDS = %w[ - body_statement block block_body statement expression_statement expression_list - ].freeze - ELEMENT_REFERENCE_NODE_KINDS = %w[ - element_reference subscript subscript_expression bracket_index_expression - ].freeze - HASH_LITERAL_WRAPPER_KINDS = %w[ - body_statement block block_body statement argument_list expression_statement parenthesized_expression - ].freeze - HASH_LITERAL_NODE_KINDS = %w[hash dictionary object table_constructor].freeze - EMPTY_BODY_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze - HEREDOC_BODY_WRAPPER_KINDS = %w[body_statement 
block_body statement then].freeze - INTERPOLATED_STATEMENT_WRAPPER_KINDS = %w[body_statement block_body statement argument_list].freeze - CONCATENATED_STRING_WRAPPER_KINDS = %w[body_statement block_body statement argument_list].freeze - PYTHON_CONCATENATED_STRING_WRAPPER_KINDS = (CONCATENATED_STRING_WRAPPER_KINDS + %w[block expression_statement]).freeze - CONCATENATED_STRING_NODE_KINDS = %w[chained_string concatenated_string].freeze - UNWRAP_KINDS = %w[ - parenthesized_expression parenthesized_statements expression_statement statement - case_pattern match_pattern pattern - ].freeze - PYTHON_BODY_FIELD_KINDS = %w[ - elif_clause else_clause for_statement function_definition if_statement - try_statement while_statement with_statement - ].freeze - QUESTION_COLON_TERNARY_KINDS = %w[body_statement block_body statement argument_list conditional].freeze - TYPESCRIPT_TERNARY_KINDS = (QUESTION_COLON_TERNARY_KINDS + %w[ternary_expression]).freeze - - class << self - def for(document) - case document&.language&.to_sym - when :ruby then RubyTreeSitterNormalizationAdapter.new(document) - when :python then PythonTreeSitterNormalizationAdapter.new(document) - when :lua then LuaTreeSitterNormalizationAdapter.new(document) - when :typescript, :javascript then TypeScriptTreeSitterNormalizationAdapter.new(document) - else new(document) - end - end - end - - attr_reader :document - - def initialize(document) - @document = document - end - - def ruby? 
- false - end - - def yield_statement?(node) - %w[body_statement block block_body statement].include?(node.kind) && - node.children.first&.text == "yield" - rescue StandardError - false - end - - def super_statement?(_node) - false - end - - def explicit_alternative(node) - node.named_children.find { |child| %w[else else_clause else_statement].include?(child.kind) } - rescue StandardError - nil - end - - def unary_not_expression?(node) - %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("!") - end - - def unary_minus_expression?(node) - %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("-") - end - - def binary_operator(node) - direct_binary_operator(node).to_s - end - - def class_node?(node) - CLASS_KINDS.include?(node.kind) - end - - def unwrap_node?(node) - UNWRAP_KINDS.include?(node.kind) && node.named_children.size == 1 - end - - def interpolated_string?(node) - node.kind == "string" && node.named_children.any? { |child| child.kind == "interpolation" } - end - - def lambda_expression?(node) - !lambda_target(node).nil? 
- rescue StandardError - false - end - - def lambda_target(node) - return node if node.kind == "lambda" - - nil - rescue StandardError - nil - end - - def interpolation_node?(node) - node.kind == "interpolation" - rescue StandardError - false - end - - def instance_variable?(node) - node.kind == "instance_variable" - rescue StandardError - false - end - - def global_variable?(node) - node.kind == "global_variable" - rescue StandardError - false - end - - def member_assignment_target?(_node) - false - end - - def identifier_text_node?(_node) - false - end - - def literal_fragment_assignment_context?(node) - parent = node.parent - return false unless parent.respond_to?(:kind) - return true if literal_container_kind?(parent) - - literal_fragment_kind?(node) && - parent.parent.respond_to?(:kind) && - literal_container_kind?(parent.parent) - rescue StandardError - false - end - - def assignment_operator?(text) - assignment_operators.include?(text.to_s) - end - - def named_field(node, name) - node.child_by_field_name(name) - rescue StandardError - nil - end - - def safe_navigation_call?(node) - node.children.any? { |child| !child.named? && child.text == "&." } - rescue StandardError - false - end - - def ternary_statement?(node) - !ternary_parts(node).nil? - end - - def ternary_parts(node) - question_colon_ternary_parts(node, QUESTION_COLON_TERNARY_KINDS) - end - - def case_argument_list?(_node) - false - end - - def case_arm?(node) - case_arm_kind?(node) && !case_else_arm?(node) - rescue StandardError - false - end - - def case_else_node(node) - stack = node.named_children.dup - until stack.empty? 
- child = stack.shift - next unless child.respond_to?(:kind) - - return child if case_else_node?(child) - next if case_arm_kind?(child) - - stack.concat(child.named_children) unless adapter_function_kind?(child) - end - - nil - rescue StandardError - nil - end - - def case_else_arm?(_node) - false - end - - def case_else_node?(node) - CASE_ELSE_KINDS.include?(node&.kind) || case_else_arm?(node) - rescue StandardError - false - end - - def leading_function_statement?(node) - leading_function_statement_with_keyword?(node, "def", LEADING_FUNCTION_WRAPPER_KINDS) - end - - def leading_function_name(node) - node.named_children.find { |child| identifier_kind?(child) }&.text - rescue StandardError - nil - end - - def leading_function_body(node) - node.named_children.reverse.find { |child| child.kind == "body_statement" } - rescue StandardError - nil - end - - def leading_owner_statement?(node) - target = leading_owner_target(node) - return false unless target - - %w[class module].include?(target.children.first&.kind.to_s) && - target.named_children.size >= 2 && - !OWNER_STATEMENT_NESTED_KIND.include?(target.named_children.first.kind) - rescue StandardError - false - end - - def leading_owner_target(node) - node if LEADING_OWNER_WRAPPER_KINDS.include?(node.kind) - rescue StandardError - nil - end - - def leading_if_statement?(node) - target = leading_if_target(node) - return false unless target - - !!( - %w[if unless].include?(target.children.first&.kind.to_s) && - target.named_children.size >= 2 && - !IF_NODE_KINDS.include?(target.named_children.first.kind) - ) - rescue StandardError - false - end - - def leading_if_target(node) - node if LEADING_IF_WRAPPER_KINDS.include?(node.kind) - rescue StandardError - nil - end - - def leading_case_statement?(node) - target = leading_case_target(node) - return false unless target - - %w[case match switch].include?(target.children.first&.kind.to_s) && case_arm_descendant?(target) - rescue StandardError - false - end - - def 
leading_case_target(node) - node if LEADING_CASE_WRAPPER_KINDS.include?(node.kind) - rescue StandardError - nil - end - - def leading_loop_statement?(node) - target = leading_loop_target(node) - return false unless target - - !target.children.first&.named? && - %w[while until].include?(target.children.first&.kind.to_s) && - target.named_children.size >= 2 - rescue StandardError - false - end - - def leading_loop_target(node) - node if LEADING_LOOP_WRAPPER_KINDS.include?(node.kind) - rescue StandardError - nil - end - - def rescue_body_statement?(node) - rescue_clauses(node).any? - rescue StandardError - false - end - - def rescue_body_target(node) - node if RESCUE_BODY_WRAPPER_KINDS.include?(node.kind) - rescue StandardError - nil - end - - def rescue_body_nodes(node) - target = rescue_body_target(node) || node - named = target.named_children - rescue_index = named.index { |child| rescue_clause?(child) } - return [] unless rescue_index - - named[0...rescue_index] - rescue StandardError - [] - end - - def rescue_clauses(node) - target = rescue_body_target(node) - return [] unless target - - target.named_children.select { |child| rescue_clause?(child) } - rescue StandardError - [] - end - - def rescue_clause_exceptions(node) - exceptions = node.named_children.find { |child| child.kind == "exceptions" } - return [] unless exceptions - return [exceptions] if exceptions.text.to_s.match?(/\A[A-Z]\w*(?:::\w+)*\z/) - return [exceptions] if exceptions.named_children.empty? && !exceptions.text.to_s.strip.empty? 
- - exceptions.named_children - rescue StandardError - [] - end - - def rescue_clause_exceptions_source(node) - node.named_children.find { |child| child.kind == "exceptions" } - rescue StandardError - nil - end - - def rescue_clause_exception_variable_name(node) - var = node.named_children.find { |child| child.kind == "exception_variable" } - var&.named_children&.find { |child| identifier_kind?(child) } - rescue StandardError - nil - end - - def rescue_clause_exception_variable_source(node) - node.named_children.find { |child| child.kind == "exception_variable" } - rescue StandardError - nil - end - - def rescue_clause_handler(node) - node.named_children.reverse.find do |child| - !%w[exceptions exception_variable comment].include?(child.kind) - end - rescue StandardError - nil - end - - def ensure_body_statement?(node) - !ensure_clause(node).nil? - rescue StandardError - false - end - - def ensure_body_target(node) - node if ENSURE_BODY_WRAPPER_KINDS.include?(node.kind) - rescue StandardError - nil - end - - def ensure_body_nodes(node) - target = ensure_body_target(node) || node - named = target.named_children - ensure_index = named.index { |child| ensure_clause?(child) } - return [] unless ensure_index - - named[0...ensure_index] - rescue StandardError - [] - end - - def ensure_clause(node) - target = ensure_body_target(node) - return nil unless target - - target.named_children.find { |child| ensure_clause?(child) } - rescue StandardError - nil - end - - def ensure_clause_body(_node) - nil - end - - def array_literal_statement?(node) - !array_literal_target(node).nil? 
- rescue StandardError - false - end - - def array_literal_target(node) - return node if ARRAY_LITERAL_NODE_KINDS.include?(node.kind) - return nil unless ARRAY_LITERAL_WRAPPER_KINDS.include?(node.kind) - return node if bracketed?(node, "[", "]") - - child = exact_single_named_child(node, kinds: ARRAY_LITERAL_NODE_KINDS) - return child if child - - named = node.named_children - return nil unless named.size == 1 && ARRAY_LITERAL_NODE_KINDS.include?(named.first.kind) - - child = named.first - stripped = node.text.to_s.strip - child if stripped == child.text.to_s || stripped == "#{child.text};" - rescue StandardError - nil - end - - def array_literal_values(node) - target = array_literal_target(node) || node - target.named_children - rescue StandardError - [] - end - - def element_reference_statement?(node) - !element_reference_target(node).nil? - rescue StandardError - false - end - - def element_reference_target(node) - return node if ELEMENT_REFERENCE_NODE_KINDS.include?(node.kind) - return nil unless ELEMENT_REFERENCE_WRAPPER_KINDS.include?(node.kind) - - named = node.named_children - if named.size == 1 && ELEMENT_REFERENCE_NODE_KINDS.include?(named.first.kind) - stripped = node.text.to_s.strip - child = named.first - return child if stripped == child.text.to_s || stripped == "#{child.text};" - end - - node if element_reference_shape?(node) - rescue StandardError - nil - end - - def element_reference_receiver(node) - target = element_reference_target(node) || node - target.named_children.first - rescue StandardError - nil - end - - def element_reference_arguments(node) - target = element_reference_target(node) || node - target.named_children.drop(1) - rescue StandardError - [] - end - - def hash_literal_statement?(node) - !hash_literal_target(node).nil? 
- rescue StandardError - false - end - - def hash_literal_target(node) - return node if HASH_LITERAL_NODE_KINDS.include?(node.kind) - return nil unless HASH_LITERAL_WRAPPER_KINDS.include?(node.kind) - return nil if statement_block_wrapper?(node) - return node if bracketed?(node, "{", "}") - - named = node.named_children - return nil unless named.size == 1 - - child = named.first - return hash_literal_target(child) if node.kind == "parenthesized_expression" - - stripped = node.text.to_s.strip - if stripped == child.text.to_s || stripped == "#{child.text};" - return child if HASH_LITERAL_NODE_KINDS.include?(child.kind) - return hash_literal_target(child) if HASH_LITERAL_WRAPPER_KINDS.include?(child.kind) - end - - nil - rescue StandardError - nil - end - - def hash_literal_values(node) - target = hash_literal_target(node) || node - target.named_children - rescue StandardError - [] - end - - def empty_body_statement?(node) - EMPTY_BODY_WRAPPER_KINDS.include?(node.kind) && - node.named_children.empty? && - node.text.to_s.strip.empty? - rescue StandardError - false - end - - def heredoc_body_statement?(node) - ruby? && - HEREDOC_BODY_WRAPPER_KINDS.include?(node.kind) && - node.named_children.any? { |child| child.kind == "heredoc_body" } - rescue StandardError - false - end - - def heredoc_call_for_body?(_node) - false - end - - def interpolated_statement?(node) - INTERPOLATED_STATEMENT_WRAPPER_KINDS.include?(node.kind) && - node.named_children.any? { |child| child.kind == "interpolation" } - rescue StandardError - false - end - - def concatenated_string_statement?(node) - !concatenated_string_target(node).nil? - rescue StandardError - false - end - - def concatenated_string_target(node) - return node if concatenated_string_node?(node) - return nil unless concatenated_string_wrapper_kinds.include?(node.kind) - - named = node.named_children - return node if named.size > 1 && named.all? 
{ |child| child.kind == "string" } - return named.first if named.size == 1 && concatenated_string_node?(named.first) - - nil - rescue StandardError - nil - end - - def zero_child_identifier_call?(_node) - false - end - - def operator_call_expression?(node) - operator_call_expression_kinds.include?(node.kind) && - OPERATOR_CALL_OPERATORS.include?(binary_operator(node)) - rescue StandardError - false - end - - def boolean_expression_kind?(node) - boolean_expression_kinds.include?(node.kind) - rescue StandardError - false - end - - def comparison_expression_kind?(node) - comparison_expression_kinds.include?(node.kind) - rescue StandardError - false - end - - def dotted_expression_wrapper?(node) - dotted_expression_wrapper_kinds.include?(node.kind) - rescue StandardError - false - end - - private - - def assignment_operators - COMMON_ASSIGNMENT_OPERATORS - end - - def operator_call_expression_kinds - %w[binary binary_expression] - end - - def boolean_expression_kinds - BOOLEAN_EXPRESSION_KINDS - end - - def comparison_expression_kinds - COMPARISON_EXPRESSION_KINDS - end - - def dotted_expression_wrapper_kinds - DOTTED_EXPRESSION_WRAPPER_KINDS - end - - def concatenated_string_wrapper_kinds - CONCATENATED_STRING_WRAPPER_KINDS - end - - def concatenated_string_node?(node) - CONCATENATED_STRING_NODE_KINDS.include?(node&.kind) && - node.named_children.size > 1 && - node.named_children.all? { |child| child.kind == "string" } - end - - def direct_binary_operator(node) - node.children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) }&.text - rescue StandardError - nil - end - - def question_colon_ternary_parts(node, kinds) - return nil unless kinds.include?(node.kind) - return nil unless node.children.any? { |child| !child.named? && child.text == "?" } - return nil unless node.children.any? { |child| !child.named? 
&& child.text == ":" } - - children = node.named_children - return nil unless children.size >= 3 - - children.first(3) - rescue StandardError - nil - end - - def leading_function_statement_with_keyword?(node, keyword, wrapper_kinds) - wrapper_kinds.include?(node.kind) && - node.children.first&.kind.to_s == keyword && - node.named_children.any? { |child| identifier_kind?(child) } - rescue StandardError - false - end - - def identifier_kind?(node) - IDENTIFIER_KINDS.include?(node&.kind) - end - - def exact_single_named_child(node, kinds:) - children = node.named_children - return nil unless children.size == 1 - - child = children.first - return nil unless kinds.include?(child.kind) - return nil unless node.text.to_s == child.text.to_s - - child - rescue StandardError - nil - end - - def case_arm_kind?(node) - CASE_ARGUMENT_WHEN_KINDS.include?(node&.kind) - end - - def default_case_pattern?(node) - pattern = node.named_children.find { |child| CASE_DEFAULT_PATTERN_KINDS.include?(child.kind) } - pattern&.text.to_s.strip == "_" - rescue StandardError - false - end - - def adapter_function_kind?(node) - ADAPTER_FUNCTION_KINDS.include?(node&.kind) - end - - def statement_block_wrapper?(node) - node.kind == "block" && STATEMENT_BLOCK_PARENT_KINDS.include?(node.parent&.kind) - rescue StandardError - false - end - - def case_arm_descendant?(node) - stack = node.named_children.dup - until stack.empty? 
- child = stack.shift - next unless child.respond_to?(:kind) - return true if CASE_ARGUMENT_WHEN_KINDS.include?(child.kind) - - stack.concat(child.named_children) - end - - false - rescue StandardError - false - end - - def ruby_instance_variable_text?(text) - text.to_s.match?(/\A@[A-Za-z_]\w*[!?=]?\z/) - end - - def ruby_global_variable_text?(text) - text.to_s.match?(/\A\$[A-Za-z_]\w*[!?=]?\z/) - end - - def literal_container_kind?(node) - LITERAL_CONTAINER_KINDS.include?(node&.kind) - end - - def literal_fragment_kind?(node) - LITERAL_FRAGMENT_KINDS.include?(node&.kind) - end - - def rescue_clause?(node) - node&.kind == "rescue" - end - - def ensure_clause?(node) - node&.kind == "ensure" - end - - def bracketed?(node, opening, closing) - node.children.first&.text == opening && node.children.last&.text == closing - rescue StandardError - false - end - - def element_reference_shape?(node) - node.children.first&.text != "[" && - node.children.any? { |child| !child.named? && child.text == "[" } && - node.children.any? { |child| !child.named? && child.text == "]" } && - node.named_children.size >= 2 && - node.named_children.none? { |child| %w[block do_block].include?(child.kind) } - rescue StandardError - false - end - - def descendant(node, kinds:) - stack = node&.named_children.to_a - until stack.empty? - child = stack.shift - next unless child.respond_to?(:kind) - return child if kinds.include?(child.kind) - - stack.concat(child.named_children) - end - - nil + def parse_semantic(file, language: nil) + require_relative "syntax" + document = Syntax.parse(file, language: language, parser: "tree_sitter") + key = [:semantic_tree_sitter, document.object_id] + normalized_cache.fetch(key) do + normalized_cache[key] = [SemanticNormalizer.new(document).normalize, document.lines] end end - class RubyTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter - def ruby? 
- true - end - - def super_statement?(node) - %w[body_statement block block_body statement].include?(node.kind) && - (node.text.to_s.strip == "super" || - (node.named_children.first&.kind == "super" && - node.named_children.drop(1).all? { |child| child.kind == "argument_list" })) - rescue StandardError - false - end - - def explicit_alternative(node) - node.named_children.find { |child| %w[elsif else].include?(child.kind) } - rescue StandardError - nil - end - - def instance_variable?(node) - node.kind == "instance_variable" || ruby_instance_variable_text?(node.text) - rescue StandardError - false - end - - def global_variable?(node) - node.kind == "global_variable" || ruby_global_variable_text?(node.text) - rescue StandardError - false - end - - def case_argument_list?(node) - node.kind == "argument_list" && - node.children.any? { |child| !child.named? && child.kind == "case" } && - node.named_children.any? { |child| CASE_ARGUMENT_WHEN_KINDS.include?(child.kind) } - rescue StandardError - false - end - - def zero_child_identifier_call?(node) - node.kind == "call" && node.named_children.empty? && - node.text.to_s.match?(/\A[A-Za-z_]\w*[!?=]?\z/) - rescue StandardError - false - end - - def heredoc_call_for_body?(node) - return true if node.kind == "heredoc_beginning" - return true if %w[call argument_list].include?(node.kind) && - node.text.to_s.match?(/(?:\A|[\s(,])<<[-~]?[A-Za-z_]\w*/) - - node.named_children.any? do |child| - next false if child.named_children.any? 
{ |grandchild| grandchild.kind == "heredoc_body" } - - heredoc_call_for_body?(child) - end - rescue StandardError - false - end - - private - - def assignment_operators - RUBY_ASSIGNMENT_OPERATORS - end - end - - class PythonTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter - def yield_statement?(node) - (%w[body_statement block block_body expression_statement statement].include?(node.kind) && - node.children.first&.text == "yield") - rescue StandardError - false - end - - def explicit_alternative(node) - node.named_children.find { |child| %w[elif_clause else else_clause].include?(child.kind) } - rescue StandardError - nil - end - - def case_else_arm?(node) - node.kind == "case_clause" && default_case_pattern?(node) - rescue StandardError - false - end - - def named_field(node, name) - super || python_body_field(node, name) - end - - def leading_function_statement?(node) - leading_function_statement_with_keyword?(node, "def", PYTHON_LEADING_FUNCTION_WRAPPER_KINDS) - end - - def leading_function_body(node) - node.named_children.reverse.find { |child| child.kind == "block" } - rescue StandardError - nil - end - - def leading_owner_target(node) - return node if PYTHON_LEADING_OWNER_WRAPPER_KINDS.include?(node.kind) - - super - rescue StandardError - nil - end - - def leading_if_target(node) - if PYTHON_LEADING_IF_WRAPPER_KINDS.include?(node.kind) - child = exact_single_named_child(node, kinds: %w[if_statement]) - return child if child - end - - super - end - - def rescue_body_target(node) - return node if node.kind == "try_statement" - return node if flattened_try_block?(node, clauses: %w[except_clause]) - - if node.kind == "block" - child = exact_single_named_child(node, kinds: %w[try_statement]) - return child if child - end - - super - rescue StandardError - nil - end - - def rescue_body_nodes(node) - target = rescue_body_target(node) || node - return super unless target.kind == "try_statement" || flattened_try_block?(target, clauses: 
%w[except_clause]) - - target.named_children.take_while { |child| !%w[except_clause finally_clause].include?(child.kind) } - rescue StandardError - [] - end - - def rescue_clauses(node) - target = rescue_body_target(node) - return [] unless target - - target.named_children.select { |child| child.kind == "except_clause" } - rescue StandardError - [] - end - - def rescue_clause_exceptions(node) - pattern = node.named_children.find { |child| !%w[block comment].include?(child.kind) } - return [] unless pattern - return [pattern] unless pattern.kind == "as_pattern" - - exception = pattern.named_children.find { |child| child.kind != "as_pattern_target" } - exception ? [exception] : [] - rescue StandardError - [] - end - - def rescue_clause_exceptions_source(node) - rescue_clause_exceptions(node).first - rescue StandardError - nil - end - - def rescue_clause_exception_variable_name(node) - pattern = node.named_children.find { |child| child.kind == "as_pattern" } - descendant(pattern, kinds: %w[as_pattern_target]) - rescue StandardError - nil - end - - def rescue_clause_exception_variable_source(node) - rescue_clause_exception_variable_name(node) - rescue StandardError - nil - end - - def rescue_clause_handler(node) - node.named_children.reverse.find { |child| child.kind == "block" } - rescue StandardError - nil - end - - def ensure_body_target(node) - return node if node.kind == "try_statement" - return node if flattened_try_block?(node, clauses: %w[finally_clause]) - - if node.kind == "block" - child = exact_single_named_child(node, kinds: %w[try_statement]) - return child if child - end - - super - rescue StandardError - nil - end - - def ensure_body_nodes(node) - target = ensure_body_target(node) || node - return super unless target.kind == "try_statement" || flattened_try_block?(target, clauses: %w[finally_clause]) - - target.named_children.take_while { |child| child.kind != "finally_clause" } - rescue StandardError - [] - end - - def ensure_clause(node) - target = 
ensure_body_target(node) - return nil unless target - - target.named_children.find { |child| child.kind == "finally_clause" } - rescue StandardError - nil - end - - def ensure_clause_body(node) - node.named_children.reverse.find { |child| child.kind == "block" } - rescue StandardError - nil - end - - def ternary_parts(node) - return nil unless node.kind == "conditional_expression" - - children = node.named_children - return nil unless children.size >= 3 - - [children[1], children[0], children[2]] - rescue StandardError - nil - end - - def unary_minus_expression?(node) - (%w[unary unary_expression unary_operator].include?(node.kind) && node.text.to_s.lstrip.start_with?("-")) - end - - def empty_body_statement?(node) - super || - (node.kind == "block" && node.named_children.empty? && node.text.to_s.strip == "pass") || - node.kind == "pass_statement" - rescue StandardError - false - end - - private - - def flattened_try_block?(node, clauses:) - node.kind == "block" && - node.children.first&.text == "try" && - node.named_children.any? 
{ |child| clauses.include?(child.kind) } - rescue StandardError - false - end - - def python_body_field(node, name) - return nil unless %w[body consequence].include?(name.to_s) - return nil unless PYTHON_BODY_FIELD_KINDS.include?(node.kind) - - node.named_children.find { |child| child.kind == "block" } - rescue StandardError - nil - end - - def assignment_operators - PYTHON_ASSIGNMENT_OPERATORS - end - - def operator_call_expression_kinds - super + %w[binary_operator] - end - - def concatenated_string_wrapper_kinds - PYTHON_CONCATENATED_STRING_WRAPPER_KINDS - end - - def dotted_expression_wrapper_kinds - PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS - end - end - - class LuaTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter - def explicit_alternative(node) - node.named_children.find { |child| %w[elseif_statement else else_statement].include?(child.kind) } - rescue StandardError - nil - end - - def unary_minus_expression?(node) - super || - (node.kind == "expression_list" && node.children.first&.text == "-" && node.named_children.size == 1) - rescue StandardError - false - end - - def binary_operator(node) - direct = direct_binary_operator(node) - return direct.to_s if direct - - child = exact_single_named_child(node, kinds: BINARY_WRAPPER_KINDS) - child ? 
binary_operator(child) : "" - end - - def unwrap_node?(node) - super || - (node.kind == "expression_list" && - node.named_children.size == 1 && - node.children.first&.text == "(" && - node.children.last&.text == ")") - rescue StandardError - false - end - - def leading_function_statement?(node) - leading_function_statement_with_keyword?(node, "function", LUA_LEADING_FUNCTION_WRAPPER_KINDS) - end - - def leading_function_body(node) - node.named_children.reverse.find { |child| child.kind == "block" } - rescue StandardError - nil - end - - def leading_if_target(node) - if LUA_LEADING_IF_WRAPPER_KINDS.include?(node.kind) - child = exact_single_named_child(node, kinds: %w[if_statement]) - return child if child - end - - super - end - - def array_literal_target(node) - if node.kind == "block" - named = node.named_children - if named.size == 2 && named.first.kind == "identifier" && named.first.text.to_s.empty? - target = lua_positional_table_arguments(named[1]) - return target if target - end - end - - target = lua_positional_table_arguments(node) - return target if target - - super - rescue StandardError - nil - end - - def hash_literal_target(node) - target = lua_keyed_table_arguments(node) - return target if target - - super - rescue StandardError - nil - end - - def hash_literal_values(node) - target = hash_literal_target(node) || node - return target.named_children if target.kind == "arguments" - - super - rescue StandardError - [] - end - - def identifier_text_node?(node) - %w[variable_list expression_list].include?(node.kind) && - node.text.to_s.match?(/\A[A-Za-z_]\w*\z/) - rescue StandardError - false - end - - def member_assignment_target?(node) - return false unless node.kind == "variable_list" - - node.named_children.size == 2 && - node.children.any? { |child| !child.named? && child.text == "." 
} - rescue StandardError - false - end - - def literal_fragment_assignment_context?(node) - return true if super - - literal_fragment_kind?(node) && node.parent&.kind == "expression_list" - rescue StandardError - false - end - - def lambda_target(node) - return node if node.kind == "function_definition" - - if node.kind == "expression_list" - return node if node.children.first&.kind == "function" && - node.named_children.any? { |child| child.kind == "block" } - - named = node.named_children - return named.first if named.size == 1 && named.first.kind == "function_definition" - end - - super - rescue StandardError - nil - end - - private - - def lua_positional_table_arguments(node) - return nil unless node&.kind == "arguments" - return nil unless bracketed?(node, "{", "}") - - fields = node.named_children - return nil if fields.empty? - return nil unless fields.all? { |field| field.kind == "field" && field.named_children.size <= 1 } - - node - end - - def lua_keyed_table_arguments(node) - if node&.kind == "block" - named = node.named_children - if named.size == 2 && named.first.kind == "identifier" && named.first.text.to_s.empty? - return lua_keyed_table_arguments(named[1]) - end - end - - return nil unless node&.kind == "arguments" - return nil unless bracketed?(node, "{", "}") - - fields = node.named_children - return node if fields.empty? - return nil if fields.all? 
{ |field| field.kind == "field" && field.named_children.size <= 1 } - - node - end - - private - - def assignment_operators - LUA_ASSIGNMENT_OPERATORS - end - - def operator_call_expression_kinds - super + %w[expression_list] - end - - def boolean_expression_kinds - super + %w[expression_list] - end - - def comparison_expression_kinds - super + %w[expression_list] - end - end - - class TypeScriptTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter - def explicit_alternative(node) - node.named_children.find { |child| %w[else else_clause].include?(child.kind) } - rescue StandardError - nil - end - - def safe_navigation_call?(node) - super || - node.children.any? { |child| child.kind == "optional_chain" && child.text.to_s == "?." } || - (node.kind == "call_expression" && node.named_children.any? { |child| safe_navigation_call?(child) }) - rescue StandardError - false - end - - def ternary_parts(node) - question_colon_ternary_parts(node, TYPESCRIPT_TERNARY_KINDS) - end - - def interpolated_string?(node) - super || - (node.kind == "template_string" && - node.named_children.any? 
{ |child| child.kind == "template_substitution" }) - end - - def lambda_target(node) - return node if %w[arrow_function function_expression].include?(node.kind) - - super - rescue StandardError - nil - end - - def interpolation_node?(node) - super || node.kind == "template_substitution" - rescue StandardError - false - end - - def rescue_body_target(node) - return node if node.kind == "try_statement" - - if node.kind == "statement_block" - child = exact_single_named_child(node, kinds: %w[try_statement]) - return child if child - end - - super - rescue StandardError - nil - end - - def rescue_body_nodes(node) - target = rescue_body_target(node) || node - return super unless target.kind == "try_statement" - - target.named_children.take_while { |child| !%w[catch_clause finally_clause].include?(child.kind) } - rescue StandardError - [] - end - - def rescue_clauses(node) - target = rescue_body_target(node) - return [] unless target - - target.named_children.select { |child| child.kind == "catch_clause" } - rescue StandardError - [] - end - - def rescue_clause_exception_variable_name(node) - node.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) } - rescue StandardError - nil - end - - def rescue_clause_exception_variable_source(node) - rescue_clause_exception_variable_name(node) - rescue StandardError - nil - end - - def rescue_clause_handler(node) - node.named_children.reverse.find { |child| child.kind == "statement_block" } - rescue StandardError - nil - end - - def ensure_body_target(node) - return node if node.kind == "try_statement" - - if node.kind == "statement_block" - child = exact_single_named_child(node, kinds: %w[try_statement]) - return child if child - end - - super - rescue StandardError - nil - end - - def ensure_body_nodes(node) - target = ensure_body_target(node) || node - return super unless target.kind == "try_statement" - - target.named_children.take_while { |child| child.kind != "finally_clause" } - rescue StandardError - [] - end 
- - def ensure_clause(node) - target = ensure_body_target(node) - return nil unless target - - target.named_children.find { |child| child.kind == "finally_clause" } - rescue StandardError - nil - end - - def ensure_clause_body(node) - node.named_children.reverse.find { |child| child.kind == "statement_block" } - rescue StandardError - nil - end - - def empty_body_statement?(node) - super || - (node.kind == "statement_block" && node.named_children.empty? && node.text.to_s.strip == "{}") - rescue StandardError - false - end - - private - - def assignment_operators - TYPESCRIPT_ASSIGNMENT_OPERATORS - end - end - - # Tree-sitter exposes each grammar's native node names. Decomplex's - # detectors share a small language-neutral AST vocabulary, so this - # normalizer converts common syntax categories into that vocabulary: - # DEFN, CLASS, IF, CASE/WHEN, AND/OR, CALL, LASGN, ATTRASGN, IVAR, - # LVAR, and friends. The goal is portable structural facts, not - # Ruby semantics. - class TreeSitterNormalizer - FUNCTION_KINDS = %w[ - method function_definition function_declaration method_definition - method_declaration function_item singleton_method - ].freeze - CLASS_KINDS = %w[class class_definition class_declaration class_specifier].freeze - MODULE_KINDS = %w[module].freeze - BLOCK_KINDS = %w[ - block body_statement statement_block statement_list class_body - switch_body match_block then block_body control_structure_body function_body - ].freeze - IF_KINDS = %w[if if_statement if_modifier unless unless_modifier if_expression conditional].freeze - LOOP_KINDS = { - "while" => :WHILE, - "while_statement" => :WHILE, - "while_modifier" => :WHILE, - "until_modifier" => :UNTIL, - "for" => :FOR, - "for_statement" => :FOR, - "for_in_clause" => :FOR - }.freeze - CASE_KINDS = %w[ - case switch_statement expression_switch_statement switch_expression match_statement match_expression - when_expression - ].freeze - WHEN_KINDS = %w[ - when switch_case case_clause expression_case 
case_statement switch_section - switch_block_statement_group switch_entry when_entry match_arm - ].freeze - ASSIGNMENT_KINDS = %w[ - assignment assignment_expression assignment_statement augmented_assignment - ].freeze - MEMBER_KINDS = %w[ - call attribute member_expression member_access_expression field field_access selector_expression field_expression - navigation_expression directly_assignable_expression expression_list - ].freeze - CALL_KINDS = %w[call call_expression method_call method_call_expression].freeze - IDENTIFIER_KINDS = %w[ - identifier simple_identifier property_identifier field_identifier shorthand_property_identifier - ].freeze - CONST_KINDS = %w[constant scope_resolution type_identifier scoped_type_identifier].freeze - STRING_KINDS = %w[ - string string_content string_literal interpreted_string_literal raw_string_literal - ].freeze - SYMBOL_KINDS = %w[symbol simple_symbol].freeze - NIL_KINDS = %w[nil none null].freeze - RETURN_KINDS = { - "return" => :RETURN, - "return_statement" => :RETURN, - "return_expression" => :RETURN, - "break" => :BREAK, - "break_statement" => :BREAK, - "break_expression" => :BREAK, - "next" => :NEXT, - "continue_statement" => :NEXT - }.freeze - COMPARISON_OPERATORS = %w[== != === !== < <= > >=].freeze - OPERATOR_CALL_OPERATORS = TreeSitterNormalizationAdapter::OPERATOR_CALL_OPERATORS - INFIX_STATEMENT_OPERATORS = (OPERATOR_CALL_OPERATORS + COMPARISON_OPERATORS).freeze - INLINE_DEF_WRAPPER_MIDS = %w[ - public protected private private_class_method module_function - ].freeze - - def initialize(document) - @document = document - @normalization_adapter = TreeSitterNormalizationAdapter.for(document) - @local_stack = [] - @normalizing = Set.new - end - - def normalize - children = - if ruby? 
- with_ruby_scope(@document.root, reset: true) { normalize_children(@document.root) } - else - normalize_children(@document.root) - end - wrap(:ROOT, children: children, source: @document.root) - end - - private - - def normalize_node(node) - return nil unless ts_node?(node) - key = node_key(node) - return nil if @normalizing.include?(key) - - @normalizing << key - begin - return nil if node.kind == "comment" - return normalize_assignment_lhs(node) if assignment_lhs?(node) - return normalize_infix_statement(node) if infix_statement?(node) - return normalize_dotted_expression(node) if dotted_expression?(node) - return normalize_unary_not_statement(node) if unary_not_statement?(node) - return normalize_wrapped_return_statement(node) if wrapped_return_statement?(node) - - if leading_function_statement?(node) - normalize_leading_function_statement(node) - elsif leading_if_statement?(node) - normalize_leading_if_statement(node) - elsif ensure_body_statement?(node) - normalize_ensure_body_statement(node) - elsif rescue_body_statement?(node) - normalize_rescue_body_statement(node) - elsif modifier_statement?(node) - normalize_modifier_statement(node) - elsif ternary_statement?(node) - normalize_ternary_statement(node) - elsif statement_call_with_block?(node) - normalize_statement_call_with_block(node) - elsif command_call_statement?(node) - normalize_command_call_statement(node) - elsif lambda_expression?(node) - normalize_lambda(node) - elsif FUNCTION_KINDS.include?(node.kind) - normalize_function(node) - elsif class_node?(node) - normalize_class(node) - elsif module_node?(node) - normalize_module(node) - elsif node.kind == "impl_item" - normalize_impl(node) - elsif node.kind == "elsif" - normalize_elsif(node) - elsif IF_KINDS.include?(node.kind) - normalize_if(node) - elsif LOOP_KINDS.key?(node.kind) - normalize_loop(node) - elsif CASE_KINDS.include?(node.kind) || hidden_match?(node) - normalize_case(node) - elsif hash_literal_statement?(node) - 
normalize_hash_literal_statement(node) - elsif array_literal_statement?(node) - normalize_array_literal_statement(node) - elsif element_reference_statement?(node) - normalize_element_reference_statement(node) - elsif node.kind == "element_reference" - normalize_element_reference(node) - elsif node.kind == "rescue_modifier" - normalize_rescue_modifier(node) - elsif node.kind == "ensure" - normalize_ensure_clause(node) - elsif node.kind == "begin" - normalize_begin(node) - elsif node.kind == "operator_assignment" - normalize_operator_assignment(node) - elsif ASSIGNMENT_KINDS.include?(node.kind) - normalize_assignment(node) - elsif node.kind == "subshell" - normalize_subshell(node) - elsif node.kind == "block_argument" - normalize_block_argument(node) - elsif node.kind == "pair" - normalize_pair(node) - elsif node.kind == "singleton_class" - normalize_singleton_class(node) - elsif node.kind == "yield" - normalize_yield(node) - elsif yield_statement?(node) - normalize_yield_statement(node) - elsif yield_argument_list?(node) - normalize_yield_argument_list(node) - elsif node.kind == "heredoc_beginning" - normalize_heredoc_beginning(node) - elsif node.kind == "chained_string" - normalize_chained_string(node) - elsif interpolation_node?(node) - normalize_interpolation(node) - elsif unary_minus_expression?(node) - normalize_unary_minus(node) - elsif unary_not_expression?(node) - normalize_unary_not(node) - elsif boolean_expression?(node) - normalize_boolean(node) - elsif operator_call_expression?(node) - normalize_operator_call(node) - elsif comparison_expression?(node) - normalize_comparison(node) - elsif CALL_KINDS.include?(node.kind) - normalize_call(node) - elsif member_read_node?(node) - normalize_member_read(node) - elsif BLOCK_KINDS.include?(node.kind) - wrap(:BLOCK, children: normalize_children(node), source: node) - elsif unwrap_node?(node) - normalize_node(node.named_children.first) - elsif RETURN_KINDS.key?(node.kind) - normalize_return(node) - elsif 
self_node?(node) - wrap(:SELF, children: [], source: node) - elsif instance_variable?(node) - wrap(:IVAR, children: [node.text.to_s], source: node) - elsif global_variable?(node) - normalize_global_variable(node) - elsif const_node?(node) - normalize_const(node) - elsif ruby? && IDENTIFIER_KINDS.include?(node.kind) && node.text.to_s == "yield" - wrap(:YIELD, children: [nil], source: node) - elsif ruby_vcall_identifier?(node) - return wrap(:YIELD, children: [nil], source: node) if node.text.to_s == "yield" - - wrap(:VCALL, children: [node.text.to_s.to_sym], source: node) - elsif vcall_identifier?(node) - wrap(:VCALL, children: [node.text.to_s.to_sym], source: node) - elsif local_identifier?(node) - wrap(:LVAR, children: [node.text.to_s], source: node) - elsif NIL_KINDS.include?(node.kind) - wrap(:NIL, children: [], source: node) - elsif interpolated_string?(node) - normalize_interpolated_string(node) - elsif STRING_KINDS.include?(node.kind) - wrap(:STR, children: [node.text.to_s], source: node) - elsif SYMBOL_KINDS.include?(node.kind) - wrap(:LIT, children: [node.text.to_s.sub(/\A:/, "").to_sym], source: node) - else - wrap(kind_type(node.kind), children: normalize_children(node), source: node) - end - ensure - @normalizing.delete(key) - end - end - - def normalize_function(node) - return normalize_singleton_function(node) if node.kind == "singleton_method" - - name = function_name(node) - args = normalize_parameters(named_field(node, "parameters")) - body = with_ruby_scope(node, reset: true) do - elide_implicit_nil_body( - prepend_inline_parameter_begin( - node, - elide_tail_returns(normalize_body(named_field(node, "body") || block_child(node))) - ) - ) - end - wrap(:DEFN, children: [name, scope(body, args: args, source: node)], source: node) - end - - def normalize_singleton_function(node) - receiver = singleton_receiver(node) - name = singleton_name(node) - args = normalize_parameters(named_field(node, "parameters")) - body = with_ruby_scope(node, reset: true) do 
- elide_implicit_nil_body( - prepend_inline_parameter_begin( - node, - elide_tail_returns(normalize_body(named_field(node, "body") || block_child(node))) - ) - ) - end - wrap(:DEFS, children: [normalize_node(receiver), name, scope(body, args: args, source: node)], source: node) - end - - def normalize_class(node) - name = const_for(named_field(node, "name") || first_named(node)) - body = normalize_body(named_field(node, "body") || block_child(node)) - wrap(:CLASS, children: [name, nil, scope(body, source: node)], source: node) - end - - def normalize_module(node) - name = const_for(named_field(node, "name") || first_named(node)) - body = normalize_body(named_field(node, "body") || block_child(node)) - wrap(:MODULE, children: [name, scope(body, source: node)], source: node) - end - - def normalize_impl(node) - type_node = named_field(node, "type") || - node.named_children.find do |child| - %w[type_identifier scoped_type_identifier identifier].include?(child.kind) - end - name = const_for(type_node || node) - body = normalize_body(named_field(node, "body") || block_child(node) || node) - wrap(:CLASS, children: [name, nil, scope(body, source: node)], source: node) - end - - def normalize_if(node) - if %w[if_modifier unless_modifier].include?(node.kind) - action, cond_raw = node.named_children - type = node.kind.start_with?("unless") ? :UNLESS : :IF - return wrap(type, children: [normalize_node(cond_raw), normalize_modifier_action(action), nil], source: node) - end - - cond_raw = named_field(node, "condition") || named_field(node, "predicate") || first_named(node) - cond = normalize_node(cond_raw) - positive_raw = named_field(node, "consequence") || named_field(node, "body") || - node.named_children.find { |child| child.kind == "then" } || - branch_child(node, cond_raw, 0) - negative_raw = named_field(node, "alternative") || - explicit_alternative(node) || - (branch_child(node, cond_raw, 1) unless ruby?) 
- positive = normalize_body(positive_raw) - negative = normalize_else_or_branch(negative_raw) - type = node.kind.start_with?("unless") ? :UNLESS : :IF - wrap(type, children: [cond, positive, negative], source: node) - end - - def normalize_elsif(node) - cond = node.named_children.find { |child| !%w[comment then elsif else].include?(child.kind) } - positive = node.named_children.find { |child| child.kind == "then" } - negative = node.named_children.find { |child| %w[elsif else].include?(child.kind) } - wrap(:IF, children: [normalize_node(cond), normalize_body(positive), normalize_else_or_branch(negative)], - source: node) - end - - def normalize_loop(node) - if %w[while_modifier until_modifier].include?(node.kind) - action, cond = node.named_children - return wrap(LOOP_KINDS.fetch(node.kind), children: [normalize_node(cond), normalize_modifier_action(action), true], - source: node) - end - - cond = normalize_node(named_field(node, "condition") || first_named(node)) - body = normalize_body(named_field(node, "body") || named_field(node, "consequence") || block_child(node)) - wrap(LOOP_KINDS.fetch(node.kind), children: [cond, body], source: node) - end - - def normalize_case(node) - value_raw = case_value(node) - value = normalize_node(value_raw) - whens = case_arms(node).map { |arm| normalize_when(arm) }.compact - fallback = case_else_body(node) - chain = link_when_chain(whens, fallback) - return wrap(:CASE2, children: [chain], source: node) unless value_raw - - wrap(:CASE, children: [value, chain], source: node) - end - - def normalize_when(node) - patterns = normalize_patterns(node) - body = normalize_body(when_body(node)) - wrap(:WHEN, children: [list(patterns, source: node), body, nil], source: node) - end - - def normalize_assignment(node) - left = assignment_left(node) - right = normalize_node(assignment_right(node)) - return normalize_multiple_assignment(left, right, node) if left&.kind == "left_assignment_list" - return assignment_target(left, right, source: 
node) if assignment_target(left, right, source: node) - - wrap(:LASGN, children: [target_name(left), right], source: node) - end - - def normalize_multiple_assignment(left, right, node) - targets = left.named_children.map do |child| - type = global_variable?(child) ? :GASGN : :LASGN - wrap(type, children: [target_name(child), nil], source: child) - end - wrap(:MASGN, children: [right, list(targets, source: left)], source: node) - end - - def normalize_boolean(node) - type = boolean_operator(node) == "or" ? :OR : :AND - operands = node.named_children.map { |child| normalize_node(child) }.compact - operands = operands.flat_map { |child| Ast.node?(child) && child.type == type ? child.children : [child] } - wrap(type, children: operands, source: node) - end - - def normalize_comparison(node) - operands = node.named_children - left = normalize_node(operands[0]) - right = normalize_node(operands[1]) - wrap(:OPCALL, children: [left, comparison_operator(node).to_sym, list([right], source: operands[1] || node)], - source: node) - end - - def normalize_operator_call(node) - operands = node.named_children - left = normalize_node(operands[0]) - right = normalize_node(operands[1]) - if ruby? && binary_operator(node) == "=~" && regex_literal?(operands[1]) - return wrap(:MATCH3, children: [right, left], source: node) - elsif ruby? && binary_operator(node) == "=~" - return wrap(:CALL, children: [left, :=~, list([right], source: operands[1] || node)], source: node) - end - - wrap(:OPCALL, children: [left, binary_operator(node).to_sym, list([right], source: operands[1] || node)], - source: node) - end - - def normalize_element_reference(node) - recv = node.named_children.first - args = node.named_children.drop(1).map { |child| normalize_node(child) }.compact - if ruby? 
&& self_node?(recv) - return wrap(:FCALL, children: [:[], list(args, source: node)], source: node) - end - - wrap(:CALL, children: [normalize_node(recv), :[], list(args, source: node)], source: node) - end - - def normalize_rescue_modifier(node) - body = normalize_node(node.named_children.first) - handler = normalize_node(node.named_children[1]) - resbody = wrap(:RESBODY, children: [nil, handler, nil], source: node) - wrap(:RESCUE, children: [body, resbody, nil], source: node) - end - - def normalize_ensure_clause(node) - normalize_body_nodes(node.named_children, source: node) - end - - def normalize_begin(node) - rescue_nodes = node.named_children.select { |child| child.kind == "rescue" } - ensure_node = node.named_children.find { |child| child.kind == "ensure" } - if rescue_nodes.empty? - return wrap(:BEGIN, children: normalize_children(node), source: node) unless ensure_node - - body_nodes = node.named_children.take_while { |child| child.kind != "ensure" } - body = normalize_body_nodes(body_nodes, source: body_nodes.first || node) - ensure_body = normalize_body(ensure_node) - source = source_from_nodes(body_nodes.first || node, ensure_node.named_children.last || ensure_node) - return wrap(:ENSURE, children: [body, ensure_body], source: source) - end - - body_nodes = node.named_children.take_while { |child| child.kind != "rescue" } - body = normalize_body_nodes(body_nodes, source: body_nodes.first || node) - resbodies = rescue_nodes.map { |child| normalize_rescue_clause(child) } - source = source_from_nodes(body_nodes.first || node, rescue_source_end(rescue_nodes.last) || rescue_nodes.last || node) - rescued = wrap(:RESCUE, children: [body, link_rescue_chain(resbodies), nil], source: source) - return rescued unless ensure_node - - ensure_body = normalize_body(ensure_node) - ensure_source = source_from_nodes(body_nodes.first || node, ensure_node.named_children.last || ensure_node) - wrap(:ENSURE, children: [rescued, ensure_body], source: ensure_source) - end - - 
def normalize_operator_assignment(node) - left = assignment_left(node) - right_raw = assignment_right(node) - right = normalize_node(right_raw) - operator = operator_assignment_operator(node) - - if left&.kind == "element_reference" - recv = left.named_children.first - args = left.named_children.drop(1).map { |child| normalize_node(child) }.compact - return wrap(:OP_ASGN1, children: [normalize_node(recv), operator, list(args, source: left), right], - source: node) - end - - if member_read_node?(left) - recv, mid = member_parts(left) - return wrap(:OP_ASGN2, children: [normalize_node(recv), false, mid.to_sym, operator, right], source: node) - end - - logical = normalize_logical_operator_assignment(left, operator, right, source: node) - return logical if logical - if instance_variable?(left) || global_variable?(left) - return assignment_target(left, augmented_assignment_value(left, operator, right_raw, node), source: node) - end - - assignment_target(left, right, source: node) || - wrap(:LASGN, children: [target_name(left), augmented_assignment_value(left, operator, right_raw, node)], - source: node) - end - - def normalize_subshell(node) - children = node.named_children.filter_map do |child| - case child.kind - when "interpolation" then normalize_interpolation(child) - when "string_content" then wrap(:STR, children: [child.text.to_s], source: child) - end - end - type = children.any? { |child| child.is_a?(Node) && child.type == :EVSTR } ? :DXSTR : :XSTR - wrap(type, children: children, source: node) - end - - def normalize_pair(node) - key = node.named_children.first - value = node.named_children[1] - if node.children.any? { |child| !child.named? && child.text == "=>" } - return wrap(:HASH, children: [normalize_node(key), normalize_node(value)].compact, source: node) - end - - key_lit = wrap(:LIT, children: [key.text.to_s.to_sym], source: key || node) - if ruby? && key&.kind == "hash_key_symbol" && value.nil? 
- name = key.text.to_s - return wrap(:HASH, children: [key_lit, local_or_call_for_name(name, key)], source: node) - end - - wrap(:HASH, children: [key_lit, normalize_node(value)].compact, source: node) - end - - def normalize_block_argument(node) - value = normalize_node(node.named_children.first) - wrap(:BLOCK_PASS, children: [nil, value], source: node) - end - - def normalize_singleton_class(node) - recv = normalize_node(node.named_children.first) - body = normalize_body(node.named_children[1]) - wrap(:SCLASS, children: [recv, scope(body, source: node)], source: node) - end - - def normalize_lambda(node) - target = lambda_target(node) || node - body_node = named_field(target, "body") || block_child(target) || target.named_children.last - body = with_ruby_scope(target) do - dynamic_scope(normalize_body(body_node)) - end - wrap(:LAMBDA, children: [scope(body, source: target)], source: target) - end - - def normalize_yield(node) - args_node = node.named_children.find { |child| child.kind == "argument_list" } - args = args_node ? yield_argument_nodes(args_node) : yield_inline_arguments(node) - wrap(:YIELD, children: [list(args, source: args_node || node)], source: node) - end - - def yield_statement?(node) - normalization_adapter.yield_statement?(node) - end - - def normalize_yield_statement(node) - args_node = node.named_children.find { |child| child.kind == "argument_list" } - args = args_node ? 
yield_argument_nodes(args_node) : yield_inline_arguments(node) - wrap(:YIELD, children: [list(args, source: args_node || node)], source: node) - end - - def yield_argument_list?(node) - node.kind == "argument_list" && parent_node(node)&.children&.first&.text == "yield" - rescue StandardError - false - end - - def normalize_yield_argument_list(node) - args = yield_argument_nodes(node) - source = parent_node(node) || node - wrap(:YIELD, children: [list(args, source: node)], source: source) - end - - def yield_inline_arguments(node) - node.named_children.reject { |child| child.kind == "yield" }.map { |child| normalize_node(child) }.compact - end - - def yield_argument_nodes(node) - return [scalar_argument_list_value(node)].compact if node.named_children.empty? - - node.named_children.map { |child| normalize_node(child) }.compact - end - - def super_statement?(node) - normalization_adapter.super_statement?(node) - end - - def normalize_super_statement(node) - args_node = node.named_children.find { |child| child.kind == "argument_list" } - args = - if args_node && args_node.named_children.empty? 
- [scalar_argument_list_value(args_node)].compact - elsif args_node - args_node.named_children.map { |child| normalize_node(child) }.compact - else - [] - end - wrap(:SUPER, children: [list(args, source: args_node || node)], source: node) - end - - def normalize_unary_not(node) - operand = node.named_children.first - wrap(:OPCALL, children: [normalize_node(operand), :!, nil], source: node) - end - - def normalize_unary_not_statement(node) - operand = node.named_children.first - wrap(:OPCALL, children: [normalize_node(operand), :!, nil], source: node) - end - - def normalize_unary_minus(node) - operand = node.named_children.first - if ts_node?(operand) && operand.kind == "integer" - return wrap(:INTEGER, children: [-operand.text.to_i], source: operand) - end - - wrap(:OPCALL, children: [normalize_node(operand), :-@, nil], source: node) - end - - def normalize_infix_statement(node) - left, operator, right = infix_statement_parts(node) - if ruby? && operator == "=~" && regex_literal?(right) - return wrap(:MATCH3, children: [normalize_node(right), normalize_node(left)], source: node) - elsif ruby? && operator == "=~" - return wrap(:CALL, children: [normalize_node(left), :=~, list([normalize_node(right)].compact, source: right)], - source: node) - end - - wrap(:OPCALL, children: [normalize_node(left), operator.to_sym, list([normalize_node(right)].compact, source: right)], - source: node) - end - - def normalize_dotted_expression(node) - block = call_block(node) - call = normalize_dotted_call_expression(node, source: block ? 
source_before_child(node, block) : node) - return call unless block - - args = normalize_block_parameters(block) - body = with_ruby_scope(block) do - dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) - end - wrap(:ITER, children: [call, scope(body, args: args, source: node)], source: node) - end - - def normalize_dotted_call_expression(node, source: node) - target = dotted_call_target(node) || node - recv, mid = dotted_call_parts(target) - args = call_arguments(target, nil) - type = safe_navigation_call?(target) ? :QCALL : :CALL - wrap(type, children: [normalize_node(recv), mid.to_sym, list(args, source: source)], source: source) - end - - def normalize_argument_list_call_with_block(node) - return nil unless ruby? && ts_node?(node) && node.kind == "argument_list" - - block = call_block(node) - return nil unless block - - call = normalize_argument_list_call(node) - return nil unless call - - args = normalize_block_parameters(block) - body = with_ruby_scope(block) do - dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) - end - wrap(:ITER, children: [call, scope(body, args: args, source: node)], source: node) - end - - def normalize_argument_list_call(node) - return nil unless ruby? && ts_node?(node) && node.kind == "argument_list" - - function = node.named_children.first - args_node = node.named_children.find { |child| child.kind == "argument_list" } - args = args_node ? 
args_node.named_children.map { |child| normalize_node(child) }.compact : [] - wrap(:FCALL, children: [function.text.to_sym, list(args, source: args_node || node)], source: node) - end - - def normalize_call(node) - return normalize_zero_child_call(node) if zero_child_identifier_call?(node) - return normalize_call_with_block(node) if call_block(node) - return normalize_visibility_inline_def(node) if visibility_inline_def_call?(node) - - if named_field(node, "receiver") && named_field(node, "method") - recv, mid = member_parts(node) - args = call_arguments(node, nil) - type = safe_navigation_call?(node) ? :QCALL : :CALL - return wrap(type, children: [normalize_node(recv), mid.to_sym, list(args, source: node)], source: node) - end - - function = named_field(node, "function") || named_field(node, "call") || node.named_children.first - args = call_arguments(node, function) - return wrap(:YIELD, children: [list(args, source: node)], source: node) if ruby? && function&.text == "yield" - - if member_read_node?(function) - recv, mid = member_parts(function) - return wrap(:CALL, children: [normalize_node(recv), mid.to_sym, list(args, source: node)], source: node) - end - - if function && IDENTIFIER_KINDS.include?(function.kind) - type = args.empty? ? :VCALL : :FCALL - return wrap(type, children: [function.text.to_sym, list(args, source: node)], source: node) - end - - if ruby? && function && const_node?(function) - return wrap(:FCALL, children: [function.text.to_sym, list(args, source: node)], source: node) - end - - wrap(:CALL, children: [normalize_node(function), :call, list(args, source: node)], source: node) - end - - def normalize_return(node) - normalize_return_node(node, elide_symbol: false) - end - - def wrapped_return_statement?(node) - return false unless ts_node?(node) - return false unless %w[body_statement block_body statement block].include?(node.kind) - return false if node.text.to_s.include?("\n") - - keyword = node.children.first - keyword && !keyword.named? 
&& RETURN_KINDS.key?(keyword.kind) - end - - def normalize_wrapped_return_statement(node) - keyword = node.children.first - children = node.named_children.map { |child| normalize_return_value(child) }.compact - wrap(RETURN_KINDS.fetch(keyword.kind), children: children, source: node) - end - - def normalize_return_node(node, elide_symbol:) - children = node.named_children.map { |child| normalize_return_value(child) }.compact - return children.first if elide_symbol && ruby? && children.size == 1 && symbol_literal_node?(children.first) - - wrap(RETURN_KINDS.fetch(node.kind), children: children, source: node) - end - - def normalize_return_value(node) - return normalize_node(node) unless ts_node?(node) && node.kind == "argument_list" - return scalar_argument_list_value(node) if node.named_children.empty? - return normalize_argument_list_element_reference(node) if argument_list_element_reference?(node) - return normalize_boolean(node) if boolean_expression?(node) - return normalize_ternary_statement(node) if ternary_statement?(node) - return normalize_case(node) if case_argument_list?(node) - return normalize_argument_list_call_with_block(node) if argument_list_call_with_block?(node) - return normalize_dotted_expression(node) if dotted_expression?(node) - return normalize_argument_list_unary_not(node) if argument_list_unary_not?(node) - return normalize_infix_statement(node) if infix_statement?(node) - - function = node.named_children.first - nested_args = node.named_children[1] - if function && IDENTIFIER_KINDS.include?(function.kind) && nested_args&.kind == "argument_list" - args = nested_args.named_children.map { |child| normalize_node(child) }.compact - return wrap(:FCALL, children: [function.text.to_sym, list(args, source: nested_args)], source: node) - end - - values = node.named_children.map { |child| normalize_node(child) }.compact - return values.first if values.size == 1 - - list(values, source: node) - end - - def argument_list_element_reference?(node) - 
node.kind == "argument_list" && - node.children.first&.text != "[" && - node.children.any? { |child| !child.named? && child.text == "[" } && - node.children.any? { |child| !child.named? && child.text == "]" } && - node.named_children.size >= 2 && - node.named_children.none? { |child| %w[block do_block].include?(child.kind) } - end - - def normalize_argument_list_element_reference(node) - return nil unless ruby? && ts_node?(node) && argument_list_element_reference?(node) - - recv = node.named_children.first - args = node.named_children.drop(1).map { |child| normalize_node(child) }.compact - wrap(:CALL, children: [normalize_node(recv), :[], list(args, source: node)], source: node) - end - - def normalize_call_with_block(node) - block = call_block(node) - call = normalize_call_without_block(node, block) - args = normalize_block_parameters(block) - body = with_ruby_scope(block) do - dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) - end - wrap(:ITER, children: [call, scope(body, args: args, source: node)], source: node) - end - - def normalize_call_without_block(node, block) - call_source = block ? source_before_child(node, block) : node - if dotted_call?(node) - recv, mid = dotted_call_parts(node) - args = call_arguments(node, nil) - arg_list = args.empty? ? nil : list(args, source: call_source) - type = safe_navigation_call?(node) ? :QCALL : :CALL - return wrap(type, children: [normalize_node(recv), mid.to_sym, arg_list], source: call_source) - end - - function = named_field(node, "function") || named_field(node, "call") || - node.named_children.find { |child| !same_ts_node?(child, block) } - args = call_arguments(node, function) - - if function && IDENTIFIER_KINDS.include?(function.kind) - return wrap(:FCALL, children: [function.text.to_sym, list(args, source: call_source)], source: call_source) - end - - if ruby? 
&& function && const_node?(function) - return wrap(:FCALL, children: [function.text.to_sym, list(args, source: call_source)], source: call_source) - end - - if member_read_node?(function) - recv, mid = member_parts(function) - type = safe_navigation_call?(function) ? :QCALL : :CALL - return wrap(type, children: [normalize_node(recv), mid.to_sym, list(args, source: call_source)], source: call_source) - end - - wrap(:CALL, children: [normalize_node(function), :call, list(args, source: call_source)], source: call_source) - end - - def normalize_visibility_inline_def(node) - message = node.named_children.first&.text.to_s - args = node.named_children.find { |child| child.kind == "argument_list" } - method = inline_def_from_argument_list(args) - wrap(:FCALL, children: [message.to_sym, list([method].compact, source: args || node)], source: node) - end - - def normalize_modifier_statement(node) - keyword = modifier_keyword(node) - action, cond = modifier_parts(node) - type = - case keyword - when "unless" then :UNLESS - when "while" then :WHILE - when "until" then :UNTIL - else :IF - end - normalized_action = normalize_modifier_action(action) - children = %i[WHILE UNTIL].include?(type) ? [normalize_node(cond), normalized_action, true] : - [normalize_node(cond), normalized_action, nil] - wrap(type, children: children, source: node) - end - - def normalize_modifier_action(node) - modifier_return_action?(node) ? 
normalize_return_node(node, elide_symbol: false) : normalize_node(node) - end - - def modifier_return_action?(node) - ts_node?(node) && RETURN_KINDS.key?(node.kind) - end - - def normalize_command_call_statement(node) - function = node.named_children.first - if visibility_inline_def_statement?(node, function) - method = inline_def_from_statement(node) - return wrap(:FCALL, children: [function.text.to_sym, list([method].compact, source: node)], source: node) - end - - args_node = node.named_children.find { |child| %w[argument_list arguments].include?(child.kind) } - args = args_node ? command_arguments(args_node) : [] - block = call_block(node) - call_source = block ? source_before_child(node, block) : node - if ruby? && function&.text == "yield" - return wrap(:YIELD, children: [list(args, source: args_node || call_source)], source: call_source) - end - - call = wrap(args.empty? ? :VCALL : :FCALL, - children: [function.text.to_sym, list(args, source: args_node || call_source)], - source: call_source) - return call unless block - - block_args = normalize_block_parameters(block) - body = with_ruby_scope(block) do - dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) - end - wrap(:ITER, children: [call, scope(body, args: block_args, source: node)], source: node) - end - - def dynamic_scope(node) - return node unless node.is_a?(Node) - return node if %i[DEFN DEFS CLASS MODULE SCLASS LAMBDA].include?(node.type) - - node.type = :DASGN if node.type == :LASGN - node.type = :DVAR if node.type == :LVAR - node.children = node.children.map { |child| dynamic_scope(child) } - node - end - - def normalize_zero_child_call(node) - wrap(:VCALL, children: [node.text.to_s.to_sym], source: node) - end - - def normalize_member_read(node) - recv, mid = member_parts(node) - return wrap(kind_type(node.kind), children: normalize_children(node), source: node) unless recv && mid - - wrap(:CALL, children: [normalize_node(recv), mid.to_sym, nil], source: 
node) - end - - def normalize_const(node) - if %w[scope_resolution scoped_type_identifier].include?(node.kind) - parts = node.named_children - base = normalize_const(parts[0]) if parts[0] - name = (named_field(node, "name") || parts[-1])&.text.to_s - return wrap(:COLON2, children: [base, name.to_sym], source: node) - end - - wrap(:CONST, children: [node.text.to_s.to_sym], source: node) - end - - def normalize_children(node) - node.named_children.filter_map do |child| - next if child.kind == "heredoc_body" - next if assignment_rhs?(child) - - normalize_node(child) - end - end - - def normalize_body(node) - return nil unless ts_node?(node) - return normalize_leading_function_statement(node) if leading_function_statement?(node) - return normalize_leading_owner_statement(node) if leading_owner_statement?(node) - return normalize_leading_case_statement(node) if leading_case_statement?(node) - return normalize_ensure_body_statement(node) if ensure_body_statement?(node) - return normalize_rescue_body_statement(node) if rescue_body_statement?(node) - return normalize_heredoc_body_statement(node) if heredoc_body_statement?(node) - return normalize_leading_loop_statement(node) if leading_loop_statement?(node) - return normalize_leading_if_statement(node) if leading_if_statement?(node) - return normalize_elsif(node) if node.kind == "elsif" - return normalize_wrapped_return_statement(node) if wrapped_return_statement?(node) - return normalize_yield_statement(node) if yield_statement?(node) - return normalize_super_statement(node) if super_statement?(node) - return normalize_unary_not_statement(node) if unary_not_statement?(node) - return normalize_operator_assignment_statement(node) if operator_assignment_statement?(node) - return normalize_element_reference_statement(node) if element_reference_statement?(node) - return normalize_hash_literal_statement(node) if hash_literal_statement?(node) - return normalize_array_literal_statement(node) if array_literal_statement?(node) - 
return normalize_concatenated_string_statement(node) if concatenated_string_statement?(node) - return normalize_interpolated_statement(node) if interpolated_statement?(node) - return nil if empty_body_statement?(node) - return normalize_terminal_statement(node) if terminal_statement?(node) - return normalize_modifier_statement(node) if modifier_statement?(node) - return normalize_ternary_statement(node) if ternary_statement?(node) - return normalize_statement_call_with_block(node) if statement_call_with_block?(node) - return normalize_command_call_statement(node) if command_call_statement?(node) - return normalize_infix_statement(node) if infix_statement?(node) - return normalize_boolean(node) if boolean_expression?(node) - return normalize_dotted_expression(node) if dotted_expression?(node) - - if BLOCK_KINDS.include?(node.kind) - children = normalize_children(node) - if children.empty? && bare_identifier_text?(node.text) - return wrap(:VCALL, children: [node.text.to_s.strip.to_sym], source: node) - end - return nil if children.empty? - return children.first if children.size == 1 - - return wrap(:BLOCK, children: children, source: node) - end - - normalize_node(node) - end - - def normalize_body_nodes(nodes, source:) - children = nodes.map { |child| normalize_body(child) }.compact - return nil if children.empty? - return children.first if children.size == 1 - - wrap(:BLOCK, children: children, source: source) - end - - def normalize_patterns(node) - patterns = node.named_children.select do |child| - %w[pattern case_pattern match_pattern switch_pattern when_condition].include?(child.kind) - end - patterns = [named_field(node, "value")].compact if patterns.empty? - patterns = [node.named_children.find { |child| !BLOCK_KINDS.include?(child.kind) && !statement_node?(child) }].compact if patterns.empty? 
- - patterns.flat_map do |pattern| - pattern_text = pattern.text.to_s - pattern_children = pattern.named_children - if pattern_text.include?("::") - [wrap(:CONST, children: [pattern_text.to_sym], source: pattern)] - elsif %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) && - pattern_children.empty? && pattern_text.match?(/\A-?\d+\z/) - [wrap(:INTEGER, children: [], source: pattern)] - elsif ruby? && %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) && - pattern_children.empty? && pattern_text.match?(/\A[A-Z]\w*\z/) - [wrap(:CONST, children: [pattern_text.to_sym], source: pattern)] - elsif ruby? && %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) && - pattern_children.empty? && pattern_text.match?(/\A[A-Za-z_]\w*[!?=]?\z/) - [local_or_call_for_name(pattern_text, pattern)] - elsif %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) - pattern_children.map { |child| normalize_node(child) }.compact - else - [normalize_node(pattern)].compact - end - end - end - - def assignment_target(left, right, source: nil) - return nil unless ts_node?(left) - source ||= left - - if instance_variable?(left) - return wrap(:IASGN, children: [left.text.to_s, right], source: source) - end - - if global_variable?(left) - return wrap(:GASGN, children: [left.text.to_s, right], source: source) - end - - if left.kind == "element_reference" - recv = left.named_children.first - args = left.named_children.drop(1).map { |child| normalize_node(child) }.compact - return wrap(:ATTRASGN, children: [normalize_node(recv), :[]=, list(args + [right], source: left)], - source: source) - end - - if member_read_node?(left) || normalization_adapter.member_assignment_target?(left) - recv, mid = member_parts(left) - writer = left.text.to_s.include?("&.") ? 
mid.to_sym : "#{mid}=".to_sym - return wrap(:ATTRASGN, children: [normalize_node(recv), writer, list([right], source: left)], - source: source) - end - - return assignment_target(left.named_children.first, right, source: source) if left.kind == "expression_list" - - nil - end - - def normalize_assignment_lhs(node) - right = normalize_node(next_named_sibling(node)) - source = parent_node(node) || node - assignment_target(node, right, source: source) || - wrap(:LASGN, children: [target_name(node), right], source: source) - end - - def target_name(left) - return left.text.to_s.sub(/\A\*/, "") if ts_node?(left) && IDENTIFIER_KINDS.include?(left.kind) - return left.text.to_s.sub(/\A\*/, "") if ts_node?(left) && %w[splat splat_parameter rest_assignment].include?(left.kind) - return left.text.to_s if ts_node?(left) - - Ast.slice(normalize_node(left), @document.lines) - end - - def case_value(node) - named_field(node, "value") || named_field(node, "subject") || - named_field(node, "condition") || - node.named_children.find do |child| - !WHEN_KINDS.include?(child.kind) && !BLOCK_KINDS.include?(child.kind) && child.kind != "else" - end - end - - def case_arms(node) - arms = [] - stack = node.named_children.dup - until stack.empty? 
- child = stack.shift - next unless ts_node?(child) - - if normalization_adapter.case_arm?(child) - arms << child - elsif normalization_adapter.case_else_node?(child) - next - else - stack.concat(child.named_children) unless FUNCTION_KINDS.include?(child.kind) - end - end - arms - end - - def when_body(node) - named_field(node, "body") || named_field(node, "consequence") || - named_field(node, "value") || - node.named_children.reverse.find { |child| BLOCK_KINDS.include?(child.kind) || statement_node?(child) } - end - - def link_when_chain(whens, fallback = nil) - whens.reverse.inject(fallback) do |next_when, current| - current.children[2] = next_when - current - end - end - - def case_else_body(node) - else_node = normalization_adapter.case_else_node(node) - return nil unless else_node - - if normalization_adapter.case_else_arm?(else_node) || else_node.kind == "switch_default" - body = when_body(else_node) - return normalize_body(body) if body - end - - normalize_else_or_branch(else_node) - end - - def normalize_else_or_branch(node) - return nil unless ts_node?(node) - return normalize_body(node) unless node.kind == "else" - - normalize_body_nodes(node.named_children, source: node) - end - - def link_rescue_chain(resbodies) - resbodies.reverse.inject(nil) do |next_rescue, current| - current.children[2] = next_rescue - current - end - end - - def boolean_expression?(node) - (normalization_adapter.boolean_expression_kind?(node) || boolean_statement?(node)) && - %w[and or].include?(boolean_operator(node)) - end - - def boolean_statement?(node) - return false unless %w[body_statement block_body statement argument_list].include?(node.kind) - return false unless %w[&& || and or].include?(binary_operator(node)) - return false if node.named_children.size < 2 - - node.children.all? do |child| - child.named? 
|| %w[&& || and or ( )].include?(child.text.to_s) - end - end - - def operator_call_expression?(node) - normalization_adapter.operator_call_expression?(node) - end - - def infix_statement?(node) - left, operator, right = infix_statement_parts(node) - left && right && INFIX_STATEMENT_OPERATORS.include?(operator) - end - - def dotted_expression?(node) - normalization_adapter.dotted_expression_wrapper?(node) && dotted_call?(node) - end - - def argument_list_call_with_block?(node) - return false unless node.kind == "argument_list" - return false if dotted_call?(node) - return false unless call_block(node) - - IDENTIFIER_KINDS.include?(node.named_children.first&.kind) - end - - def infix_statement_parts(node) - return [nil, nil, nil] unless %w[body_statement block_body statement argument_list].include?(node.kind) - - named_index = 0 - left = nil - right = nil - operator = nil - node.children.each do |child| - if child.named? - left ||= child - right = child if operator - named_index += 1 - elsif INFIX_STATEMENT_OPERATORS.include?(child.text.to_s) - operator = child.text.to_s - end - end - return [nil, nil, nil] unless named_index == 2 && operator - - [left, operator, right] - rescue StandardError - [nil, nil, nil] - end - - def argument_list_unary_not?(node) - node.kind == "argument_list" && - node.children.first&.text == "!" && - node.named_children.size == 1 - rescue StandardError - false - end - - def unary_not_statement?(node) - %w[body_statement block_body statement argument_list].include?(node.kind) && - node.children.first&.text == "!" && - node.named_children.size == 1 - rescue StandardError - false - end - - def normalize_argument_list_unary_not(node) - return nil unless ruby? 
&& ts_node?(node) && argument_list_unary_not?(node) - - operand = node.named_children.first - wrap(:OPCALL, children: [normalize_node(operand), :!, nil], source: node) - end - - def comparison_expression?(node) - return false if literal_fragment_expression_list?(node) - - normalization_adapter.comparison_expression_kind?(node) && - COMPARISON_OPERATORS.include?(comparison_operator(node)) - end - - def regex_literal?(node) - ts_node?(node) && %w[regex regex_literal].include?(node.kind) - end - - def unary_not_expression?(node) - normalization_adapter.unary_not_expression?(node) - end - - def unary_minus_expression?(node) - normalization_adapter.unary_minus_expression?(node) - end - - def boolean_operator(node) - direct = binary_operator(node) - return "and" if %w[&& and].include?(direct) - return "or" if %w[|| or].include?(direct) - return nil if ts_node?(node) - - text = spaced_text(node) - return "and" if text.include?("&&") || text.match?(/\band\b/) - return "or" if text.include?("||") || text.match?(/\bor\b/) - - nil - end - - def comparison_operator(node) - direct = binary_operator(node) - return direct if COMPARISON_OPERATORS.include?(direct) - - spaced_text(node)[/(===|!==|==|!=|<=|>=|<|>)/, 1] - end - - def binary_operator(node) - normalization_adapter.binary_operator(node) - end - - def spaced_text(node) - " #{node.text} " - end - - def class_node?(node) - normalization_adapter.class_node?(node) - end - - def module_node?(node) - MODULE_KINDS.include?(node.kind) && named_field(node, "name") - end - - def unwrap_node?(node) - normalization_adapter.unwrap_node?(node) - end - - def statement_node?(node) - node.kind.end_with?("_statement") || node.kind.end_with?("_expression") || - %w[return break next].include?(node.kind) - end - - def local_identifier?(node) - IDENTIFIER_KINDS.include?(node.kind) - end - - def ruby_vcall_identifier?(node) - return false unless ruby? 
- return false unless IDENTIFIER_KINDS.include?(node.kind) - return false if assignment_lhs?(node) - return false if ruby_definition_identifier?(node) - - !ruby_local_name?(node.text.to_s) - end - - def ruby_definition_identifier?(node) - parent = parent_node(node) - return false unless ts_node?(parent) - - if %w[method singleton_method].include?(parent.kind) - name = named_field(parent, "name") || - parent.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) } - return same_ts_node?(name, node) - end - - %w[ - method_parameters block_parameters lambda_parameters - optional_parameter keyword_parameter block_parameter - ].include?(parent.kind) - end - - def ruby_local_name?(name) - @local_stack.reverse.any? { |scope| scope.include?(name) } - end - - def ruby? - normalization_adapter.ruby? - end - - def normalization_adapter - @normalization_adapter ||= TreeSitterNormalizationAdapter.for(@document) - end - - def interpolated_string?(node) - normalization_adapter.interpolated_string?(node) - end - - def lambda_expression?(node) - normalization_adapter.lambda_expression?(node) - end - - def lambda_target(node) - normalization_adapter.lambda_target(node) - end - - def interpolation_node?(node) - normalization_adapter.interpolation_node?(node) - end - - def normalize_interpolated_string(node) - wrap(:DSTR, children: normalize_children(node), source: node) - end - - def vcall_identifier?(node) - return false unless local_identifier?(node) - return false if ruby? 
&& ruby_local_name?(node.text.to_s) - - parent = parent_node(node) - return false unless ts_node?(parent) - return false if %w[method method_parameters parameter_list argument_list arguments].include?(parent.kind) - return false if member_read_node?(parent) - return false if dotted_expression?(parent) - return false if assignment_lhs?(node) || assignment_rhs?(node) - - return true if %w[body_statement block_body then].include?(parent.kind) && parent_named_child?(parent, node) - return true if %w[if_modifier unless_modifier].include?(parent.kind) && same_ts_node?(parent.named_children.first, node) - - false - end - - def const_node?(node) - CONST_KINDS.include?(node.kind) - end - - def self_node?(node) - %w[self this].include?(node.kind) || node.text == "self" || node.text == "this" - end - - def instance_variable?(node) - normalization_adapter.instance_variable?(node) - end - - def global_variable?(node) - normalization_adapter.global_variable?(node) - end - - def member_read_node?(node) - ts_node?(node) && MEMBER_KINDS.include?(node.kind) && member_parts(node).all? 
- end - - def assignment_lhs?(node) - return false if prev_sibling(node)&.text == ":" - return false if literal_fragment_assignment_context?(node) - - sibling = next_sibling(node) - sibling && assignment_operator?(sibling.text) - end - - def assignment_rhs?(node) - return false if literal_fragment_assignment_context?(node) - - sibling = prev_sibling(node) - sibling && assignment_operator?(sibling.text) - end - - def literal_fragment_assignment_context?(node) - normalization_adapter.literal_fragment_assignment_context?(node) - end - - def literal_fragment_expression_list?(node) - return false unless ts_node?(node) && node.kind == "expression_list" - - named = node.named_children - named.size == 1 && literal_fragment_assignment_context?(named.first) - rescue StandardError - false - end - - def assignment_operator?(text) - normalization_adapter.assignment_operator?(text) - end - - def operator_assignment_operator(node) - raw = node.children.find { |child| !child.named? && child.text.to_s.end_with?("=") }&.text.to_s - op = raw.sub(/=\z/, "") - op = "||" if raw == "||=" - op = "&&" if raw == "&&=" - op.to_sym - end - - def augmented_assignment_value(left, operator, right_raw, source) - receiver = assignment_receiver(left) - right = normalize_node(right_raw) - wrap(:CALL, children: [receiver, operator, list([right].compact, source: right_raw || left)], source: source) - end - - def normalize_logical_operator_assignment(left, operator, right, source:) - return nil unless ruby? && [:"||", :"&&"].include?(operator) - return nil unless ts_node?(left) && IDENTIFIER_KINDS.include?(left.kind) - - name = target_name(left) - type = operator == :"||" ? 
:OP_ASGN_OR : :OP_ASGN_AND - receiver = wrap(:LVAR, children: [name], source: left) - assignment = wrap(:LASGN, children: [name, right], source: source) - wrap(type, children: [receiver, operator, assignment], source: source) - end - - def assignment_receiver(left) - return nil unless ts_node?(left) - return wrap(:LVAR, children: [left.text.to_s], source: left) if IDENTIFIER_KINDS.include?(left.kind) - return wrap(:IVAR, children: [left.text.to_s], source: left) if instance_variable?(left) - return normalize_global_variable(left) if global_variable?(left) - return normalize_const(left) if const_node?(left) - - normalize_node(left) - end - - def with_ruby_scope(node, reset: false) - return yield unless ruby? - - previous = @local_stack - @local_stack = [] if reset - @local_stack = @local_stack + [ruby_scope_locals(node)] - yield - ensure - @local_stack = previous if ruby? - end - - def ruby_scope_locals(node) - locals = Set.new - collect_ruby_scope_locals(node, locals, root: true) - locals - end - - def collect_ruby_scope_locals(node, locals, root: false) - return unless ts_node?(node) - return if !root && ruby_scope_boundary?(node) - - collect_ruby_parameter_locals(node, locals) - collect_ruby_assignment_locals(node, locals) - - node.named_children.each do |child| - next if ruby_scope_child_boundary?(child) - - collect_ruby_scope_locals(child, locals) - end - end - - def collect_ruby_parameter_locals(node, locals) - return unless %w[method_parameters block_parameters lambda_parameters].include?(node.kind) - - node.named_children.each do |child| - collect_identifier_names(child, locals) - end - end - - def collect_ruby_assignment_locals(node, locals) - if node.kind == "exception_variable" - collect_identifier_names(node, locals) - return - end - - return unless ruby_assignment_node?(node) - - left = assignment_left(node) - collect_assignment_target_names(left, locals) - end - - def ruby_assignment_node?(node) - return false unless ts_node?(node) - return true if 
%w[assignment operator_assignment].include?(node.kind) - return true if node.kind == "pattern" && node.children.any? { |child| !child.named? && child.text == "=" } - - %w[body_statement block_body statement].include?(node.kind) && - node.children.any? { |child| !child.named? && assignment_operator?(child.text) } - end - - def collect_assignment_target_names(node, locals) - return unless ts_node?(node) - - if IDENTIFIER_KINDS.include?(node.kind) - locals.add(node.text.to_s.sub(/\A\*/, "")) - return - end - - return unless %w[left_assignment_list expression_list splat splat_parameter rest_assignment].include?(node.kind) - - node.named_children.each { |child| collect_assignment_target_names(child, locals) } - end - - def collect_identifier_names(node, locals) - return unless ts_node?(node) - - locals.add(node.text.to_s.sub(/\A\*/, "")) if IDENTIFIER_KINDS.include?(node.kind) - locals.add(node.text.to_s) if normalization_adapter.identifier_text_node?(node) - node.children.select(&:named?).each { |child| collect_identifier_names(child, locals) } - end - - def ruby_scope_boundary?(node) - return false if %w[block do_block].include?(node.kind) && parent_node(node)&.kind == "lambda" - - FUNCTION_KINDS.include?(node.kind) || class_node?(node) || module_node?(node) || - %w[singleton_class lambda block do_block].include?(node.kind) - end - - def ruby_scope_child_boundary?(node) - ruby_scope_boundary?(node) - end - - def member_parts(node) - return [nil, nil] if node.kind == "expression_list" && - !(named_field(node, "operand") && named_field(node, "field")) - - return dotted_call_parts(node) if dotted_call?(node) - - recv = named_field(node, "receiver") || named_field(node, "object") || - named_field(node, "operand") || named_field(node, "value") || - named_field(node, "expression") || - node.named_children.find { |child| child.kind != "navigation_suffix" } - mid = named_field(node, "method") || named_field(node, "field") || - named_field(node, "property") || 
named_field(node, "suffix") || - node.named_children.find { |child| child.kind == "navigation_suffix" } || - node.named_children.reject { |child| %w[block do_block argument_list arguments].include?(child.kind) }.last - return [nil, nil] unless recv && mid && recv != mid - - [recv, member_name(mid).sub(/=\z/, "")] - end - - def member_name(node) - return "" unless ts_node?(node) - - if node.kind == "navigation_suffix" - suffix = named_field(node, "suffix") || - node.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) } || - node.named_children.last - return suffix&.text.to_s.sub(/\A[.?]+/, "") - end - - node.text.to_s.sub(/\A[.?]+/, "") - end - - def call_arguments(node, function) - args = named_field(node, "arguments") || named_field(node, "argument") || - node.named_children.find { |child| %w[argument_list arguments].include?(child.kind) } - return [] unless args - - children = args.named_children.reject { |child| function && child == function } - return [normalize_dotted_expression(args)] if dotted_expression?(args) - if children.empty? 
- scalar = scalar_argument_list_value(args) - return [scalar] if scalar - - return literal_arguments_from_text(args) - end - return [normalize_infix_statement(args)] if infix_statement?(args) - - children.map { |child| normalize_node(child) }.compact - end - - def assignment_left(node) - named_field(node, "left") || node.named_children.first - end - - def assignment_right(node) - named_field(node, "right") || node.named_children[1] - end - - def function_name(node) - return singleton_name(node) if node.kind == "singleton_method" - - name = named_field(node, "name") || - node.named_children.find do |child| - IDENTIFIER_KINDS.include?(child.kind) || child.kind == "constant" - end - name&.text.to_s.to_sym - end - - def singleton_receiver(node) - receiver = named_field(node, "receiver") - return receiver if receiver - - name = named_field(node, "name") || - node.named_children.reverse.find { |child| IDENTIFIER_KINDS.include?(child.kind) } - parameters = named_field(node, "parameters") - body = named_field(node, "body") || block_child(node) - node.named_children.find do |child| - !same_ts_node?(child, name) && - !same_ts_node?(child, parameters) && - !same_ts_node?(child, body) - end - end - - def singleton_name(node) - name = named_field(node, "name")&.text || - node.named_children.reverse.find { |child| IDENTIFIER_KINDS.include?(child.kind) }&.text.to_s - name.to_s.to_sym - end - - def first_named(node) - node.named_children.first - end - - def block_child(node) - node.named_children.find { |child| BLOCK_KINDS.include?(child.kind) || %w[block do_block].include?(child.kind) } - end - - def branch_child(node, cond, index) - node.named_children.reject { |child| child == cond || %w[comment else elsif].include?(child.kind) }[index] - end - - def explicit_alternative(node) - normalization_adapter.explicit_alternative(node) - end - - def const_for(node) - return wrap(:CONST, children: ["(anonymous)".to_sym], source: @document.root) unless ts_node?(node) - return 
normalize_const(node) if const_node?(node) - - wrap(:CONST, children: [node.text.to_s.to_sym], source: node) - end - - def normalize_parameters(node) - return nil unless ruby? && ts_node?(node) - - defaults = node.named_children.filter_map do |param| - name = named_field(param, "name") - value = named_field(param, "value") - next unless name && value - - wrap(:LASGN, children: [name.text.to_sym, normalize_node(value)], source: param) - end - return nil if defaults.empty? - - wrap(:ARGS, children: defaults, source: node) - end - - def normalize_block_parameters(block) - return nil unless ruby? && ts_node?(block) - - params = block.named_children.find { |child| child.kind == "block_parameters" } - return nil unless params - - destructured = params.named_children.select { |child| child.kind == "destructured_parameter" } - pre_init = destructured.map { |param| normalize_destructured_block_parameter(param) }.compact - return nil if pre_init.empty? - - wrap(:ARGS, children: pre_init, source: params) - end - - def normalize_destructured_block_parameter(param) - targets = [] - param.named_children.each { |child| collect_destructured_parameter_targets(child, targets) } - return nil if targets.empty? - - wrap(:MASGN, - children: [ - wrap(:DVAR, children: [nil], source: param), - list(targets, source: param), - nil, - ], - source: param) - end - - def collect_destructured_parameter_targets(node, targets) - return unless ts_node?(node) - - if IDENTIFIER_KINDS.include?(node.kind) - targets << wrap(:DASGN, children: [node.text.to_s, nil], source: node) - return - end - - node.named_children.each { |child| collect_destructured_parameter_targets(child, targets) } - end - - def scope(body, args: nil, source: nil) - wrap(:SCOPE, children: [nil, args, body], source: body || args || source || @document.root) - end - - def list(children, source:) - return nil if children.nil? || children.empty? 
- - wrap(:LIST, children: children, source: source) - end - - def wrap(type, children:, source:) - if source.respond_to?(:start_point) - first_lineno = source.start_point.row + 1 - first_column = source.start_point.column - last_lineno = source.end_point.row + 1 - last_column = source.end_point.column - text = source.text.to_s - else - first_lineno = source.first_lineno - first_column = source.first_column - last_lineno = source.last_lineno - last_column = source.last_column - text = source.text.to_s - end - - Node.new( - type: type, - children: children, - first_lineno: first_lineno, - first_column: first_column, - last_lineno: last_lineno, - last_column: last_column, - text: text - ) - end - - def source_before_child(node, child) - text = @document.source.byteslice(node.start_byte...child.start_byte).to_s.rstrip - return node if text.empty? - - lines = text.lines - last_lineno = node.start_point.row + lines.size - last_column = - if lines.size <= 1 - node.start_point.column + text.length - else - lines.last.to_s.chomp.length - end - Node.new( - type: :SOURCE, - children: [], - first_lineno: node.start_point.row + 1, - first_column: node.start_point.column, - last_lineno: last_lineno, - last_column: last_column, - text: text - ) - end - - def source_from_nodes(first_node, last_node) - return first_node unless ts_node?(first_node) && ts_node?(last_node) - - text = @document.source.byteslice(first_node.start_byte...last_node.end_byte).to_s - Node.new( - type: :SOURCE, - children: [], - first_lineno: first_node.start_point.row + 1, - first_column: first_node.start_point.column, - last_lineno: last_node.end_point.row + 1, - last_column: last_node.end_point.column, - text: text - ) - end - - def source_from_normalized_nodes(first_node, last_node) - return first_node unless first_node.is_a?(Node) && last_node.is_a?(Node) - - text = - if first_node.first_lineno == last_node.last_lineno - @document.lines[first_node.first_lineno - 
1].to_s.byteslice(first_node.first_column...last_node.last_column) - else - ([@document.lines[first_node.first_lineno - 1].to_s.byteslice(first_node.first_column..)] + - @document.lines[first_node.first_lineno...(last_node.last_lineno - 1)] + - [@document.lines[last_node.last_lineno - 1].to_s.byteslice(0...last_node.last_column)]).join - end - Node.new( - type: :SOURCE, - children: [], - first_lineno: first_node.first_lineno, - first_column: first_node.first_column, - last_lineno: last_node.last_lineno, - last_column: last_node.last_column, - text: text.to_s - ) - end - - def named_field(node, name) - normalization_adapter.named_field(node, name) - end - - def parent_node(node) - node.parent - rescue StandardError - nil - end - - def next_sibling(node) - node.next_sibling - rescue StandardError - nil - end - - def prev_sibling(node) - node.prev_sibling - rescue StandardError - nil - end - - def next_named_sibling(node) - node.next_named_sibling - rescue StandardError - nil - end - - def modifier_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - modifier_keyword(node) && - node.named_children.size >= 2 - end - - def ternary_statement?(node) - normalization_adapter.ternary_statement?(node) - end - - def normalize_ternary_statement(node) - cond, positive, negative = normalization_adapter.ternary_parts(node) - wrap(:IF, children: [normalize_node(cond), normalize_node(positive), normalize_node(negative)], source: node) - end - - def case_argument_list?(node) - normalization_adapter.case_argument_list?(node) - end - - def leading_function_statement?(node) - normalization_adapter.leading_function_statement?(node) - end - - def normalize_leading_function_statement(node) - name = normalization_adapter.leading_function_name(node).to_s.to_sym - body = normalization_adapter.leading_function_body(node) - normalized_body = with_ruby_scope(node, reset: true) do - elide_tail_returns(normalize_body(body)) - end - wrap(:DEFN, children: [name, 
scope(normalized_body, source: node)], source: node) - end - - def command_call_statement?(node) - return false unless %w[body_statement block block_body statement].include?(node.kind) - return false if dotted_call?(node) - return false unless node.named_children.first&.kind == "identifier" - - node.named_children.any? { |child| %w[argument_list arguments].include?(child.kind) } || - call_block(node) - end - - def zero_child_identifier_call?(node) - normalization_adapter.zero_child_identifier_call?(node) - end - - def dotted_call?(node) - return false unless ts_node?(node) - target = dotted_call_target(node) - return true if target && dotted_call_node?(target) - - dotted_call_node?(node) - end - - def dotted_call_node?(node) - return false unless ts_node?(node) - return false unless node.children.any? { |child| child.text == "." || child.text == "&." } - - callable = dotted_callable_children(node) - return false if callable.any? { |child| %w[string_content interpolation].include?(child.kind) } - - callable.size >= 2 - end - - def dotted_call_target(node) - return nil unless ts_node?(node) - - named = node.named_children - return nil unless named.size == 1 - - child = named.first - dotted_call_node?(child) ? 
child : nil - rescue StandardError - nil - end - - def dotted_callable_children(node) - node.named_children.reject { |child| %w[block do_block argument_list arguments].include?(child.kind) } - end - - def safe_navigation_call?(node) - ts_node?(node) && normalization_adapter.safe_navigation_call?(node) - end - - def dotted_call_parts(node) - target = dotted_call_target(node) || node - callable = dotted_callable_children(target) - [callable.first, callable[1].text.to_s.sub(/=\z/, "")] - end - - def leading_if_statement?(node) - normalization_adapter.leading_if_statement?(node) - end - - def leading_case_statement?(node) - normalization_adapter.leading_case_statement?(node) - end - - def normalize_leading_case_statement(node) - target = normalization_adapter.leading_case_target(node) || node - value = normalize_node(case_value(target)) - whens = case_arms(target).map { |arm| normalize_when(arm) }.compact - wrap(:CASE, children: [value, link_when_chain(whens, case_else_body(target))], source: target) - end - - def leading_loop_statement?(node) - normalization_adapter.leading_loop_statement?(node) - end - - def rescue_body_statement?(node) - normalization_adapter.rescue_body_statement?(node) - end - - def normalize_rescue_body_statement(node) - target = normalization_adapter.rescue_body_target(node) || node - body_nodes = normalization_adapter.rescue_body_nodes(target) - body = normalize_body_nodes(body_nodes, source: target) - rescue_nodes = normalization_adapter.rescue_clauses(target) - resbodies = rescue_nodes.map { |child| normalize_rescue_clause(child) } - source = source_from_nodes(body_nodes.first || target, rescue_source_end(rescue_nodes.last) || rescue_nodes.last || target) - wrap(:RESCUE, children: [body, link_rescue_chain(resbodies), nil], source: source) - end - - def normalize_rescue_clause(node) - exceptions = normalization_adapter.rescue_clause_exceptions(node) - exception_nodes = exceptions.map do |child| - if child.kind == "exceptions" && 
child.text.to_s.match?(/\A[A-Z]\w*(?:::\w+)*\z/) - normalize_const(child) - else - normalize_node(child) - end - end.compact - exception_source = normalization_adapter.rescue_clause_exceptions_source(node) - exception_variable = rescue_exception_variable(node) - handler = normalization_adapter.rescue_clause_handler(node) - body = prepend_rescue_exception_assignment(normalize_body(handler), exception_variable) - wrap(:RESBODY, children: [list(exception_nodes, source: exception_source || node), body, nil], - source: node) - end - - def rescue_source_end(node) - return nil unless ts_node?(node) - - handler = normalization_adapter.rescue_clause_handler(node) - return handler.named_children.last || handler if ts_node?(handler) - - node.named_children.reverse.find { |child| !%w[comment].include?(child.kind) } || node - end - - def rescue_exception_variable(node) - name = normalization_adapter.rescue_clause_exception_variable_name(node) - return nil unless name - - source = normalization_adapter.rescue_clause_exception_variable_source(node) || name - wrap(:LASGN, children: [name.text.to_s, wrap(:ERRINFO, children: [], source: source)], source: source) - end - - def prepend_rescue_exception_assignment(body, assignment) - return body unless assignment - return assignment unless body.is_a?(Node) - - if body.type == :BLOCK - body.children = [assignment] + body.children.compact - body - else - wrap(:BLOCK, children: [assignment, body], source: source_from_normalized_nodes(assignment, body)) - end - end - - def ensure_body_statement?(node) - normalization_adapter.ensure_body_statement?(node) - end - - def normalize_ensure_body_statement(node) - target = normalization_adapter.ensure_body_target(node) || node - body = if rescue_body_statement?(target) - normalize_rescue_body_statement(target) - else - normalize_body_nodes(normalization_adapter.ensure_body_nodes(target), source: target) - end - ensure_node = normalization_adapter.ensure_clause(target) - ensure_body = 
normalize_body(normalization_adapter.ensure_clause_body(ensure_node) || ensure_node) - wrap(:ENSURE, children: [body, ensure_body], source: body || node) - end - - def array_literal_statement?(node) - normalization_adapter.array_literal_statement?(node) - end - - def element_reference_statement?(node) - normalization_adapter.element_reference_statement?(node) - end - - def normalize_element_reference_statement(node) - target = normalization_adapter.element_reference_target(node) || node - recv = normalization_adapter.element_reference_receiver(target) - args = normalization_adapter.element_reference_arguments(target).map { |child| normalize_node(child) }.compact - if ruby? && self_node?(recv) - return wrap(:FCALL, children: [:[], list(args, source: target)], source: target) - end - - wrap(:CALL, children: [normalize_node(recv), :[], list(args, source: target)], source: target) - end - - def hash_literal_statement?(node) - normalization_adapter.hash_literal_statement?(node) - end - - def normalize_hash_literal_statement(node) - target = normalization_adapter.hash_literal_target(node) || node - children = normalization_adapter.hash_literal_values(target).map do |child| - normalize_hash_literal_value(child) - end.compact - wrap(:HASH, children: children, source: target) - end - - def normalize_hash_literal_value(node) - if node.kind == "field" - named = node.named_children - if named.size >= 2 - key = named.first - value = named[1] - key_lit = wrap(:LIT, children: [key.text.to_s.to_sym], source: key || node) - return wrap(:HASH, children: [key_lit, normalize_node(value)].compact, source: node) - end - end - - normalize_node(node) - end - - def normalize_array_literal_statement(node) - target = normalization_adapter.array_literal_target(node) || node - values = normalization_adapter.array_literal_values(target).map do |child| - normalize_array_literal_value(child) - end.compact - return wrap(:ZLIST, children: [], source: target) if values.empty? 
- - list(values, source: target) - end - - def normalize_array_literal_value(node) - if node.kind == "field" - named = node.named_children - return normalize_node(named.first) if named.size == 1 - return normalize_terminal_statement(node) if named.empty? - end - - normalize_node(node) - end - - def empty_body_statement?(node) - normalization_adapter.empty_body_statement?(node) - end - - def heredoc_body_statement?(node) - normalization_adapter.heredoc_body_statement?(node) - end - - def normalize_heredoc_body_statement(node) - heredoc_bodies = node.named_children.select { |child| child.kind == "heredoc_body" } - children = node.named_children.filter_map do |child| - next if child.kind == "heredoc_body" - - if heredoc_call_for_body?(child) - with_current_heredoc_body(heredoc_bodies.shift) { normalize_node(child) } - else - normalize_body(child) - end - end - return nil if children.empty? - return children.first if children.size == 1 - - wrap(:BLOCK, children: children, source: node) - end - - def heredoc_call_for_body?(node) - return false unless ts_node?(node) - - normalization_adapter.heredoc_call_for_body?(node) - end - - def with_current_heredoc_body(body) - previous = @current_heredoc_body - @current_heredoc_body = body - yield - ensure - @current_heredoc_body = previous - end - - def normalize_heredoc_beginning(node) - body = @current_heredoc_body || - parent_node(parent_node(node))&.named_children&.find { |child| child.kind == "heredoc_body" } - children = body ? normalize_heredoc_children(body) : [] - wrap(:DSTR, children: children, source: node) - end - - def normalize_heredoc_children(node) - node.named_children.filter_map do |child| - case child.kind - when "interpolation" - normalize_interpolation(child) - when "heredoc_content" - text = child.text.to_s - text.empty? ? 
nil : wrap(:STR, children: [text], source: child) - else - nil - end - end - end - - def normalize_interpolation(node) - exprs = node.named_children.map { |child| normalize_node(child) }.compact - body = exprs.size == 1 ? exprs.first : list(exprs, source: node) - wrap(:EVSTR, children: [body].compact, source: node) - end - - def interpolated_statement?(node) - normalization_adapter.interpolated_statement?(node) - end - - def normalize_interpolated_statement(node) - wrap(:DSTR, children: normalize_children(node), source: node) - end - - def concatenated_string_statement?(node) - normalization_adapter.concatenated_string_statement?(node) - end - - def normalize_concatenated_string_statement(node) - normalized = node.named_children.map { |child| [child, normalize_node(child)] } - parts = normalized.flat_map do |_child, child_node| - child_node.is_a?(Node) && child_node.type == :DSTR ? child_node.children : [child_node] - end.compact - wrap(:DSTR, children: parts, source: dynamic_string_source(normalized) || node.named_children.first) - end - - def normalize_chained_string(node) - normalized = node.named_children.map { |child| [child, normalize_node(child)] } - parts = normalized.flat_map do |_child, child_node| - child_node.is_a?(Node) && child_node.type == :DSTR ? child_node.children : [child_node] - end.compact - wrap(:DSTR, children: parts, source: dynamic_string_source(normalized) || node.named_children.first || node) - end - - def dynamic_string_source(normalized_children) - normalized_children.find do |_child, child_node| - child_node.is_a?(Node) && child_node.type == :DSTR && - child_node.children.any? { |part| part.is_a?(Node) && part.type == :EVSTR } - end&.first - end - - def terminal_statement?(node) - %w[body_statement block_body statement argument_list].include?(node.kind) && - node.named_children.empty? && - !node.text.to_s.strip.empty? 
- end - - def normalize_terminal_statement(node) - text = node.text.to_s.strip - return wrap(:YIELD, children: [nil], source: node) if ruby? && text == "yield" - return wrap(:IVAR, children: [text], source: node) if text.match?(/\A@[A-Za-z_]\w*[!?=]?\z/) - return normalize_global_variable(node) if text.match?(/\A\$/) - return wrap(:NIL, children: [], source: node) if text == "nil" - return wrap(:TRUE, children: [], source: node) if text == "true" - return wrap(:FALSE, children: [], source: node) if text == "false" - return wrap(:LIT, children: [text.delete_prefix(":").to_sym], source: node) if text.match?(/\A:[A-Za-z_]\w*[!?=]?\z/) - return wrap(:INTEGER, children: [text.to_i], source: node) if text.match?(/\A-?\d+\z/) - return wrap(:ZLIST, children: [], source: node) if text == "[]" - - if bare_identifier_text?(text) - return wrap(:VCALL, children: [text.to_sym], source: node) if ruby? && !ruby_local_name?(text) - - return wrap(:LVAR, children: [text], source: node) - end - - wrap(kind_type(node.kind), children: [], source: node) - end - - def normalize_global_variable(node) - text = node.text.to_s - return wrap(:NTH_REF, children: [text.delete_prefix("$").to_i], source: node) if text.match?(/\A\$[1-9]\d*\z/) - - wrap(:GVAR, children: [text], source: node) - end - - def normalize_leading_loop_statement(node) - target = normalization_adapter.leading_loop_target(node) || node - return normalize_loop(target) unless same_ts_node?(target, node) - - keyword = target.children.first.kind - cond = normalize_node(target.named_children.first) - body = normalize_body(target.named_children[1]) - wrap(keyword == "until" ? 
:UNTIL : :WHILE, children: [cond, body], source: target) - end - - def operator_assignment_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - operator_assignment_statement_parts(node)[1] - rescue StandardError - false - end - - def normalize_operator_assignment_statement(node) - left, operator, right_raw = operator_assignment_statement_parts(node) - right = normalize_node(right_raw) - - if left&.kind == "element_reference" - recv = left.named_children.first - args = left.named_children.drop(1).map { |child| normalize_node(child) }.compact - return wrap(:OP_ASGN1, children: [normalize_node(recv), operator, list(args, source: left), right], - source: node) - end - - if member_read_node?(left) - recv, mid = member_parts(left) - return wrap(:OP_ASGN2, children: [normalize_node(recv), false, mid.to_sym, operator, right], source: node) - end - - logical = normalize_logical_operator_assignment(left, operator, right, source: node) - return logical if logical - if instance_variable?(left) || global_variable?(left) - return assignment_target(left, augmented_assignment_value(left, operator, right_raw, node), source: node) - end - - assignment_target(left, right, source: node) || - wrap(:LASGN, children: [target_name(left), augmented_assignment_value(left, operator, right_raw, node)], - source: node) - end - - def operator_assignment_statement_parts(node) - left = nil - operator = nil - right = nil - node.children.each do |child| - if child.named? 
- left ||= child - right = child if operator - elsif child.text.to_s.match?(/\A(?:[+\-*\/%&|^]|\|\||&&)=\z/) - raw = child.text.to_s - operator = raw.sub(/=\z/, "") - operator = "||" if raw == "||=" - operator = "&&" if raw == "&&=" - end - end - return [nil, nil, nil] unless left && operator && right - - [left, operator.to_sym, right] - end - - def leading_owner_statement?(node) - normalization_adapter.leading_owner_statement?(node) - end - - def normalize_leading_owner_statement(node) - target = normalization_adapter.leading_owner_target(node) || node - keyword = target.children.first.kind - name = const_for(target.named_children.first) - body_node = named_field(target, "body") || - target.named_children.reverse.find { |child| BLOCK_KINDS.include?(child.kind) } - body = normalize_body(body_node) - if keyword == "module" - wrap(:MODULE, children: [name, scope(body, source: target)], source: target) - else - wrap(:CLASS, children: [name, nil, scope(body, source: target)], source: target) - end - end - - def normalize_leading_if_statement(node) - target = normalization_adapter.leading_if_target(node) || node - return normalize_if(target) unless same_ts_node?(target, node) - - keyword = target.children.first.kind - cond = target.named_children.find { |child| !%w[comment then elsif else].include?(child.kind) } - consequence = target.named_children.find { |child| child.kind == "then" } || - branch_child(target, cond, 0) - alternative = explicit_alternative(target) - type = keyword == "unless" ? :UNLESS : :IF - wrap(type, children: [normalize_node(cond), normalize_body(consequence), normalize_else_or_branch(alternative)], - source: target) - end - - def modifier_keyword(node) - seen_named = false - node.children.each do |child| - seen_named ||= child.named? - return child.kind if seen_named && !child.named? 
&& %w[if unless while until].include?(child.kind) - end - nil - rescue StandardError - nil - end - - def modifier_parts(node) - [node.named_children.first, node.named_children.last] - end - - def call_block(node) - node.named_children.find { |child| %w[block do_block].include?(child.kind) } - end - - def statement_call_with_block?(node) - %w[body_statement block_body statement].include?(node.kind) && - call_block(node) && - statement_block_call(node) - end - - def statement_block_call(node) - return node if dotted_call?(node) - return node if member_read_node?(node) - - block = call_block(node) - node.named_children.find do |child| - !same_ts_node?(child, block) && (CALL_KINDS.include?(child.kind) || member_read_node?(child)) - end - end - - def normalize_statement_call_with_block(node) - block = call_block(node) - call = normalize_call_without_block(statement_block_call(node), block) - args = normalize_block_parameters(block) - body = with_ruby_scope(block) do - dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) - end - wrap(:ITER, children: [call, scope(body, args: args, source: node)], source: node) - end - - def visibility_inline_def_call?(node) - return false unless node.kind == "call" - - message = node.named_children.first&.text.to_s - return false unless INLINE_DEF_WRAPPER_MIDS.include?(message) - - args = node.named_children.find { |child| child.kind == "argument_list" } - args&.text.to_s.lstrip.start_with?("def ") - end - - def visibility_inline_def_statement?(node, function) - INLINE_DEF_WRAPPER_MIDS.include?(function&.text.to_s) && node.text.to_s.include?("def ") - end - - def inline_def_from_argument_list(args) - return nil unless ruby? 
&& ts_node?(args) - - inline_def_from_source(args) - end - - def inline_def_from_statement(node) - source = node.named_children.find { |child| child.kind == "argument_list" } || node - inline_def_from_source(source) - end - - def inline_def_from_source(source) - return nil unless ruby? && ts_node?(source) - - body = inline_def_body(source) - receiver = inline_def_receiver(source) - normalized_body = with_ruby_scope(source, reset: true) do - elide_tail_returns(normalize_body(body)) - end - if receiver - name = inline_def_name_after_receiver(source, receiver) - return nil if name.to_s.empty? - - return wrap(:DEFS, children: [normalize_node(receiver), name.to_sym, scope(normalized_body, source: source)], - source: source) - end - - name = source.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) }&.text.to_s - return nil if name.to_s.empty? - - wrap(:DEFN, children: [name.to_sym, scope(normalized_body, source: source)], source: source) - end - - def inline_def_receiver(source) - return nil unless source.text.to_s.match?(/\bdef\s+[^.\s]+\./) - - source.named_children.find { |child| self_node?(child) || const_node?(child) } - end - - def inline_def_name_after_receiver(source, receiver) - index = source.named_children.index { |child| same_ts_node?(child, receiver) } - source.named_children[(index.to_i + 1)..]&.find { |child| IDENTIFIER_KINDS.include?(child.kind) }&.text.to_s - end - - def inline_def_body(node) - stack = node.named_children.reverse - until stack.empty? - child = stack.shift - return child if child.kind == "body_statement" - - stack.concat(child.named_children.reverse) - end - nil - end - - def literal_arguments_from_text(args) - text = args.text.to_s - return [normalize_heredoc_beginning(args)] if text.match?(/\A\s*<<[-~]?[A-Za-z_]\w*/) - - text.scan(/:([A-Za-z_]\w*[!?=]?)/).map do |name| - wrap(:LIT, children: [name.first.to_sym], source: args) - end - end - - def elide_tail_returns(node) - return node unless ruby? 
- return node unless node.is_a?(Node) - return node if %i[DEFN DEFS CLASS MODULE SCLASS LAMBDA ITER].include?(node.type) - return node.children.first if node.type == :RETURN - - case node.type - when :BLOCK - children = node.children.dup - children[-1] = elide_tail_returns(children[-1]) if children.any? - node.children = children - when :SCOPE - children = node.children.dup - children[2] = elide_tail_returns(children[2]) - node.children = children - when :IF, :UNLESS - children = node.children.dup - children[1] = elide_tail_returns(children[1]) - children[2] = elide_tail_returns(children[2]) if children.size > 2 - node.children = children - when :CASE - children = node.children.dup - children[1] = elide_tail_returns(children[1]) - node.children = children - when :CASE2 - children = node.children.dup - children[0] = elide_tail_returns(children[0]) - node.children = children - when :WHEN - children = node.children.dup - children[1] = elide_tail_returns(children[1]) - children[2] = elide_tail_returns(children[2]) if children.size > 2 - node.children = children - when :RESCUE - children = node.children.dup - children[0] = elide_tail_returns(children[0]) - children[1] = elide_tail_returns(children[1]) - node.children = children - when :RESBODY - children = node.children.dup - children[1] = elide_tail_returns(children[1]) - children[2] = elide_tail_returns(children[2]) if children.size > 2 - node.children = children - end - - node - end - - def elide_implicit_nil_body(node) - return node unless ruby? - node = drop_trailing_nil_statement(node) - return nil if node.is_a?(Node) && node.type == :NIL - - node - end - - def prepend_inline_parameter_begin(function_node, body) - marker = inline_parameter_begin_marker(function_node) - return body unless marker - - children = body.is_a?(Node) && body.type == :BLOCK ? body.children.compact : [body].compact - return nil if children.empty? 
- - if body.is_a?(Node) && body.type == :BLOCK - body.children = [marker] + children - body - else - wrap(:BLOCK, children: [marker] + children, source: function_node) - end - end - - def inline_parameter_begin_marker(function_node) - return nil unless ruby? - - params = named_field(function_node, "parameters") || - function_node.named_children.find { |child| child.kind == "method_parameters" } - return nil unless params - - semicolon = params.next_sibling - return nil unless semicolon && !semicolon.named? && semicolon.text == ";" - - Node.new( - type: :BEGIN, - children: [nil], - first_lineno: semicolon.start_point.row + 1, - first_column: semicolon.start_point.column, - last_lineno: semicolon.start_point.row + 1, - last_column: semicolon.start_point.column, - text: "" - ) - rescue StandardError - nil - end - - def drop_trailing_nil_statement(node) - return node unless node.is_a?(Node) && node.type == :BLOCK - - children = node.children.compact - children.pop while children.last.is_a?(Node) && children.last.type == :NIL - return nil if children.empty? - return children.first if children.size == 1 - - node.children = children - node - end - - def scalar_argument_list_value(node) - text = node.text.to_s.strip - return wrap(:YIELD, children: [nil], source: node) if ruby? && text == "yield" - return wrap(:NIL, children: [], source: node) if text == "nil" - return wrap(:TRUE, children: [], source: node) if text == "true" - return wrap(:FALSE, children: [], source: node) if text == "false" - return wrap(:LIT, children: [text.delete_prefix(":").to_sym], source: node) if text.match?(/\A:[A-Za-z_]\w*[!?=]?\z/) - if text.match?(/\A-?\d+\z/) - return wrap(:INTEGER, children: [text.to_i], source: node) - end - return nil unless bare_identifier_text?(text) - - if ruby? && !ruby_local_name?(text) - wrap(:VCALL, children: [text.to_sym], source: node) - else - wrap(:LVAR, children: [text], source: node) - end - end - - def local_or_call_for_name(name, source) - if ruby? 
&& !ruby_local_name?(name) - wrap(:VCALL, children: [name.to_sym], source: source) - else - wrap(:LVAR, children: [name], source: source) - end - end - - def symbol_literal_node?(node) - node.is_a?(Node) && node.type == :LIT && node.children.first.is_a?(Symbol) - end - - def command_arguments(args) - return [scalar_argument_list_value(args)].compact if args.named_children.empty? - return [normalize_infix_statement(args)] if infix_statement?(args) - return [normalize_dotted_expression(args)] if dotted_expression?(args) - - args.named_children.map { |child| normalize_node(child) }.compact - end - - def parent_named_child?(parent, node) - parent.named_children.any? { |child| same_ts_node?(child, node) } - end - - def same_ts_node?(left, right) - left.kind == right.kind && left.start_byte == right.start_byte && left.end_byte == right.end_byte - rescue StandardError - false - end - - def node_key(node) - [node.kind, node.start_byte, node.end_byte] - rescue StandardError - node.object_id - end - - def bare_identifier_text?(text) - text.to_s.strip.match?(/\A[A-Za-z_]\w*[!?=]?\z/) - end - - def hidden_match?(node) - node.kind == "expression_statement" && - node.text.to_s.lstrip.start_with?("match ") && - node.named_children.any? { |child| child.kind == "match_block" } - end - - def kind_type(kind) - kind.to_s.upcase.gsub(/[^A-Z0-9]+/, "_").to_sym - end - - def ts_node?(node) - node && node.respond_to?(:kind) && node.respond_to?(:named_children) - end - end + require_relative "ast/legacy_normalizer" # Flatten a && chain (binary-nested OR n-ary, version dependent). 
def flatten_and(node) diff --git a/gems/decomplex/lib/decomplex/ast/adapters/base.rb b/gems/decomplex/lib/decomplex/ast/adapters/base.rb new file mode 100644 index 000000000..e82e65176 --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/adapters/base.rb @@ -0,0 +1,790 @@ +# frozen_string_literal: true + +module Decomplex + module Ast + UnsupportedLanguageError = Class.new(StandardError) + + # Language-specific syntax-shape decisions live here, before nodes + # are converted into Decomplex's shared AST vocabulary. + class TreeSitterNormalizationAdapter + BINARY_WRAPPER_KINDS = %w[ + binary binary_expression binary_operator boolean_operator comparison_operator + ].freeze + CLASS_KINDS = %w[class class_definition class_declaration class_specifier].freeze + COMMON_ASSIGNMENT_OPERATORS = %w[= += -= *= /= %=].freeze + RUBY_ASSIGNMENT_OPERATORS = (COMMON_ASSIGNMENT_OPERATORS + %w[**= &&= ||= &= |= ^= <<= >>=]).freeze + PYTHON_ASSIGNMENT_OPERATORS = (COMMON_ASSIGNMENT_OPERATORS + %w[//= **= @= &= |= ^= <<= >>= :=]).freeze + LUA_ASSIGNMENT_OPERATORS = %w[=].freeze + TYPESCRIPT_ASSIGNMENT_OPERATORS = ( + COMMON_ASSIGNMENT_OPERATORS + %w[**= <<= >>= >>>= &= |= ^= &&= ||= ??=] + ).freeze + OPERATOR_CALL_OPERATORS = %w[+ - * / % ** | & ^ << >> =~ !~].freeze + BOOLEAN_EXPRESSION_KINDS = %w[binary binary_expression boolean_operator].freeze + COMPARISON_EXPRESSION_KINDS = %w[binary binary_expression comparison_operator].freeze + DOTTED_EXPRESSION_WRAPPER_KINDS = %w[body_statement block_body statement argument_list].freeze + PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS = (DOTTED_EXPRESSION_WRAPPER_KINDS + %w[expression_statement]).freeze + LITERAL_CONTAINER_KINDS = %w[string delimited_symbol regex regex_literal].freeze + LITERAL_FRAGMENT_KINDS = %w[string_content escape_sequence interpolation string_fragment].freeze + CASE_ARGUMENT_WHEN_KINDS = %w[ + when switch_case case_clause expression_case case_statement switch_section + switch_block_statement_group switch_entry when_entry 
match_arm + ].freeze + CASE_ELSE_KINDS = %w[else switch_default].freeze + CASE_DEFAULT_PATTERN_KINDS = %w[case_pattern match_pattern pattern].freeze + ADAPTER_FUNCTION_KINDS = %w[ + method function_definition function_declaration method_definition + method_declaration function_item singleton_method + ].freeze + STATEMENT_BLOCK_PARENT_KINDS = %w[ + method_declaration constructor_declaration function_declaration function_body + if_statement while_statement for_statement enhanced_for_statement try_statement + catch_clause finally_clause do_statement lambda_expression + ].freeze + IDENTIFIER_KINDS = %w[ + identifier simple_identifier property_identifier field_identifier shorthand_property_identifier + ].freeze + LEADING_FUNCTION_WRAPPER_KINDS = %w[body_statement statement].freeze + PYTHON_LEADING_FUNCTION_WRAPPER_KINDS = %w[block].freeze + LUA_LEADING_FUNCTION_WRAPPER_KINDS = %w[block].freeze + OWNER_STATEMENT_NESTED_KIND = %w[class class_definition class_declaration module].freeze + LEADING_OWNER_WRAPPER_KINDS = %w[body_statement statement].freeze + PYTHON_LEADING_OWNER_WRAPPER_KINDS = %w[block].freeze + IF_NODE_KINDS = %w[if if_statement if_modifier unless unless_modifier if_expression conditional].freeze + LEADING_IF_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze + PYTHON_LEADING_IF_WRAPPER_KINDS = %w[block].freeze + LUA_LEADING_IF_WRAPPER_KINDS = %w[block].freeze + LEADING_CASE_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze + LEADING_LOOP_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze + RESCUE_BODY_WRAPPER_KINDS = %w[body_statement block_body statement].freeze + ENSURE_BODY_WRAPPER_KINDS = %w[body_statement block_body statement].freeze + ARRAY_LITERAL_WRAPPER_KINDS = %w[ + body_statement block block_body statement argument_list expression_statement + ].freeze + ARRAY_LITERAL_NODE_KINDS = %w[array list].freeze + ELEMENT_REFERENCE_WRAPPER_KINDS = %w[ + body_statement block block_body statement 
expression_statement expression_list + ].freeze + ELEMENT_REFERENCE_NODE_KINDS = %w[ + element_reference subscript subscript_expression bracket_index_expression + ].freeze + HASH_LITERAL_WRAPPER_KINDS = %w[ + body_statement block block_body statement argument_list expression_statement parenthesized_expression + ].freeze + HASH_LITERAL_NODE_KINDS = %w[hash dictionary object table_constructor].freeze + EMPTY_BODY_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze + HEREDOC_BODY_WRAPPER_KINDS = %w[body_statement block_body statement then].freeze + INTERPOLATED_STATEMENT_WRAPPER_KINDS = %w[body_statement block_body statement argument_list].freeze + CONCATENATED_STRING_WRAPPER_KINDS = %w[body_statement block_body statement argument_list].freeze + PYTHON_CONCATENATED_STRING_WRAPPER_KINDS = (CONCATENATED_STRING_WRAPPER_KINDS + %w[block expression_statement]).freeze + CONCATENATED_STRING_NODE_KINDS = %w[chained_string concatenated_string].freeze + UNWRAP_KINDS = %w[ + parenthesized_expression parenthesized_statements expression_statement statement + case_pattern match_pattern pattern + ].freeze + PYTHON_BODY_FIELD_KINDS = %w[ + elif_clause else_clause for_statement function_definition if_statement + try_statement while_statement with_statement + ].freeze + QUESTION_COLON_TERNARY_KINDS = %w[body_statement block_body statement argument_list conditional].freeze + TYPESCRIPT_TERNARY_KINDS = (QUESTION_COLON_TERNARY_KINDS + %w[ternary_expression]).freeze + + class << self + def for(document) + case document&.language&.to_sym + when :ruby then RubyTreeSitterNormalizationAdapter.new(document) + when :python then PythonTreeSitterNormalizationAdapter.new(document) + when :lua then LuaTreeSitterNormalizationAdapter.new(document) + when :typescript, :javascript then TypeScriptTreeSitterNormalizationAdapter.new(document) + else + raise UnsupportedLanguageError, + "unsupported AST normalization language #{document&.language.inspect}" + end + end + end + + attr_reader 
:document + + def initialize(document) + @document = document + end + + def ruby? + false + end + + def yield_statement?(_node) + false + end + + def super_statement?(_node) + false + end + + def explicit_alternative(node) + node.named_children.find { |child| %w[else else_clause else_statement].include?(child.kind) } + rescue StandardError + nil + end + + def unary_not_expression?(node) + %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("!") + end + + def unary_minus_expression?(node) + %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("-") + end + + def binary_operator(node) + direct_binary_operator(node).to_s + end + + def class_node?(node) + CLASS_KINDS.include?(node.kind) + end + + def unwrap_node?(node) + UNWRAP_KINDS.include?(node.kind) && node.named_children.size == 1 + end + + def interpolated_string?(node) + node.kind == "string" && node.named_children.any? { |child| child.kind == "interpolation" } + end + + def lambda_expression?(node) + !lambda_target(node).nil? 
+ rescue StandardError + false + end + + def lambda_target(node) + return node if node.kind == "lambda" + + nil + rescue StandardError + nil + end + + def interpolation_node?(node) + node.kind == "interpolation" + rescue StandardError + false + end + + def instance_variable?(node) + node.kind == "instance_variable" + rescue StandardError + false + end + + def global_variable?(node) + node.kind == "global_variable" + rescue StandardError + false + end + + def member_assignment_target?(_node) + false + end + + def identifier_text_node?(_node) + false + end + + def literal_fragment_assignment_context?(node) + parent = node.parent + return false unless parent.respond_to?(:kind) + return true if literal_container_kind?(parent) + + literal_fragment_kind?(node) && + parent.parent.respond_to?(:kind) && + literal_container_kind?(parent.parent) + rescue StandardError + false + end + + def assignment_operator?(text) + assignment_operators.include?(text.to_s) + end + + def named_field(node, name) + node.child_by_field_name(name) + rescue StandardError + nil + end + + def safe_navigation_call?(_node) + false + end + + def ternary_statement?(node) + !ternary_parts(node).nil? + end + + def ternary_parts(node) + question_colon_ternary_parts(node, QUESTION_COLON_TERNARY_KINDS) + end + + def case_argument_list?(_node) + false + end + + def case_arm?(node) + case_arm_kind?(node) && !case_else_arm?(node) + rescue StandardError + false + end + + def case_else_node(node) + stack = node.named_children.dup + until stack.empty? 
+ child = stack.shift + next unless child.respond_to?(:kind) + + return child if case_else_node?(child) + next if case_arm_kind?(child) + + stack.concat(child.named_children) unless adapter_function_kind?(child) + end + + nil + rescue StandardError + nil + end + + def case_else_arm?(_node) + false + end + + def case_else_node?(node) + CASE_ELSE_KINDS.include?(node&.kind) || case_else_arm?(node) + rescue StandardError + false + end + + def leading_function_statement?(_node) + false + end + + def leading_function_name(node) + node.named_children.find { |child| identifier_kind?(child) }&.text + rescue StandardError + nil + end + + def leading_function_body(node) + node.named_children.reverse.find { |child| child.kind == "body_statement" } + rescue StandardError + nil + end + + def leading_owner_statement?(node) + target = leading_owner_target(node) + return false unless target + + %w[class module].include?(target.children.first&.kind.to_s) && + target.named_children.size >= 2 && + !OWNER_STATEMENT_NESTED_KIND.include?(target.named_children.first.kind) + rescue StandardError + false + end + + def leading_owner_target(node) + node if LEADING_OWNER_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def leading_if_statement?(node) + target = leading_if_target(node) + return false unless target + + !!( + %w[if unless].include?(target.children.first&.kind.to_s) && + target.named_children.size >= 2 && + !IF_NODE_KINDS.include?(target.named_children.first.kind) + ) + rescue StandardError + false + end + + def leading_if_target(node) + node if LEADING_IF_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def leading_case_statement?(node) + target = leading_case_target(node) + return false unless target + + %w[case match switch].include?(target.children.first&.kind.to_s) && case_arm_descendant?(target) + rescue StandardError + false + end + + def leading_case_target(node) + node if LEADING_CASE_WRAPPER_KINDS.include?(node.kind) + 
rescue StandardError + nil + end + + def leading_loop_statement?(node) + target = leading_loop_target(node) + return false unless target + + !target.children.first&.named? && + %w[while until].include?(target.children.first&.kind.to_s) && + target.named_children.size >= 2 + rescue StandardError + false + end + + def leading_loop_target(node) + node if LEADING_LOOP_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def rescue_body_statement?(node) + rescue_clauses(node).any? + rescue StandardError + false + end + + def rescue_body_target(node) + node if RESCUE_BODY_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def rescue_body_nodes(node) + target = rescue_body_target(node) || node + named = target.named_children + rescue_index = named.index { |child| rescue_clause?(child) } + return [] unless rescue_index + + named[0...rescue_index] + rescue StandardError + [] + end + + def rescue_clauses(node) + target = rescue_body_target(node) + return [] unless target + + target.named_children.select { |child| rescue_clause?(child) } + rescue StandardError + [] + end + + def rescue_clause_exceptions(node) + exceptions = node.named_children.find { |child| child.kind == "exceptions" } + return [] unless exceptions + return [exceptions] if exceptions.text.to_s.match?(/\A[A-Z]\w*(?:::\w+)*\z/) + return [exceptions] if exceptions.named_children.empty? && !exceptions.text.to_s.strip.empty? 
+ + exceptions.named_children + rescue StandardError + [] + end + + def rescue_clause_exceptions_source(node) + node.named_children.find { |child| child.kind == "exceptions" } + rescue StandardError + nil + end + + def rescue_clause_exception_variable_name(node) + var = node.named_children.find { |child| child.kind == "exception_variable" } + var&.named_children&.find { |child| identifier_kind?(child) } + rescue StandardError + nil + end + + def rescue_clause_exception_variable_source(node) + node.named_children.find { |child| child.kind == "exception_variable" } + rescue StandardError + nil + end + + def rescue_clause_handler(node) + node.named_children.reverse.find do |child| + !%w[exceptions exception_variable comment].include?(child.kind) + end + rescue StandardError + nil + end + + def ensure_body_statement?(node) + !ensure_clause(node).nil? + rescue StandardError + false + end + + def ensure_body_target(node) + node if ENSURE_BODY_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def ensure_body_nodes(node) + target = ensure_body_target(node) || node + named = target.named_children + ensure_index = named.index { |child| ensure_clause?(child) } + return [] unless ensure_index + + named[0...ensure_index] + rescue StandardError + [] + end + + def ensure_clause(node) + target = ensure_body_target(node) + return nil unless target + + target.named_children.find { |child| ensure_clause?(child) } + rescue StandardError + nil + end + + def ensure_clause_body(_node) + nil + end + + def array_literal_statement?(node) + !array_literal_target(node).nil? 
+ rescue StandardError + false + end + + def array_literal_target(node) + return node if ARRAY_LITERAL_NODE_KINDS.include?(node.kind) + return nil unless ARRAY_LITERAL_WRAPPER_KINDS.include?(node.kind) + return node if bracketed?(node, "[", "]") + + child = exact_single_named_child(node, kinds: ARRAY_LITERAL_NODE_KINDS) + return child if child + + named = node.named_children + return nil unless named.size == 1 && ARRAY_LITERAL_NODE_KINDS.include?(named.first.kind) + + child = named.first + stripped = node.text.to_s.strip + child if stripped == child.text.to_s || stripped == "#{child.text};" + rescue StandardError + nil + end + + def array_literal_values(node) + target = array_literal_target(node) || node + target.named_children + rescue StandardError + [] + end + + def element_reference_statement?(node) + !element_reference_target(node).nil? + rescue StandardError + false + end + + def element_reference_target(node) + return node if ELEMENT_REFERENCE_NODE_KINDS.include?(node.kind) + return nil unless ELEMENT_REFERENCE_WRAPPER_KINDS.include?(node.kind) + + named = node.named_children + if named.size == 1 && ELEMENT_REFERENCE_NODE_KINDS.include?(named.first.kind) + stripped = node.text.to_s.strip + child = named.first + return child if stripped == child.text.to_s || stripped == "#{child.text};" + end + + node if element_reference_shape?(node) + rescue StandardError + nil + end + + def element_reference_receiver(node) + target = element_reference_target(node) || node + target.named_children.first + rescue StandardError + nil + end + + def element_reference_arguments(node) + target = element_reference_target(node) || node + target.named_children.drop(1) + rescue StandardError + [] + end + + def hash_literal_statement?(node) + !hash_literal_target(node).nil? 
+ rescue StandardError + false + end + + def hash_literal_target(node) + return node if HASH_LITERAL_NODE_KINDS.include?(node.kind) + return nil unless HASH_LITERAL_WRAPPER_KINDS.include?(node.kind) + return nil if statement_block_wrapper?(node) + return node if bracketed?(node, "{", "}") + + named = node.named_children + return nil unless named.size == 1 + + child = named.first + return hash_literal_target(child) if node.kind == "parenthesized_expression" + + stripped = node.text.to_s.strip + if stripped == child.text.to_s || stripped == "#{child.text};" + return child if HASH_LITERAL_NODE_KINDS.include?(child.kind) + return hash_literal_target(child) if HASH_LITERAL_WRAPPER_KINDS.include?(child.kind) + end + + nil + rescue StandardError + nil + end + + def hash_literal_values(node) + target = hash_literal_target(node) || node + target.named_children + rescue StandardError + [] + end + + def empty_body_statement?(node) + EMPTY_BODY_WRAPPER_KINDS.include?(node.kind) && + node.named_children.empty? && + node.text.to_s.strip.empty? + rescue StandardError + false + end + + def heredoc_body_statement?(node) + HEREDOC_BODY_WRAPPER_KINDS.include?(node.kind) && + node.named_children.any? { |child| child.kind == "heredoc_body" } + rescue StandardError + false + end + + def heredoc_call_for_body?(_node) + false + end + + def interpolated_statement?(node) + INTERPOLATED_STATEMENT_WRAPPER_KINDS.include?(node.kind) && + node.named_children.any? { |child| child.kind == "interpolation" } + rescue StandardError + false + end + + def concatenated_string_statement?(node) + !concatenated_string_target(node).nil? + rescue StandardError + false + end + + def concatenated_string_target(node) + return node if concatenated_string_node?(node) + return nil unless concatenated_string_wrapper_kinds.include?(node.kind) + + named = node.named_children + return node if named.size > 1 && named.all? 
{ |child| child.kind == "string" } + return named.first if named.size == 1 && concatenated_string_node?(named.first) + + nil + rescue StandardError + nil + end + + def zero_child_identifier_call?(_node) + false + end + + def operator_call_expression?(node) + operator_call_expression_kinds.include?(node.kind) && + OPERATOR_CALL_OPERATORS.include?(binary_operator(node)) + rescue StandardError + false + end + + def boolean_expression_kind?(node) + boolean_expression_kinds.include?(node.kind) + rescue StandardError + false + end + + def comparison_expression_kind?(node) + comparison_expression_kinds.include?(node.kind) + rescue StandardError + false + end + + def dotted_expression_wrapper?(node) + dotted_expression_wrapper_kinds.include?(node.kind) + rescue StandardError + false + end + + private + + def assignment_operators + COMMON_ASSIGNMENT_OPERATORS + end + + def operator_call_expression_kinds + %w[binary binary_expression] + end + + def boolean_expression_kinds + BOOLEAN_EXPRESSION_KINDS + end + + def comparison_expression_kinds + COMPARISON_EXPRESSION_KINDS + end + + def dotted_expression_wrapper_kinds + DOTTED_EXPRESSION_WRAPPER_KINDS + end + + def concatenated_string_wrapper_kinds + CONCATENATED_STRING_WRAPPER_KINDS + end + + def concatenated_string_node?(node) + CONCATENATED_STRING_NODE_KINDS.include?(node&.kind) && + node.named_children.size > 1 && + node.named_children.all? { |child| child.kind == "string" } + end + + def direct_binary_operator(node) + node.children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) }&.text + rescue StandardError + nil + end + + def question_colon_ternary_parts(node, kinds) + return nil unless kinds.include?(node.kind) + return nil unless node.children.any? { |child| !child.named? && child.text == "?" } + return nil unless node.children.any? { |child| !child.named? 
&& child.text == ":" } + + children = node.named_children + return nil unless children.size >= 3 + + children.first(3) + rescue StandardError + nil + end + + def leading_function_statement_with_keyword?(node, keyword, wrapper_kinds) + wrapper_kinds.include?(node.kind) && + node.children.first&.kind.to_s == keyword && + node.named_children.any? { |child| identifier_kind?(child) } + rescue StandardError + false + end + + def identifier_kind?(node) + IDENTIFIER_KINDS.include?(node&.kind) + end + + def exact_single_named_child(node, kinds:) + children = node.named_children + return nil unless children.size == 1 + + child = children.first + return nil unless kinds.include?(child.kind) + return nil unless node.text.to_s == child.text.to_s + + child + rescue StandardError + nil + end + + def case_arm_kind?(node) + CASE_ARGUMENT_WHEN_KINDS.include?(node&.kind) + end + + def default_case_pattern?(node) + pattern = node.named_children.find { |child| CASE_DEFAULT_PATTERN_KINDS.include?(child.kind) } + pattern&.text.to_s.strip == "_" + rescue StandardError + false + end + + def adapter_function_kind?(node) + ADAPTER_FUNCTION_KINDS.include?(node&.kind) + end + + def statement_block_wrapper?(node) + node.kind == "block" && STATEMENT_BLOCK_PARENT_KINDS.include?(node.parent&.kind) + rescue StandardError + false + end + + def case_arm_descendant?(node) + stack = node.named_children.dup + until stack.empty? 
+ child = stack.shift + next unless child.respond_to?(:kind) + return true if CASE_ARGUMENT_WHEN_KINDS.include?(child.kind) + + stack.concat(child.named_children) + end + + false + rescue StandardError + false + end + + def literal_container_kind?(node) + LITERAL_CONTAINER_KINDS.include?(node&.kind) + end + + def literal_fragment_kind?(node) + LITERAL_FRAGMENT_KINDS.include?(node&.kind) + end + + def rescue_clause?(node) + node&.kind == "rescue" + end + + def ensure_clause?(node) + node&.kind == "ensure" + end + + def bracketed?(node, opening, closing) + node.children.first&.text == opening && node.children.last&.text == closing + rescue StandardError + false + end + + def element_reference_shape?(node) + node.children.first&.text != "[" && + node.children.any? { |child| !child.named? && child.text == "[" } && + node.children.any? { |child| !child.named? && child.text == "]" } && + node.named_children.size >= 2 && + node.named_children.none? { |child| %w[block do_block].include?(child.kind) } + rescue StandardError + false + end + + def descendant(node, kinds:) + stack = node&.named_children.to_a + until stack.empty? 
+ child = stack.shift + next unless child.respond_to?(:kind) + return child if kinds.include?(child.kind) + + stack.concat(child.named_children) + end + + nil + end + end + + end +end diff --git a/gems/decomplex/lib/decomplex/ast/adapters/lua.rb b/gems/decomplex/lib/decomplex/ast/adapters/lua.rb new file mode 100644 index 000000000..b4e1c50b4 --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/adapters/lua.rb @@ -0,0 +1,184 @@ +# frozen_string_literal: true + +require_relative "base" + +module Decomplex + module Ast + class LuaTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def explicit_alternative(node) + node.named_children.find { |child| %w[elseif_statement else else_statement].include?(child.kind) } + rescue StandardError + nil + end + + def unary_minus_expression?(node) + super || + (node.kind == "expression_list" && node.children.first&.text == "-" && node.named_children.size == 1) + rescue StandardError + false + end + + def binary_operator(node) + direct = direct_binary_operator(node) + return direct.to_s if direct + + child = exact_single_named_child(node, kinds: BINARY_WRAPPER_KINDS) + child ? 
binary_operator(child) : "" + end + + def unwrap_node?(node) + super || + (node.kind == "expression_list" && + node.named_children.size == 1 && + node.children.first&.text == "(" && + node.children.last&.text == ")") + rescue StandardError + false + end + + def leading_function_statement?(node) + leading_function_statement_with_keyword?(node, "function", LUA_LEADING_FUNCTION_WRAPPER_KINDS) + end + + def leading_function_body(node) + node.named_children.reverse.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def leading_if_target(node) + if LUA_LEADING_IF_WRAPPER_KINDS.include?(node.kind) + child = exact_single_named_child(node, kinds: %w[if_statement]) + return child if child + end + + super + end + + def array_literal_target(node) + if node.kind == "block" + named = node.named_children + if named.size == 2 && named.first.kind == "identifier" && named.first.text.to_s.empty? + target = lua_positional_table_arguments(named[1]) + return target if target + end + end + + target = lua_positional_table_arguments(node) + return target if target + + super + rescue StandardError + nil + end + + def hash_literal_target(node) + target = lua_keyed_table_arguments(node) + return target if target + + super + rescue StandardError + nil + end + + def hash_literal_values(node) + target = hash_literal_target(node) || node + return target.named_children if target.kind == "arguments" + + super + rescue StandardError + [] + end + + def identifier_text_node?(node) + %w[variable_list expression_list].include?(node.kind) && + node.text.to_s.match?(/\A[A-Za-z_]\w*\z/) + rescue StandardError + false + end + + def member_assignment_target?(node) + return false unless node.kind == "variable_list" + + node.named_children.size == 2 && + node.children.any? { |child| !child.named? && child.text == "." 
} + rescue StandardError + false + end + + def literal_fragment_assignment_context?(node) + return true if super + + literal_fragment_kind?(node) && node.parent&.kind == "expression_list" + rescue StandardError + false + end + + def lambda_target(node) + return node if node.kind == "function_definition" + + if node.kind == "expression_list" + return node if node.children.first&.kind == "function" && + node.named_children.any? { |child| child.kind == "block" } + + named = node.named_children + return named.first if named.size == 1 && named.first.kind == "function_definition" + end + + super + rescue StandardError + nil + end + + private + + def lua_positional_table_arguments(node) + return nil unless node&.kind == "arguments" + return nil unless bracketed?(node, "{", "}") + + fields = node.named_children + return nil if fields.empty? + return nil unless fields.all? { |field| field.kind == "field" && field.named_children.size <= 1 } + + node + end + + def lua_keyed_table_arguments(node) + if node&.kind == "block" + named = node.named_children + if named.size == 2 && named.first.kind == "identifier" && named.first.text.to_s.empty? + return lua_keyed_table_arguments(named[1]) + end + end + + return nil unless node&.kind == "arguments" + return nil unless bracketed?(node, "{", "}") + + fields = node.named_children + return node if fields.empty? + return nil if fields.all? 
{ |field| field.kind == "field" && field.named_children.size <= 1 } + + node + end + + private + + def assignment_operators + LUA_ASSIGNMENT_OPERATORS + end + + def operator_call_expression_kinds + super + %w[expression_list] + end + + def boolean_expression_kinds + super + %w[expression_list] + end + + def comparison_expression_kinds + super + %w[expression_list] + end + end + + end +end diff --git a/gems/decomplex/lib/decomplex/ast/adapters/python.rb b/gems/decomplex/lib/decomplex/ast/adapters/python.rb new file mode 100644 index 000000000..e0efda06f --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/adapters/python.rb @@ -0,0 +1,224 @@ +# frozen_string_literal: true + +require_relative "base" + +module Decomplex + module Ast + class PythonTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def yield_statement?(node) + (%w[body_statement block block_body expression_statement statement].include?(node.kind) && + node.children.first&.text == "yield") + rescue StandardError + false + end + + def explicit_alternative(node) + node.named_children.find { |child| %w[elif_clause else else_clause].include?(child.kind) } + rescue StandardError + nil + end + + def case_else_arm?(node) + node.kind == "case_clause" && default_case_pattern?(node) + rescue StandardError + false + end + + def named_field(node, name) + super || python_body_field(node, name) + end + + def leading_function_statement?(node) + leading_function_statement_with_keyword?(node, "def", PYTHON_LEADING_FUNCTION_WRAPPER_KINDS) + end + + def leading_function_body(node) + node.named_children.reverse.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def leading_owner_target(node) + return node if PYTHON_LEADING_OWNER_WRAPPER_KINDS.include?(node.kind) + + super + rescue StandardError + nil + end + + def leading_if_target(node) + if PYTHON_LEADING_IF_WRAPPER_KINDS.include?(node.kind) + child = exact_single_named_child(node, kinds: %w[if_statement]) + return child if child 
+ end + + super + end + + def rescue_body_target(node) + return node if node.kind == "try_statement" + return node if flattened_try_block?(node, clauses: %w[except_clause]) + + if node.kind == "block" + child = exact_single_named_child(node, kinds: %w[try_statement]) + return child if child + end + + super + rescue StandardError + nil + end + + def rescue_body_nodes(node) + target = rescue_body_target(node) || node + return super unless target.kind == "try_statement" || flattened_try_block?(target, clauses: %w[except_clause]) + + target.named_children.take_while { |child| !%w[except_clause finally_clause].include?(child.kind) } + rescue StandardError + [] + end + + def rescue_clauses(node) + target = rescue_body_target(node) + return [] unless target + + target.named_children.select { |child| child.kind == "except_clause" } + rescue StandardError + [] + end + + def rescue_clause_exceptions(node) + pattern = node.named_children.find { |child| !%w[block comment].include?(child.kind) } + return [] unless pattern + return [pattern] unless pattern.kind == "as_pattern" + + exception = pattern.named_children.find { |child| child.kind != "as_pattern_target" } + exception ? 
[exception] : [] + rescue StandardError + [] + end + + def rescue_clause_exceptions_source(node) + rescue_clause_exceptions(node).first + rescue StandardError + nil + end + + def rescue_clause_exception_variable_name(node) + pattern = node.named_children.find { |child| child.kind == "as_pattern" } + descendant(pattern, kinds: %w[as_pattern_target]) + rescue StandardError + nil + end + + def rescue_clause_exception_variable_source(node) + rescue_clause_exception_variable_name(node) + rescue StandardError + nil + end + + def rescue_clause_handler(node) + node.named_children.reverse.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def ensure_body_target(node) + return node if node.kind == "try_statement" + return node if flattened_try_block?(node, clauses: %w[finally_clause]) + + if node.kind == "block" + child = exact_single_named_child(node, kinds: %w[try_statement]) + return child if child + end + + super + rescue StandardError + nil + end + + def ensure_body_nodes(node) + target = ensure_body_target(node) || node + return super unless target.kind == "try_statement" || flattened_try_block?(target, clauses: %w[finally_clause]) + + target.named_children.take_while { |child| child.kind != "finally_clause" } + rescue StandardError + [] + end + + def ensure_clause(node) + target = ensure_body_target(node) + return nil unless target + + target.named_children.find { |child| child.kind == "finally_clause" } + rescue StandardError + nil + end + + def ensure_clause_body(node) + node.named_children.reverse.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def ternary_parts(node) + return nil unless node.kind == "conditional_expression" + + children = node.named_children + return nil unless children.size >= 3 + + [children[1], children[0], children[2]] + rescue StandardError + nil + end + + def unary_minus_expression?(node) + (%w[unary unary_expression unary_operator].include?(node.kind) && 
node.text.to_s.lstrip.start_with?("-")) + end + + def empty_body_statement?(node) + super || + (node.kind == "block" && node.named_children.empty? && node.text.to_s.strip == "pass") || + node.kind == "pass_statement" + rescue StandardError + false + end + + private + + def flattened_try_block?(node, clauses:) + node.kind == "block" && + node.children.first&.text == "try" && + node.named_children.any? { |child| clauses.include?(child.kind) } + rescue StandardError + false + end + + def python_body_field(node, name) + return nil unless %w[body consequence].include?(name.to_s) + return nil unless PYTHON_BODY_FIELD_KINDS.include?(node.kind) + + node.named_children.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def assignment_operators + PYTHON_ASSIGNMENT_OPERATORS + end + + def operator_call_expression_kinds + super + %w[binary_operator] + end + + def concatenated_string_wrapper_kinds + PYTHON_CONCATENATED_STRING_WRAPPER_KINDS + end + + def dotted_expression_wrapper_kinds + PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS + end + end + + end +end diff --git a/gems/decomplex/lib/decomplex/ast/adapters/ruby.rb b/gems/decomplex/lib/decomplex/ast/adapters/ruby.rb new file mode 100644 index 000000000..07719416a --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/adapters/ruby.rb @@ -0,0 +1,101 @@ +# frozen_string_literal: true + +require_relative "base" + +module Decomplex + module Ast + class RubyTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def ruby? + true + end + + def yield_statement?(node) + %w[body_statement block block_body statement].include?(node.kind) && + node.children.first&.text == "yield" + rescue StandardError + false + end + + def super_statement?(node) + %w[body_statement block block_body statement].include?(node.kind) && + (node.text.to_s.strip == "super" || + (node.named_children.first&.kind == "super" && + node.named_children.drop(1).all? 
# Reports whether +node+ is a `call` node without named children whose
# text is a single bare identifier: letters, digits and underscores, not
# starting with a digit, optionally followed by one of `!`, `?` or `=`.
#
# Any StandardError raised while inspecting the node is treated as "no".
def zero_child_identifier_call?(node)
  return false if node.kind != "call"
  return false unless node.named_children.empty?

  node.text.to_s.match?(/\A[A-Za-z_]\w*[!?=]?\z/)
rescue StandardError
  false
end
# Finds the first named child of +node+ whose kind is `else` or
# `else_clause`.
#
# Returns nil when there is no such child or when inspecting the node
# raises.
def explicit_alternative(node)
  alternative_kinds = %w[else else_clause]
  node.named_children.find { |candidate| alternative_kinds.include?(candidate.kind) }
rescue StandardError
  nil
end
# Returns the last named child of +node+ whose kind is `statement_block`.
#
# Returns nil when there is no such child or when inspecting the node
# raises.
def rescue_clause_handler(node)
  node.named_children.reverse_each.find { |candidate| candidate.kind == "statement_block" }
rescue StandardError
  nil
end
module Decomplex
  module Ast
    module_function

    # Returns the memoised Hash stored in @normalized_cache, creating an
    # empty one on first use. Later calls on the same receiver return the
    # same Hash object.
    def normalized_cache
      return @normalized_cache if @normalized_cache

      @normalized_cache = {}
    end
  end
end
+ class TreeSitterNormalizer + FUNCTION_KINDS = %w[ + method function_definition function_declaration method_definition + method_declaration function_item singleton_method + ].freeze + CLASS_KINDS = %w[class class_definition class_declaration class_specifier].freeze + MODULE_KINDS = %w[module].freeze + BLOCK_KINDS = %w[ + block body_statement statement_block statement_list class_body + switch_body match_block then block_body control_structure_body function_body + ].freeze + IF_KINDS = %w[if if_statement if_modifier unless unless_modifier if_expression conditional].freeze + LOOP_KINDS = { + "while" => :WHILE, + "while_statement" => :WHILE, + "while_modifier" => :WHILE, + "until_modifier" => :UNTIL, + "for" => :FOR, + "for_statement" => :FOR, + "for_in_clause" => :FOR + }.freeze + CASE_KINDS = %w[ + case switch_statement expression_switch_statement switch_expression match_statement match_expression + when_expression + ].freeze + WHEN_KINDS = %w[ + when switch_case case_clause expression_case case_statement switch_section + switch_block_statement_group switch_entry when_entry match_arm + ].freeze + ASSIGNMENT_KINDS = %w[ + assignment assignment_expression assignment_statement augmented_assignment + ].freeze + MEMBER_KINDS = %w[ + call attribute member_expression member_access_expression field field_access selector_expression field_expression + navigation_expression directly_assignable_expression expression_list + ].freeze + CALL_KINDS = %w[call call_expression method_call method_call_expression].freeze + IDENTIFIER_KINDS = %w[ + identifier simple_identifier property_identifier field_identifier shorthand_property_identifier + ].freeze + CONST_KINDS = %w[constant scope_resolution type_identifier scoped_type_identifier].freeze + STRING_KINDS = %w[ + string string_content string_literal interpreted_string_literal raw_string_literal + ].freeze + SYMBOL_KINDS = %w[symbol simple_symbol].freeze + NIL_KINDS = %w[nil none null].freeze + RETURN_KINDS = { + "return" => :RETURN, + 
"return_statement" => :RETURN, + "return_expression" => :RETURN, + "break" => :BREAK, + "break_statement" => :BREAK, + "break_expression" => :BREAK, + "next" => :NEXT, + "continue_statement" => :NEXT + }.freeze + COMPARISON_OPERATORS = %w[== != === !== < <= > >=].freeze + OPERATOR_CALL_OPERATORS = TreeSitterNormalizationAdapter::OPERATOR_CALL_OPERATORS + INFIX_STATEMENT_OPERATORS = (OPERATOR_CALL_OPERATORS + COMPARISON_OPERATORS).freeze + INLINE_DEF_WRAPPER_MIDS = %w[ + public protected private private_class_method module_function + ].freeze + + def initialize(document) + @document = document + @normalization_adapter = TreeSitterNormalizationAdapter.for(document) + @local_stack = [] + @normalizing = Set.new + end + + def normalize + children = + if ruby? + with_ruby_scope(@document.root, reset: true) { normalize_children(@document.root) } + else + normalize_children(@document.root) + end + wrap(:ROOT, children: children, source: @document.root) + end + + private + + def normalize_node(node) + return nil unless ts_node?(node) + key = node_key(node) + return nil if @normalizing.include?(key) + + @normalizing << key + begin + return nil if node.kind == "comment" + return normalize_assignment_lhs(node) if assignment_lhs?(node) + return normalize_infix_statement(node) if infix_statement?(node) + return normalize_dotted_expression(node) if dotted_expression?(node) + return normalize_unary_not_statement(node) if unary_not_statement?(node) + return normalize_wrapped_return_statement(node) if wrapped_return_statement?(node) + + if leading_function_statement?(node) + normalize_leading_function_statement(node) + elsif leading_if_statement?(node) + normalize_leading_if_statement(node) + elsif ensure_body_statement?(node) + normalize_ensure_body_statement(node) + elsif rescue_body_statement?(node) + normalize_rescue_body_statement(node) + elsif modifier_statement?(node) + normalize_modifier_statement(node) + elsif ternary_statement?(node) + normalize_ternary_statement(node) + 
elsif statement_call_with_block?(node) + normalize_statement_call_with_block(node) + elsif command_call_statement?(node) + normalize_command_call_statement(node) + elsif lambda_expression?(node) + normalize_lambda(node) + elsif FUNCTION_KINDS.include?(node.kind) + normalize_function(node) + elsif class_node?(node) + normalize_class(node) + elsif module_node?(node) + normalize_module(node) + elsif node.kind == "impl_item" + normalize_impl(node) + elsif node.kind == "elsif" + normalize_elsif(node) + elsif IF_KINDS.include?(node.kind) + normalize_if(node) + elsif LOOP_KINDS.key?(node.kind) + normalize_loop(node) + elsif CASE_KINDS.include?(node.kind) || hidden_match?(node) + normalize_case(node) + elsif hash_literal_statement?(node) + normalize_hash_literal_statement(node) + elsif array_literal_statement?(node) + normalize_array_literal_statement(node) + elsif element_reference_statement?(node) + normalize_element_reference_statement(node) + elsif node.kind == "element_reference" + normalize_element_reference(node) + elsif node.kind == "rescue_modifier" + normalize_rescue_modifier(node) + elsif node.kind == "ensure" + normalize_ensure_clause(node) + elsif node.kind == "begin" + normalize_begin(node) + elsif node.kind == "operator_assignment" + normalize_operator_assignment(node) + elsif ASSIGNMENT_KINDS.include?(node.kind) + normalize_assignment(node) + elsif node.kind == "subshell" + normalize_subshell(node) + elsif node.kind == "block_argument" + normalize_block_argument(node) + elsif node.kind == "pair" + normalize_pair(node) + elsif node.kind == "singleton_class" + normalize_singleton_class(node) + elsif node.kind == "yield" + normalize_yield(node) + elsif yield_statement?(node) + normalize_yield_statement(node) + elsif yield_argument_list?(node) + normalize_yield_argument_list(node) + elsif node.kind == "heredoc_beginning" + normalize_heredoc_beginning(node) + elsif node.kind == "chained_string" + normalize_chained_string(node) + elsif interpolation_node?(node) + 
normalize_interpolation(node) + elsif unary_minus_expression?(node) + normalize_unary_minus(node) + elsif unary_not_expression?(node) + normalize_unary_not(node) + elsif boolean_expression?(node) + normalize_boolean(node) + elsif operator_call_expression?(node) + normalize_operator_call(node) + elsif comparison_expression?(node) + normalize_comparison(node) + elsif CALL_KINDS.include?(node.kind) + normalize_call(node) + elsif member_read_node?(node) + normalize_member_read(node) + elsif BLOCK_KINDS.include?(node.kind) + wrap(:BLOCK, children: normalize_children(node), source: node) + elsif unwrap_node?(node) + normalize_node(node.named_children.first) + elsif RETURN_KINDS.key?(node.kind) + normalize_return(node) + elsif self_node?(node) + wrap(:SELF, children: [], source: node) + elsif instance_variable?(node) + wrap(:IVAR, children: [node.text.to_s], source: node) + elsif global_variable?(node) + normalize_global_variable(node) + elsif const_node?(node) + normalize_const(node) + elsif ruby? 
# Normalizes a function-like node into a DEFN node whose children are the
# function name and a scope wrapping the normalized body and parameters.
#
# `singleton_method` nodes are handed to normalize_singleton_function.
# Parameters are normalized before the body; the body is normalized inside
# with_ruby_scope(node, reset: true).
def normalize_function(node)
  if node.kind == "singleton_method"
    return normalize_singleton_function(node)
  end

  method_name = function_name(node)
  parameters = normalize_parameters(named_field(node, "parameters"))
  normalized_body = with_ruby_scope(node, reset: true) do
    raw_body = named_field(node, "body") || block_child(node)
    statements = elide_tail_returns(normalize_body(raw_body))
    elide_implicit_nil_body(prepend_inline_parameter_begin(node, statements))
  end
  function_scope = scope(normalized_body, args: parameters, source: node)
  wrap(:DEFN, children: [method_name, function_scope], source: node)
end
# Normalizes a class-like node into a CLASS node with three children: the
# constant built from the name, nil, and a scope wrapping the normalized
# body.
#
# The name comes from the "name" field, falling back to the first named
# child; the body comes from the "body" field, falling back to block_child.
def normalize_class(node)
  name_node = named_field(node, "name") || first_named(node)
  class_name = const_for(name_node)
  body_node = named_field(node, "body") || block_child(node)
  class_scope = scope(normalize_body(body_node), source: node)
  wrap(:CLASS, children: [class_name, nil, class_scope], source: node)
end
# Normalizes an assignment node.
#
# * A `left_assignment_list` on the left-hand side is delegated to
#   normalize_multiple_assignment.
# * Otherwise assignment_target is consulted exactly once; a truthy result
#   is returned unchanged.
# * When assignment_target yields nothing, the assignment falls back to an
#   LASGN node holding the target name and the normalized right-hand side.
#
# The right-hand side is normalized before any of these checks run.
def normalize_assignment(node)
  left = assignment_left(node)
  right = normalize_node(assignment_right(node))
  return normalize_multiple_assignment(left, right, node) if left&.kind == "left_assignment_list"

  # Call assignment_target once and reuse the result, so it is not built
  # a second time just to be returned.
  target = assignment_target(left, right, source: node)
  return target if target

  wrap(:LASGN, children: [target_name(left), right], source: node)
end
# Normalizes a binary operator expression from the first two named
# children of +node+.
#
# For Ruby sources the `=~` operator is special-cased: a regex literal on
# the right yields MATCH3 with children [right, left]; any other right
# operand yields a CALL to :=~. Everything else becomes an OPCALL with the
# operator as a symbol and the right operand wrapped in a list.
def normalize_operator_call(node)
  operands = node.named_children
  rhs_raw = operands[1]
  lhs = normalize_node(operands[0])
  rhs = normalize_node(rhs_raw)

  if ruby? && binary_operator(node) == "=~"
    return wrap(:MATCH3, children: [rhs, lhs], source: node) if regex_literal?(rhs_raw)

    return wrap(:CALL, children: [lhs, :=~, list([rhs], source: rhs_raw || node)], source: node)
  end

  operator = binary_operator(node).to_sym
  wrap(:OPCALL, children: [lhs, operator, list([rhs], source: rhs_raw || node)], source: node)
end
# Normalizes a `begin` node according to its `rescue` and `ensure` named
# children.
#
# * Neither present: a BEGIN node over all normalized children.
# * Only `ensure`: ENSURE[body, ensure_body].
# * `rescue` present: RESCUE[body, chained rescue bodies, nil], wrapped in
#   ENSURE[rescued, ensure_body] when an `ensure` child also exists.
#
# The protected body is made of the named children preceding the first
# `rescue` child, or the first `ensure` child when no `rescue` exists.
def normalize_begin(node)
  named = node.named_children
  rescue_nodes = named.select { |child| child.kind == "rescue" }
  ensure_node = named.find { |child| child.kind == "ensure" }
  if rescue_nodes.empty? && !ensure_node
    return wrap(:BEGIN, children: normalize_children(node), source: node)
  end

  boundary_kind = rescue_nodes.empty? ? "ensure" : "rescue"
  body_nodes = named.take_while { |child| child.kind != boundary_kind }
  start_node = body_nodes.first || node
  guarded = normalize_body_nodes(body_nodes, source: start_node)

  unless rescue_nodes.empty?
    handlers = rescue_nodes.map { |clause| normalize_rescue_clause(clause) }
    last_rescue = rescue_nodes.last
    rescue_span = source_from_nodes(start_node, rescue_source_end(last_rescue) || last_rescue || node)
    guarded = wrap(:RESCUE, children: [guarded, link_rescue_chain(handlers), nil], source: rescue_span)
    return guarded unless ensure_node
  end

  cleanup = normalize_body(ensure_node)
  ensure_span = source_from_nodes(start_node, ensure_node.named_children.last || ensure_node)
  wrap(:ENSURE, children: [guarded, cleanup], source: ensure_span)
end
# Normalizes a subshell node into XSTR, or DXSTR when any normalized part
# is a Node of type :EVSTR.
#
# Only `interpolation` and `string_content` named children contribute
# parts; other children, and parts that normalize to nil or false, are
# dropped.
def normalize_subshell(node)
  parts = []
  node.named_children.each do |piece|
    piece_kind = piece.kind
    normalized =
      if "interpolation" == piece_kind
        normalize_interpolation(piece)
      elsif "string_content" == piece_kind
        wrap(:STR, children: [piece.text.to_s], source: piece)
      end
    parts << normalized if normalized
  end

  interpolated = parts.any? { |part| part.is_a?(Node) && part.type == :EVSTR }
  wrap(interpolated ? :DXSTR : :XSTR, children: parts, source: node)
end
# Normalizes a statement-shaped `yield` (see yield_statement?) into a
# YIELD node.
#
# The logic is the same as for a plain `yield` node, so this delegates to
# normalize_yield instead of carrying a second copy of its body that
# could drift out of sync.
def normalize_yield_statement(node)
  normalize_yield(node)
end
# Normalizes a unary minus expression.
#
# A minus applied directly to an `integer` node is folded into a single
# INTEGER node carrying the negated value, with the operand as its source.
# Any other operand becomes an OPCALL of :-@ on the normalized operand.
def normalize_unary_minus(node)
  operand = node.named_children.first
  if ts_node?(operand) && operand.kind == "integer"
    literal = operand.text.to_s
    # Kernel#Integer understands radix prefixes (0x, 0b, 0o, leading 0)
    # and digit separators, whereas String#to_i stops at the first
    # non-decimal character ("0x1F".to_i == 0). String#to_i remains the
    # lenient fallback for text Integer rejects.
    magnitude = Integer(literal, exception: false) || literal.to_i
    return wrap(:INTEGER, children: [-magnitude], source: operand)
  end

  wrap(:OPCALL, children: [normalize_node(operand), :-@, nil], source: node)
end
# Normalizes a dotted call into CALL, or QCALL when safe_navigation_call?
# reports safe navigation on the call target.
#
# The call target is dotted_call_target(node), falling back to +node+
# itself. +source+ is used both for the argument list and for the
# resulting node.
def normalize_dotted_call_expression(node, source: node)
  call_node = dotted_call_target(node) || node
  receiver, method_name = dotted_call_parts(call_node)
  arguments = call_arguments(call_node, nil)
  call_type = :CALL
  call_type = :QCALL if safe_navigation_call?(call_node)
  normalized_receiver = normalize_node(receiver)
  wrap(call_type, children: [normalized_receiver, method_name.to_sym, list(arguments, source: source)],
       source: source)
end
# Normalizes a call node. Checks run in this order:
#
# 1. zero-child identifier calls, calls with a block, and visibility
#    wrappers around an inline def are delegated to their own normalizers;
# 2. nodes with both "receiver" and "method" fields become CALL, or QCALL
#    under safe navigation;
# 3. otherwise the callee is the "function" field, the "call" field, or
#    the first named child, and the result depends on its shape:
#    Ruby `yield` -> YIELD, member read -> CALL on the receiver,
#    identifier -> VCALL (no arguments) or FCALL, Ruby constant -> FCALL,
#    anything else -> CALL of :call on the normalized callee.
def normalize_call(node)
  return normalize_zero_child_call(node) if zero_child_identifier_call?(node)
  return normalize_call_with_block(node) if call_block(node)
  return normalize_visibility_inline_def(node) if visibility_inline_def_call?(node)

  if named_field(node, "receiver") && named_field(node, "method")
    receiver, method_name = member_parts(node)
    arguments = call_arguments(node, nil)
    call_type = safe_navigation_call?(node) ? :QCALL : :CALL
    return wrap(call_type, children: [normalize_node(receiver), method_name.to_sym, list(arguments, source: node)],
                source: node)
  end

  callee = named_field(node, "function") || named_field(node, "call") || node.named_children.first
  arguments = call_arguments(node, callee)

  if ruby? && callee&.text == "yield"
    wrap(:YIELD, children: [list(arguments, source: node)], source: node)
  elsif member_read_node?(callee)
    receiver, method_name = member_parts(callee)
    wrap(:CALL, children: [normalize_node(receiver), method_name.to_sym, list(arguments, source: node)], source: node)
  elsif callee && IDENTIFIER_KINDS.include?(callee.kind)
    call_type = arguments.empty? ? :VCALL : :FCALL
    wrap(call_type, children: [callee.text.to_sym, list(arguments, source: node)], source: node)
  elsif ruby? && callee && const_node?(callee)
    wrap(:FCALL, children: [callee.text.to_sym, list(arguments, source: node)], source: node)
  else
    wrap(:CALL, children: [normalize_node(callee), :call, list(arguments, source: node)], source: node)
  end
end
&& RETURN_KINDS.key?(keyword.kind) + end + + def normalize_wrapped_return_statement(node) + keyword = node.children.first + children = node.named_children.map { |child| normalize_return_value(child) }.compact + wrap(RETURN_KINDS.fetch(keyword.kind), children: children, source: node) + end + + def normalize_return_node(node, elide_symbol:) + children = node.named_children.map { |child| normalize_return_value(child) }.compact + return children.first if elide_symbol && ruby? && children.size == 1 && symbol_literal_node?(children.first) + + wrap(RETURN_KINDS.fetch(node.kind), children: children, source: node) + end + + def normalize_return_value(node) + return normalize_node(node) unless ts_node?(node) && node.kind == "argument_list" + return scalar_argument_list_value(node) if node.named_children.empty? + return normalize_argument_list_element_reference(node) if argument_list_element_reference?(node) + return normalize_boolean(node) if boolean_expression?(node) + return normalize_ternary_statement(node) if ternary_statement?(node) + return normalize_case(node) if case_argument_list?(node) + return normalize_argument_list_call_with_block(node) if argument_list_call_with_block?(node) + return normalize_dotted_expression(node) if dotted_expression?(node) + return normalize_argument_list_unary_not(node) if argument_list_unary_not?(node) + return normalize_infix_statement(node) if infix_statement?(node) + + function = node.named_children.first + nested_args = node.named_children[1] + if function && IDENTIFIER_KINDS.include?(function.kind) && nested_args&.kind == "argument_list" + args = nested_args.named_children.map { |child| normalize_node(child) }.compact + return wrap(:FCALL, children: [function.text.to_sym, list(args, source: nested_args)], source: node) + end + + values = node.named_children.map { |child| normalize_node(child) }.compact + return values.first if values.size == 1 + + list(values, source: node) + end + + def argument_list_element_reference?(node) + 
node.kind == "argument_list" && + node.children.first&.text != "[" && + node.children.any? { |child| !child.named? && child.text == "[" } && + node.children.any? { |child| !child.named? && child.text == "]" } && + node.named_children.size >= 2 && + node.named_children.none? { |child| %w[block do_block].include?(child.kind) } + end + + def normalize_argument_list_element_reference(node) + return nil unless ruby? && ts_node?(node) && argument_list_element_reference?(node) + + recv = node.named_children.first + args = node.named_children.drop(1).map { |child| normalize_node(child) }.compact + wrap(:CALL, children: [normalize_node(recv), :[], list(args, source: node)], source: node) + end + + def normalize_call_with_block(node) + block = call_block(node) + call = normalize_call_without_block(node, block) + args = normalize_block_parameters(block) + body = with_ruby_scope(block) do + dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) + end + wrap(:ITER, children: [call, scope(body, args: args, source: node)], source: node) + end + + def normalize_call_without_block(node, block) + call_source = block ? source_before_child(node, block) : node + if dotted_call?(node) + recv, mid = dotted_call_parts(node) + args = call_arguments(node, nil) + arg_list = args.empty? ? nil : list(args, source: call_source) + type = safe_navigation_call?(node) ? :QCALL : :CALL + return wrap(type, children: [normalize_node(recv), mid.to_sym, arg_list], source: call_source) + end + + function = named_field(node, "function") || named_field(node, "call") || + node.named_children.find { |child| !same_ts_node?(child, block) } + args = call_arguments(node, function) + + if function && IDENTIFIER_KINDS.include?(function.kind) + return wrap(:FCALL, children: [function.text.to_sym, list(args, source: call_source)], source: call_source) + end + + if ruby? 
&& function && const_node?(function) + return wrap(:FCALL, children: [function.text.to_sym, list(args, source: call_source)], source: call_source) + end + + if member_read_node?(function) + recv, mid = member_parts(function) + type = safe_navigation_call?(function) ? :QCALL : :CALL + return wrap(type, children: [normalize_node(recv), mid.to_sym, list(args, source: call_source)], source: call_source) + end + + wrap(:CALL, children: [normalize_node(function), :call, list(args, source: call_source)], source: call_source) + end + + def normalize_visibility_inline_def(node) + message = node.named_children.first&.text.to_s + args = node.named_children.find { |child| child.kind == "argument_list" } + method = inline_def_from_argument_list(args) + wrap(:FCALL, children: [message.to_sym, list([method].compact, source: args || node)], source: node) + end + + def normalize_modifier_statement(node) + keyword = modifier_keyword(node) + action, cond = modifier_parts(node) + type = + case keyword + when "unless" then :UNLESS + when "while" then :WHILE + when "until" then :UNTIL + else :IF + end + normalized_action = normalize_modifier_action(action) + children = %i[WHILE UNTIL].include?(type) ? [normalize_node(cond), normalized_action, true] : + [normalize_node(cond), normalized_action, nil] + wrap(type, children: children, source: node) + end + + def normalize_modifier_action(node) + modifier_return_action?(node) ? 
normalize_return_node(node, elide_symbol: false) : normalize_node(node) + end + + def modifier_return_action?(node) + ts_node?(node) && RETURN_KINDS.key?(node.kind) + end + + def normalize_command_call_statement(node) + function = node.named_children.first + if visibility_inline_def_statement?(node, function) + method = inline_def_from_statement(node) + return wrap(:FCALL, children: [function.text.to_sym, list([method].compact, source: node)], source: node) + end + + args_node = node.named_children.find { |child| %w[argument_list arguments].include?(child.kind) } + args = args_node ? command_arguments(args_node) : [] + block = call_block(node) + call_source = block ? source_before_child(node, block) : node + if ruby? && function&.text == "yield" + return wrap(:YIELD, children: [list(args, source: args_node || call_source)], source: call_source) + end + + call = wrap(args.empty? ? :VCALL : :FCALL, + children: [function.text.to_sym, list(args, source: args_node || call_source)], + source: call_source) + return call unless block + + block_args = normalize_block_parameters(block) + body = with_ruby_scope(block) do + dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) + end + wrap(:ITER, children: [call, scope(body, args: block_args, source: node)], source: node) + end + + def dynamic_scope(node) + return node unless node.is_a?(Node) + return node if %i[DEFN DEFS CLASS MODULE SCLASS LAMBDA].include?(node.type) + + node.type = :DASGN if node.type == :LASGN + node.type = :DVAR if node.type == :LVAR + node.children = node.children.map { |child| dynamic_scope(child) } + node + end + + def normalize_zero_child_call(node) + wrap(:VCALL, children: [node.text.to_s.to_sym], source: node) + end + + def normalize_member_read(node) + recv, mid = member_parts(node) + return wrap(kind_type(node.kind), children: normalize_children(node), source: node) unless recv && mid + + wrap(:CALL, children: [normalize_node(recv), mid.to_sym, nil], source: 
node) + end + + def normalize_const(node) + if %w[scope_resolution scoped_type_identifier].include?(node.kind) + parts = node.named_children + base = normalize_const(parts[0]) if parts[0] + name = (named_field(node, "name") || parts[-1])&.text.to_s + return wrap(:COLON2, children: [base, name.to_sym], source: node) + end + + wrap(:CONST, children: [node.text.to_s.to_sym], source: node) + end + + def normalize_children(node) + node.named_children.filter_map do |child| + next if child.kind == "heredoc_body" + next if assignment_rhs?(child) + + normalize_node(child) + end + end + + def normalize_body(node) + return nil unless ts_node?(node) + return normalize_leading_function_statement(node) if leading_function_statement?(node) + return normalize_leading_owner_statement(node) if leading_owner_statement?(node) + return normalize_leading_case_statement(node) if leading_case_statement?(node) + return normalize_ensure_body_statement(node) if ensure_body_statement?(node) + return normalize_rescue_body_statement(node) if rescue_body_statement?(node) + return normalize_heredoc_body_statement(node) if heredoc_body_statement?(node) + return normalize_leading_loop_statement(node) if leading_loop_statement?(node) + return normalize_leading_if_statement(node) if leading_if_statement?(node) + return normalize_elsif(node) if node.kind == "elsif" + return normalize_wrapped_return_statement(node) if wrapped_return_statement?(node) + return normalize_yield_statement(node) if yield_statement?(node) + return normalize_super_statement(node) if super_statement?(node) + return normalize_unary_not_statement(node) if unary_not_statement?(node) + return normalize_operator_assignment_statement(node) if operator_assignment_statement?(node) + return normalize_element_reference_statement(node) if element_reference_statement?(node) + return normalize_hash_literal_statement(node) if hash_literal_statement?(node) + return normalize_array_literal_statement(node) if array_literal_statement?(node) + 
return normalize_concatenated_string_statement(node) if concatenated_string_statement?(node) + return normalize_interpolated_statement(node) if interpolated_statement?(node) + return nil if empty_body_statement?(node) + return normalize_terminal_statement(node) if terminal_statement?(node) + return normalize_modifier_statement(node) if modifier_statement?(node) + return normalize_ternary_statement(node) if ternary_statement?(node) + return normalize_statement_call_with_block(node) if statement_call_with_block?(node) + return normalize_command_call_statement(node) if command_call_statement?(node) + return normalize_infix_statement(node) if infix_statement?(node) + return normalize_boolean(node) if boolean_expression?(node) + return normalize_dotted_expression(node) if dotted_expression?(node) + + if BLOCK_KINDS.include?(node.kind) + children = normalize_children(node) + if children.empty? && bare_identifier_text?(node.text) + return wrap(:VCALL, children: [node.text.to_s.strip.to_sym], source: node) + end + return nil if children.empty? + return children.first if children.size == 1 + + return wrap(:BLOCK, children: children, source: node) + end + + normalize_node(node) + end + + def normalize_body_nodes(nodes, source:) + children = nodes.map { |child| normalize_body(child) }.compact + return nil if children.empty? + return children.first if children.size == 1 + + wrap(:BLOCK, children: children, source: source) + end + + def normalize_patterns(node) + patterns = node.named_children.select do |child| + %w[pattern case_pattern match_pattern switch_pattern when_condition].include?(child.kind) + end + patterns = [named_field(node, "value")].compact if patterns.empty? + patterns = [node.named_children.find { |child| !BLOCK_KINDS.include?(child.kind) && !statement_node?(child) }].compact if patterns.empty? 
+ + patterns.flat_map do |pattern| + pattern_text = pattern.text.to_s + pattern_children = pattern.named_children + if pattern_text.include?("::") + [wrap(:CONST, children: [pattern_text.to_sym], source: pattern)] + elsif %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) && + pattern_children.empty? && pattern_text.match?(/\A-?\d+\z/) + [wrap(:INTEGER, children: [], source: pattern)] + elsif ruby? && %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) && + pattern_children.empty? && pattern_text.match?(/\A[A-Z]\w*\z/) + [wrap(:CONST, children: [pattern_text.to_sym], source: pattern)] + elsif ruby? && %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) && + pattern_children.empty? && pattern_text.match?(/\A[A-Za-z_]\w*[!?=]?\z/) + [local_or_call_for_name(pattern_text, pattern)] + elsif %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) + pattern_children.map { |child| normalize_node(child) }.compact + else + [normalize_node(pattern)].compact + end + end + end + + def assignment_target(left, right, source: nil) + return nil unless ts_node?(left) + source ||= left + + if instance_variable?(left) + return wrap(:IASGN, children: [left.text.to_s, right], source: source) + end + + if global_variable?(left) + return wrap(:GASGN, children: [left.text.to_s, right], source: source) + end + + if left.kind == "element_reference" + recv = left.named_children.first + args = left.named_children.drop(1).map { |child| normalize_node(child) }.compact + return wrap(:ATTRASGN, children: [normalize_node(recv), :[]=, list(args + [right], source: left)], + source: source) + end + + if member_read_node?(left) || normalization_adapter.member_assignment_target?(left) + recv, mid = member_parts(left) + writer = left.text.to_s.include?("&.") ? 
mid.to_sym : "#{mid}=".to_sym + return wrap(:ATTRASGN, children: [normalize_node(recv), writer, list([right], source: left)], + source: source) + end + + return assignment_target(left.named_children.first, right, source: source) if left.kind == "expression_list" + + nil + end + + def normalize_assignment_lhs(node) + right = normalize_node(next_named_sibling(node)) + source = parent_node(node) || node + assignment_target(node, right, source: source) || + wrap(:LASGN, children: [target_name(node), right], source: source) + end + + def target_name(left) + return left.text.to_s.sub(/\A\*/, "") if ts_node?(left) && IDENTIFIER_KINDS.include?(left.kind) + return left.text.to_s.sub(/\A\*/, "") if ts_node?(left) && %w[splat splat_parameter rest_assignment].include?(left.kind) + return left.text.to_s if ts_node?(left) + + Ast.slice(normalize_node(left), @document.lines) + end + + def case_value(node) + named_field(node, "value") || named_field(node, "subject") || + named_field(node, "condition") || + node.named_children.find do |child| + !WHEN_KINDS.include?(child.kind) && !BLOCK_KINDS.include?(child.kind) && child.kind != "else" + end + end + + def case_arms(node) + arms = [] + stack = node.named_children.dup + until stack.empty? 
+ child = stack.shift + next unless ts_node?(child) + + if normalization_adapter.case_arm?(child) + arms << child + elsif normalization_adapter.case_else_node?(child) + next + else + stack.concat(child.named_children) unless FUNCTION_KINDS.include?(child.kind) + end + end + arms + end + + def when_body(node) + named_field(node, "body") || named_field(node, "consequence") || + named_field(node, "value") || + node.named_children.reverse.find { |child| BLOCK_KINDS.include?(child.kind) || statement_node?(child) } + end + + def link_when_chain(whens, fallback = nil) + whens.reverse.inject(fallback) do |next_when, current| + current.children[2] = next_when + current + end + end + + def case_else_body(node) + else_node = normalization_adapter.case_else_node(node) + return nil unless else_node + + if normalization_adapter.case_else_arm?(else_node) || else_node.kind == "switch_default" + body = when_body(else_node) + return normalize_body(body) if body + end + + normalize_else_or_branch(else_node) + end + + def normalize_else_or_branch(node) + return nil unless ts_node?(node) + return normalize_body(node) unless node.kind == "else" + + normalize_body_nodes(node.named_children, source: node) + end + + def link_rescue_chain(resbodies) + resbodies.reverse.inject(nil) do |next_rescue, current| + current.children[2] = next_rescue + current + end + end + + def boolean_expression?(node) + (normalization_adapter.boolean_expression_kind?(node) || boolean_statement?(node)) && + %w[and or].include?(boolean_operator(node)) + end + + def boolean_statement?(node) + return false unless %w[body_statement block_body statement argument_list].include?(node.kind) + return false unless %w[&& || and or].include?(binary_operator(node)) + return false if node.named_children.size < 2 + + node.children.all? do |child| + child.named? 
|| %w[&& || and or ( )].include?(child.text.to_s) + end + end + + def operator_call_expression?(node) + normalization_adapter.operator_call_expression?(node) + end + + def infix_statement?(node) + left, operator, right = infix_statement_parts(node) + left && right && INFIX_STATEMENT_OPERATORS.include?(operator) + end + + def dotted_expression?(node) + normalization_adapter.dotted_expression_wrapper?(node) && dotted_call?(node) + end + + def argument_list_call_with_block?(node) + return false unless node.kind == "argument_list" + return false if dotted_call?(node) + return false unless call_block(node) + + IDENTIFIER_KINDS.include?(node.named_children.first&.kind) + end + + def infix_statement_parts(node) + return [nil, nil, nil] unless %w[body_statement block_body statement argument_list].include?(node.kind) + + named_index = 0 + left = nil + right = nil + operator = nil + node.children.each do |child| + if child.named? + left ||= child + right = child if operator + named_index += 1 + elsif INFIX_STATEMENT_OPERATORS.include?(child.text.to_s) + operator = child.text.to_s + end + end + return [nil, nil, nil] unless named_index == 2 && operator + + [left, operator, right] + rescue StandardError + [nil, nil, nil] + end + + def argument_list_unary_not?(node) + node.kind == "argument_list" && + node.children.first&.text == "!" && + node.named_children.size == 1 + rescue StandardError + false + end + + def unary_not_statement?(node) + %w[body_statement block_body statement argument_list].include?(node.kind) && + node.children.first&.text == "!" && + node.named_children.size == 1 + rescue StandardError + false + end + + def normalize_argument_list_unary_not(node) + return nil unless ruby? 
&& ts_node?(node) && argument_list_unary_not?(node) + + operand = node.named_children.first + wrap(:OPCALL, children: [normalize_node(operand), :!, nil], source: node) + end + + def comparison_expression?(node) + return false if literal_fragment_expression_list?(node) + + normalization_adapter.comparison_expression_kind?(node) && + COMPARISON_OPERATORS.include?(comparison_operator(node)) + end + + def regex_literal?(node) + ts_node?(node) && %w[regex regex_literal].include?(node.kind) + end + + def unary_not_expression?(node) + normalization_adapter.unary_not_expression?(node) + end + + def unary_minus_expression?(node) + normalization_adapter.unary_minus_expression?(node) + end + + def boolean_operator(node) + direct = binary_operator(node) + return "and" if %w[&& and].include?(direct) + return "or" if %w[|| or].include?(direct) + return nil if ts_node?(node) + + text = spaced_text(node) + return "and" if text.include?("&&") || text.match?(/\band\b/) + return "or" if text.include?("||") || text.match?(/\bor\b/) + + nil + end + + def comparison_operator(node) + direct = binary_operator(node) + return direct if COMPARISON_OPERATORS.include?(direct) + + spaced_text(node)[/(===|!==|==|!=|<=|>=|<|>)/, 1] + end + + def binary_operator(node) + normalization_adapter.binary_operator(node) + end + + def spaced_text(node) + " #{node.text} " + end + + def class_node?(node) + normalization_adapter.class_node?(node) + end + + def module_node?(node) + MODULE_KINDS.include?(node.kind) && named_field(node, "name") + end + + def unwrap_node?(node) + normalization_adapter.unwrap_node?(node) + end + + def statement_node?(node) + node.kind.end_with?("_statement") || node.kind.end_with?("_expression") || + %w[return break next].include?(node.kind) + end + + def local_identifier?(node) + IDENTIFIER_KINDS.include?(node.kind) + end + + def ruby_vcall_identifier?(node) + return false unless ruby? 
+ return false unless IDENTIFIER_KINDS.include?(node.kind) + return false if assignment_lhs?(node) + return false if ruby_definition_identifier?(node) + + !ruby_local_name?(node.text.to_s) + end + + def ruby_definition_identifier?(node) + parent = parent_node(node) + return false unless ts_node?(parent) + + if %w[method singleton_method].include?(parent.kind) + name = named_field(parent, "name") || + parent.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) } + return same_ts_node?(name, node) + end + + %w[ + method_parameters block_parameters lambda_parameters + optional_parameter keyword_parameter block_parameter + ].include?(parent.kind) + end + + def ruby_local_name?(name) + @local_stack.reverse.any? { |scope| scope.include?(name) } + end + + def ruby? + normalization_adapter.ruby? + end + + def normalization_adapter + @normalization_adapter ||= TreeSitterNormalizationAdapter.for(@document) + end + + def interpolated_string?(node) + normalization_adapter.interpolated_string?(node) + end + + def lambda_expression?(node) + normalization_adapter.lambda_expression?(node) + end + + def lambda_target(node) + normalization_adapter.lambda_target(node) + end + + def interpolation_node?(node) + normalization_adapter.interpolation_node?(node) + end + + def normalize_interpolated_string(node) + wrap(:DSTR, children: normalize_children(node), source: node) + end + + def vcall_identifier?(node) + return false unless local_identifier?(node) + return false if ruby? 
&& ruby_local_name?(node.text.to_s) + + parent = parent_node(node) + return false unless ts_node?(parent) + return false if %w[method method_parameters parameter_list argument_list arguments].include?(parent.kind) + return false if member_read_node?(parent) + return false if dotted_expression?(parent) + return false if assignment_lhs?(node) || assignment_rhs?(node) + + return true if %w[body_statement block_body then].include?(parent.kind) && parent_named_child?(parent, node) + return true if %w[if_modifier unless_modifier].include?(parent.kind) && same_ts_node?(parent.named_children.first, node) + + false + end + + def const_node?(node) + CONST_KINDS.include?(node.kind) + end + + def self_node?(node) + %w[self this].include?(node.kind) || node.text == "self" || node.text == "this" + end + + def instance_variable?(node) + normalization_adapter.instance_variable?(node) + end + + def global_variable?(node) + normalization_adapter.global_variable?(node) + end + + def member_read_node?(node) + ts_node?(node) && MEMBER_KINDS.include?(node.kind) && member_parts(node).all? 
+ end + + def assignment_lhs?(node) + return false if prev_sibling(node)&.text == ":" + return false if literal_fragment_assignment_context?(node) + + sibling = next_sibling(node) + sibling && assignment_operator?(sibling.text) + end + + def assignment_rhs?(node) + return false if literal_fragment_assignment_context?(node) + + sibling = prev_sibling(node) + sibling && assignment_operator?(sibling.text) + end + + def literal_fragment_assignment_context?(node) + normalization_adapter.literal_fragment_assignment_context?(node) + end + + def literal_fragment_expression_list?(node) + return false unless ts_node?(node) && node.kind == "expression_list" + + named = node.named_children + named.size == 1 && literal_fragment_assignment_context?(named.first) + rescue StandardError + false + end + + def assignment_operator?(text) + normalization_adapter.assignment_operator?(text) + end + + def operator_assignment_operator(node) + raw = node.children.find { |child| !child.named? && child.text.to_s.end_with?("=") }&.text.to_s + op = raw.sub(/=\z/, "") + op = "||" if raw == "||=" + op = "&&" if raw == "&&=" + op.to_sym + end + + def augmented_assignment_value(left, operator, right_raw, source) + receiver = assignment_receiver(left) + right = normalize_node(right_raw) + wrap(:CALL, children: [receiver, operator, list([right].compact, source: right_raw || left)], source: source) + end + + def normalize_logical_operator_assignment(left, operator, right, source:) + return nil unless ruby? && [:"||", :"&&"].include?(operator) + return nil unless ts_node?(left) && IDENTIFIER_KINDS.include?(left.kind) + + name = target_name(left) + type = operator == :"||" ? 
:OP_ASGN_OR : :OP_ASGN_AND + receiver = wrap(:LVAR, children: [name], source: left) + assignment = wrap(:LASGN, children: [name, right], source: source) + wrap(type, children: [receiver, operator, assignment], source: source) + end + + def assignment_receiver(left) + return nil unless ts_node?(left) + return wrap(:LVAR, children: [left.text.to_s], source: left) if IDENTIFIER_KINDS.include?(left.kind) + return wrap(:IVAR, children: [left.text.to_s], source: left) if instance_variable?(left) + return normalize_global_variable(left) if global_variable?(left) + return normalize_const(left) if const_node?(left) + + normalize_node(left) + end + + def with_ruby_scope(node, reset: false) + return yield unless ruby? + + previous = @local_stack + @local_stack = [] if reset + @local_stack = @local_stack + [ruby_scope_locals(node)] + yield + ensure + @local_stack = previous if ruby? + end + + def ruby_scope_locals(node) + locals = Set.new + collect_ruby_scope_locals(node, locals, root: true) + locals + end + + def collect_ruby_scope_locals(node, locals, root: false) + return unless ts_node?(node) + return if !root && ruby_scope_boundary?(node) + + collect_ruby_parameter_locals(node, locals) + collect_ruby_assignment_locals(node, locals) + + node.named_children.each do |child| + next if ruby_scope_child_boundary?(child) + + collect_ruby_scope_locals(child, locals) + end + end + + def collect_ruby_parameter_locals(node, locals) + return unless %w[method_parameters block_parameters lambda_parameters].include?(node.kind) + + node.named_children.each do |child| + collect_identifier_names(child, locals) + end + end + + def collect_ruby_assignment_locals(node, locals) + if node.kind == "exception_variable" + collect_identifier_names(node, locals) + return + end + + return unless ruby_assignment_node?(node) + + left = assignment_left(node) + collect_assignment_target_names(left, locals) + end + + def ruby_assignment_node?(node) + return false unless ts_node?(node) + return true if 
%w[assignment operator_assignment].include?(node.kind) + return true if node.kind == "pattern" && node.children.any? { |child| !child.named? && child.text == "=" } + + %w[body_statement block_body statement].include?(node.kind) && + node.children.any? { |child| !child.named? && assignment_operator?(child.text) } + end + + def collect_assignment_target_names(node, locals) + return unless ts_node?(node) + + if IDENTIFIER_KINDS.include?(node.kind) + locals.add(node.text.to_s.sub(/\A\*/, "")) + return + end + + return unless %w[left_assignment_list expression_list splat splat_parameter rest_assignment].include?(node.kind) + + node.named_children.each { |child| collect_assignment_target_names(child, locals) } + end + + def collect_identifier_names(node, locals) + return unless ts_node?(node) + + locals.add(node.text.to_s.sub(/\A\*/, "")) if IDENTIFIER_KINDS.include?(node.kind) + locals.add(node.text.to_s) if normalization_adapter.identifier_text_node?(node) + node.children.select(&:named?).each { |child| collect_identifier_names(child, locals) } + end + + def ruby_scope_boundary?(node) + return false if %w[block do_block].include?(node.kind) && parent_node(node)&.kind == "lambda" + + FUNCTION_KINDS.include?(node.kind) || class_node?(node) || module_node?(node) || + %w[singleton_class lambda block do_block].include?(node.kind) + end + + def ruby_scope_child_boundary?(node) + ruby_scope_boundary?(node) + end + + def member_parts(node) + return [nil, nil] if node.kind == "expression_list" && + !(named_field(node, "operand") && named_field(node, "field")) + + return dotted_call_parts(node) if dotted_call?(node) + + recv = named_field(node, "receiver") || named_field(node, "object") || + named_field(node, "operand") || named_field(node, "value") || + named_field(node, "expression") || + node.named_children.find { |child| child.kind != "navigation_suffix" } + mid = named_field(node, "method") || named_field(node, "field") || + named_field(node, "property") || 
named_field(node, "suffix") || + node.named_children.find { |child| child.kind == "navigation_suffix" } || + node.named_children.reject { |child| %w[block do_block argument_list arguments].include?(child.kind) }.last + return [nil, nil] unless recv && mid && recv != mid + + [recv, member_name(mid).sub(/=\z/, "")] + end + + def member_name(node) + return "" unless ts_node?(node) + + if node.kind == "navigation_suffix" + suffix = named_field(node, "suffix") || + node.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) } || + node.named_children.last + return suffix&.text.to_s.sub(/\A[.?]+/, "") + end + + node.text.to_s.sub(/\A[.?]+/, "") + end + + def call_arguments(node, function) + args = named_field(node, "arguments") || named_field(node, "argument") || + node.named_children.find { |child| %w[argument_list arguments].include?(child.kind) } + return [] unless args + + children = args.named_children.reject { |child| function && child == function } + return [normalize_dotted_expression(args)] if dotted_expression?(args) + if children.empty? 
+ scalar = scalar_argument_list_value(args) + return [scalar] if scalar + + return literal_arguments_from_text(args) + end + return [normalize_infix_statement(args)] if infix_statement?(args) + + children.map { |child| normalize_node(child) }.compact + end + + def assignment_left(node) + named_field(node, "left") || node.named_children.first + end + + def assignment_right(node) + named_field(node, "right") || node.named_children[1] + end + + def function_name(node) + return singleton_name(node) if node.kind == "singleton_method" + + name = named_field(node, "name") || + node.named_children.find do |child| + IDENTIFIER_KINDS.include?(child.kind) || child.kind == "constant" + end + name&.text.to_s.to_sym + end + + def singleton_receiver(node) + receiver = named_field(node, "receiver") + return receiver if receiver + + name = named_field(node, "name") || + node.named_children.reverse.find { |child| IDENTIFIER_KINDS.include?(child.kind) } + parameters = named_field(node, "parameters") + body = named_field(node, "body") || block_child(node) + node.named_children.find do |child| + !same_ts_node?(child, name) && + !same_ts_node?(child, parameters) && + !same_ts_node?(child, body) + end + end + + def singleton_name(node) + name = named_field(node, "name")&.text || + node.named_children.reverse.find { |child| IDENTIFIER_KINDS.include?(child.kind) }&.text.to_s + name.to_s.to_sym + end + + def first_named(node) + node.named_children.first + end + + def block_child(node) + node.named_children.find { |child| BLOCK_KINDS.include?(child.kind) || %w[block do_block].include?(child.kind) } + end + + def branch_child(node, cond, index) + node.named_children.reject { |child| child == cond || %w[comment else elsif].include?(child.kind) }[index] + end + + def explicit_alternative(node) + normalization_adapter.explicit_alternative(node) + end + + def const_for(node) + return wrap(:CONST, children: ["(anonymous)".to_sym], source: @document.root) unless ts_node?(node) + return 
normalize_const(node) if const_node?(node) + + wrap(:CONST, children: [node.text.to_s.to_sym], source: node) + end + + def normalize_parameters(node) + return nil unless ruby? && ts_node?(node) + + defaults = node.named_children.filter_map do |param| + name = named_field(param, "name") + value = named_field(param, "value") + next unless name && value + + wrap(:LASGN, children: [name.text.to_sym, normalize_node(value)], source: param) + end + return nil if defaults.empty? + + wrap(:ARGS, children: defaults, source: node) + end + + def normalize_block_parameters(block) + return nil unless ruby? && ts_node?(block) + + params = block.named_children.find { |child| child.kind == "block_parameters" } + return nil unless params + + destructured = params.named_children.select { |child| child.kind == "destructured_parameter" } + pre_init = destructured.map { |param| normalize_destructured_block_parameter(param) }.compact + return nil if pre_init.empty? + + wrap(:ARGS, children: pre_init, source: params) + end + + def normalize_destructured_block_parameter(param) + targets = [] + param.named_children.each { |child| collect_destructured_parameter_targets(child, targets) } + return nil if targets.empty? + + wrap(:MASGN, + children: [ + wrap(:DVAR, children: [nil], source: param), + list(targets, source: param), + nil, + ], + source: param) + end + + def collect_destructured_parameter_targets(node, targets) + return unless ts_node?(node) + + if IDENTIFIER_KINDS.include?(node.kind) + targets << wrap(:DASGN, children: [node.text.to_s, nil], source: node) + return + end + + node.named_children.each { |child| collect_destructured_parameter_targets(child, targets) } + end + + def scope(body, args: nil, source: nil) + wrap(:SCOPE, children: [nil, args, body], source: body || args || source || @document.root) + end + + def list(children, source:) + return nil if children.nil? || children.empty? 
+ + wrap(:LIST, children: children, source: source) + end + + def wrap(type, children:, source:) + if source.respond_to?(:start_point) + first_lineno = source.start_point.row + 1 + first_column = source.start_point.column + last_lineno = source.end_point.row + 1 + last_column = source.end_point.column + text = source.text.to_s + else + first_lineno = source.first_lineno + first_column = source.first_column + last_lineno = source.last_lineno + last_column = source.last_column + text = source.text.to_s + end + + Node.new( + type: type, + children: children, + first_lineno: first_lineno, + first_column: first_column, + last_lineno: last_lineno, + last_column: last_column, + text: text + ) + end + + def source_before_child(node, child) + text = @document.source.byteslice(node.start_byte...child.start_byte).to_s.rstrip + return node if text.empty? + + lines = text.lines + last_lineno = node.start_point.row + lines.size + last_column = + if lines.size <= 1 + node.start_point.column + text.length + else + lines.last.to_s.chomp.length + end + Node.new( + type: :SOURCE, + children: [], + first_lineno: node.start_point.row + 1, + first_column: node.start_point.column, + last_lineno: last_lineno, + last_column: last_column, + text: text + ) + end + + def source_from_nodes(first_node, last_node) + return first_node unless ts_node?(first_node) && ts_node?(last_node) + + text = @document.source.byteslice(first_node.start_byte...last_node.end_byte).to_s + Node.new( + type: :SOURCE, + children: [], + first_lineno: first_node.start_point.row + 1, + first_column: first_node.start_point.column, + last_lineno: last_node.end_point.row + 1, + last_column: last_node.end_point.column, + text: text + ) + end + + def source_from_normalized_nodes(first_node, last_node) + return first_node unless first_node.is_a?(Node) && last_node.is_a?(Node) + + text = + if first_node.first_lineno == last_node.last_lineno + @document.lines[first_node.first_lineno - 
1].to_s.byteslice(first_node.first_column...last_node.last_column) + else + ([@document.lines[first_node.first_lineno - 1].to_s.byteslice(first_node.first_column..)] + + @document.lines[first_node.first_lineno...(last_node.last_lineno - 1)] + + [@document.lines[last_node.last_lineno - 1].to_s.byteslice(0...last_node.last_column)]).join + end + Node.new( + type: :SOURCE, + children: [], + first_lineno: first_node.first_lineno, + first_column: first_node.first_column, + last_lineno: last_node.last_lineno, + last_column: last_node.last_column, + text: text.to_s + ) + end + + def named_field(node, name) + normalization_adapter.named_field(node, name) + end + + def parent_node(node) + node.parent + rescue StandardError + nil + end + + def next_sibling(node) + node.next_sibling + rescue StandardError + nil + end + + def prev_sibling(node) + node.prev_sibling + rescue StandardError + nil + end + + def next_named_sibling(node) + node.next_named_sibling + rescue StandardError + nil + end + + def modifier_statement?(node) + %w[body_statement block_body statement].include?(node.kind) && + modifier_keyword(node) && + node.named_children.size >= 2 + end + + def ternary_statement?(node) + normalization_adapter.ternary_statement?(node) + end + + def normalize_ternary_statement(node) + cond, positive, negative = normalization_adapter.ternary_parts(node) + wrap(:IF, children: [normalize_node(cond), normalize_node(positive), normalize_node(negative)], source: node) + end + + def case_argument_list?(node) + normalization_adapter.case_argument_list?(node) + end + + def leading_function_statement?(node) + normalization_adapter.leading_function_statement?(node) + end + + def normalize_leading_function_statement(node) + name = normalization_adapter.leading_function_name(node).to_s.to_sym + body = normalization_adapter.leading_function_body(node) + normalized_body = with_ruby_scope(node, reset: true) do + elide_tail_returns(normalize_body(body)) + end + wrap(:DEFN, children: [name, 
scope(normalized_body, source: node)], source: node) + end + + def command_call_statement?(node) + return false unless %w[body_statement block block_body statement].include?(node.kind) + return false if dotted_call?(node) + return false unless node.named_children.first&.kind == "identifier" + + node.named_children.any? { |child| %w[argument_list arguments].include?(child.kind) } || + call_block(node) + end + + def zero_child_identifier_call?(node) + normalization_adapter.zero_child_identifier_call?(node) + end + + def dotted_call?(node) + return false unless ts_node?(node) + target = dotted_call_target(node) + return true if target && dotted_call_node?(target) + + dotted_call_node?(node) + end + + def dotted_call_node?(node) + return false unless ts_node?(node) + return false unless node.children.any? { |child| child.text == "." || child.text == "&." } + + callable = dotted_callable_children(node) + return false if callable.any? { |child| %w[string_content interpolation].include?(child.kind) } + + callable.size >= 2 + end + + def dotted_call_target(node) + return nil unless ts_node?(node) + + named = node.named_children + return nil unless named.size == 1 + + child = named.first + dotted_call_node?(child) ? 
child : nil + rescue StandardError + nil + end + + def dotted_callable_children(node) + node.named_children.reject { |child| %w[block do_block argument_list arguments].include?(child.kind) } + end + + def safe_navigation_call?(node) + ts_node?(node) && normalization_adapter.safe_navigation_call?(node) + end + + def dotted_call_parts(node) + target = dotted_call_target(node) || node + callable = dotted_callable_children(target) + [callable.first, callable[1].text.to_s.sub(/=\z/, "")] + end + + def leading_if_statement?(node) + normalization_adapter.leading_if_statement?(node) + end + + def leading_case_statement?(node) + normalization_adapter.leading_case_statement?(node) + end + + def normalize_leading_case_statement(node) + target = normalization_adapter.leading_case_target(node) || node + value = normalize_node(case_value(target)) + whens = case_arms(target).map { |arm| normalize_when(arm) }.compact + wrap(:CASE, children: [value, link_when_chain(whens, case_else_body(target))], source: target) + end + + def leading_loop_statement?(node) + normalization_adapter.leading_loop_statement?(node) + end + + def rescue_body_statement?(node) + normalization_adapter.rescue_body_statement?(node) + end + + def normalize_rescue_body_statement(node) + target = normalization_adapter.rescue_body_target(node) || node + body_nodes = normalization_adapter.rescue_body_nodes(target) + body = normalize_body_nodes(body_nodes, source: target) + rescue_nodes = normalization_adapter.rescue_clauses(target) + resbodies = rescue_nodes.map { |child| normalize_rescue_clause(child) } + source = source_from_nodes(body_nodes.first || target, rescue_source_end(rescue_nodes.last) || rescue_nodes.last || target) + wrap(:RESCUE, children: [body, link_rescue_chain(resbodies), nil], source: source) + end + + def normalize_rescue_clause(node) + exceptions = normalization_adapter.rescue_clause_exceptions(node) + exception_nodes = exceptions.map do |child| + if child.kind == "exceptions" && 
child.text.to_s.match?(/\A[A-Z]\w*(?:::\w+)*\z/) + normalize_const(child) + else + normalize_node(child) + end + end.compact + exception_source = normalization_adapter.rescue_clause_exceptions_source(node) + exception_variable = rescue_exception_variable(node) + handler = normalization_adapter.rescue_clause_handler(node) + body = prepend_rescue_exception_assignment(normalize_body(handler), exception_variable) + wrap(:RESBODY, children: [list(exception_nodes, source: exception_source || node), body, nil], + source: node) + end + + def rescue_source_end(node) + return nil unless ts_node?(node) + + handler = normalization_adapter.rescue_clause_handler(node) + return handler.named_children.last || handler if ts_node?(handler) + + node.named_children.reverse.find { |child| !%w[comment].include?(child.kind) } || node + end + + def rescue_exception_variable(node) + name = normalization_adapter.rescue_clause_exception_variable_name(node) + return nil unless name + + source = normalization_adapter.rescue_clause_exception_variable_source(node) || name + wrap(:LASGN, children: [name.text.to_s, wrap(:ERRINFO, children: [], source: source)], source: source) + end + + def prepend_rescue_exception_assignment(body, assignment) + return body unless assignment + return assignment unless body.is_a?(Node) + + if body.type == :BLOCK + body.children = [assignment] + body.children.compact + body + else + wrap(:BLOCK, children: [assignment, body], source: source_from_normalized_nodes(assignment, body)) + end + end + + def ensure_body_statement?(node) + normalization_adapter.ensure_body_statement?(node) + end + + def normalize_ensure_body_statement(node) + target = normalization_adapter.ensure_body_target(node) || node + body = if rescue_body_statement?(target) + normalize_rescue_body_statement(target) + else + normalize_body_nodes(normalization_adapter.ensure_body_nodes(target), source: target) + end + ensure_node = normalization_adapter.ensure_clause(target) + ensure_body = 
normalize_body(normalization_adapter.ensure_clause_body(ensure_node) || ensure_node) + wrap(:ENSURE, children: [body, ensure_body], source: body || node) + end + + def array_literal_statement?(node) + normalization_adapter.array_literal_statement?(node) + end + + def element_reference_statement?(node) + normalization_adapter.element_reference_statement?(node) + end + + def normalize_element_reference_statement(node) + target = normalization_adapter.element_reference_target(node) || node + recv = normalization_adapter.element_reference_receiver(target) + args = normalization_adapter.element_reference_arguments(target).map { |child| normalize_node(child) }.compact + if ruby? && self_node?(recv) + return wrap(:FCALL, children: [:[], list(args, source: target)], source: target) + end + + wrap(:CALL, children: [normalize_node(recv), :[], list(args, source: target)], source: target) + end + + def hash_literal_statement?(node) + normalization_adapter.hash_literal_statement?(node) + end + + def normalize_hash_literal_statement(node) + target = normalization_adapter.hash_literal_target(node) || node + children = normalization_adapter.hash_literal_values(target).map do |child| + normalize_hash_literal_value(child) + end.compact + wrap(:HASH, children: children, source: target) + end + + def normalize_hash_literal_value(node) + if node.kind == "field" + named = node.named_children + if named.size >= 2 + key = named.first + value = named[1] + key_lit = wrap(:LIT, children: [key.text.to_s.to_sym], source: key || node) + return wrap(:HASH, children: [key_lit, normalize_node(value)].compact, source: node) + end + end + + normalize_node(node) + end + + def normalize_array_literal_statement(node) + target = normalization_adapter.array_literal_target(node) || node + values = normalization_adapter.array_literal_values(target).map do |child| + normalize_array_literal_value(child) + end.compact + return wrap(:ZLIST, children: [], source: target) if values.empty? 
+ + list(values, source: target) + end + + def normalize_array_literal_value(node) + if node.kind == "field" + named = node.named_children + return normalize_node(named.first) if named.size == 1 + return normalize_terminal_statement(node) if named.empty? + end + + normalize_node(node) + end + + def empty_body_statement?(node) + normalization_adapter.empty_body_statement?(node) + end + + def heredoc_body_statement?(node) + normalization_adapter.heredoc_body_statement?(node) + end + + def normalize_heredoc_body_statement(node) + heredoc_bodies = node.named_children.select { |child| child.kind == "heredoc_body" } + children = node.named_children.filter_map do |child| + next if child.kind == "heredoc_body" + + if heredoc_call_for_body?(child) + with_current_heredoc_body(heredoc_bodies.shift) { normalize_node(child) } + else + normalize_body(child) + end + end + return nil if children.empty? + return children.first if children.size == 1 + + wrap(:BLOCK, children: children, source: node) + end + + def heredoc_call_for_body?(node) + return false unless ts_node?(node) + + normalization_adapter.heredoc_call_for_body?(node) + end + + def with_current_heredoc_body(body) + previous = @current_heredoc_body + @current_heredoc_body = body + yield + ensure + @current_heredoc_body = previous + end + + def normalize_heredoc_beginning(node) + body = @current_heredoc_body || + parent_node(parent_node(node))&.named_children&.find { |child| child.kind == "heredoc_body" } + children = body ? normalize_heredoc_children(body) : [] + wrap(:DSTR, children: children, source: node) + end + + def normalize_heredoc_children(node) + node.named_children.filter_map do |child| + case child.kind + when "interpolation" + normalize_interpolation(child) + when "heredoc_content" + text = child.text.to_s + text.empty? ? 
nil : wrap(:STR, children: [text], source: child) + else + nil + end + end + end + + def normalize_interpolation(node) + exprs = node.named_children.map { |child| normalize_node(child) }.compact + body = exprs.size == 1 ? exprs.first : list(exprs, source: node) + wrap(:EVSTR, children: [body].compact, source: node) + end + + def interpolated_statement?(node) + normalization_adapter.interpolated_statement?(node) + end + + def normalize_interpolated_statement(node) + wrap(:DSTR, children: normalize_children(node), source: node) + end + + def concatenated_string_statement?(node) + normalization_adapter.concatenated_string_statement?(node) + end + + def normalize_concatenated_string_statement(node) + normalized = node.named_children.map { |child| [child, normalize_node(child)] } + parts = normalized.flat_map do |_child, child_node| + child_node.is_a?(Node) && child_node.type == :DSTR ? child_node.children : [child_node] + end.compact + wrap(:DSTR, children: parts, source: dynamic_string_source(normalized) || node.named_children.first) + end + + def normalize_chained_string(node) + normalized = node.named_children.map { |child| [child, normalize_node(child)] } + parts = normalized.flat_map do |_child, child_node| + child_node.is_a?(Node) && child_node.type == :DSTR ? child_node.children : [child_node] + end.compact + wrap(:DSTR, children: parts, source: dynamic_string_source(normalized) || node.named_children.first || node) + end + + def dynamic_string_source(normalized_children) + normalized_children.find do |_child, child_node| + child_node.is_a?(Node) && child_node.type == :DSTR && + child_node.children.any? { |part| part.is_a?(Node) && part.type == :EVSTR } + end&.first + end + + def terminal_statement?(node) + %w[body_statement block_body statement argument_list].include?(node.kind) && + node.named_children.empty? && + !node.text.to_s.strip.empty? 
+ end + + def normalize_terminal_statement(node) + text = node.text.to_s.strip + return wrap(:YIELD, children: [nil], source: node) if ruby? && text == "yield" + return wrap(:IVAR, children: [text], source: node) if text.match?(/\A@[A-Za-z_]\w*[!?=]?\z/) + return normalize_global_variable(node) if text.match?(/\A\$/) + return wrap(:NIL, children: [], source: node) if text == "nil" + return wrap(:TRUE, children: [], source: node) if text == "true" + return wrap(:FALSE, children: [], source: node) if text == "false" + return wrap(:LIT, children: [text.delete_prefix(":").to_sym], source: node) if text.match?(/\A:[A-Za-z_]\w*[!?=]?\z/) + return wrap(:INTEGER, children: [text.to_i], source: node) if text.match?(/\A-?\d+\z/) + return wrap(:ZLIST, children: [], source: node) if text == "[]" + + if bare_identifier_text?(text) + return wrap(:VCALL, children: [text.to_sym], source: node) if ruby? && !ruby_local_name?(text) + + return wrap(:LVAR, children: [text], source: node) + end + + wrap(kind_type(node.kind), children: [], source: node) + end + + def normalize_global_variable(node) + text = node.text.to_s + return wrap(:NTH_REF, children: [text.delete_prefix("$").to_i], source: node) if text.match?(/\A\$[1-9]\d*\z/) + + wrap(:GVAR, children: [text], source: node) + end + + def normalize_leading_loop_statement(node) + target = normalization_adapter.leading_loop_target(node) || node + return normalize_loop(target) unless same_ts_node?(target, node) + + keyword = target.children.first.kind + cond = normalize_node(target.named_children.first) + body = normalize_body(target.named_children[1]) + wrap(keyword == "until" ? 
:UNTIL : :WHILE, children: [cond, body], source: target) + end + + def operator_assignment_statement?(node) + %w[body_statement block_body statement].include?(node.kind) && + operator_assignment_statement_parts(node)[1] + rescue StandardError + false + end + + def normalize_operator_assignment_statement(node) + left, operator, right_raw = operator_assignment_statement_parts(node) + right = normalize_node(right_raw) + + if left&.kind == "element_reference" + recv = left.named_children.first + args = left.named_children.drop(1).map { |child| normalize_node(child) }.compact + return wrap(:OP_ASGN1, children: [normalize_node(recv), operator, list(args, source: left), right], + source: node) + end + + if member_read_node?(left) + recv, mid = member_parts(left) + return wrap(:OP_ASGN2, children: [normalize_node(recv), false, mid.to_sym, operator, right], source: node) + end + + logical = normalize_logical_operator_assignment(left, operator, right, source: node) + return logical if logical + if instance_variable?(left) || global_variable?(left) + return assignment_target(left, augmented_assignment_value(left, operator, right_raw, node), source: node) + end + + assignment_target(left, right, source: node) || + wrap(:LASGN, children: [target_name(left), augmented_assignment_value(left, operator, right_raw, node)], + source: node) + end + + def operator_assignment_statement_parts(node) + left = nil + operator = nil + right = nil + node.children.each do |child| + if child.named? 
+ left ||= child + right = child if operator + elsif child.text.to_s.match?(/\A(?:[+\-*\/%&|^]|\|\||&&)=\z/) + raw = child.text.to_s + operator = raw.sub(/=\z/, "") + operator = "||" if raw == "||=" + operator = "&&" if raw == "&&=" + end + end + return [nil, nil, nil] unless left && operator && right + + [left, operator.to_sym, right] + end + + def leading_owner_statement?(node) + normalization_adapter.leading_owner_statement?(node) + end + + def normalize_leading_owner_statement(node) + target = normalization_adapter.leading_owner_target(node) || node + keyword = target.children.first.kind + name = const_for(target.named_children.first) + body_node = named_field(target, "body") || + target.named_children.reverse.find { |child| BLOCK_KINDS.include?(child.kind) } + body = normalize_body(body_node) + if keyword == "module" + wrap(:MODULE, children: [name, scope(body, source: target)], source: target) + else + wrap(:CLASS, children: [name, nil, scope(body, source: target)], source: target) + end + end + + def normalize_leading_if_statement(node) + target = normalization_adapter.leading_if_target(node) || node + return normalize_if(target) unless same_ts_node?(target, node) + + keyword = target.children.first.kind + cond = target.named_children.find { |child| !%w[comment then elsif else].include?(child.kind) } + consequence = target.named_children.find { |child| child.kind == "then" } || + branch_child(target, cond, 0) + alternative = explicit_alternative(target) + type = keyword == "unless" ? :UNLESS : :IF + wrap(type, children: [normalize_node(cond), normalize_body(consequence), normalize_else_or_branch(alternative)], + source: target) + end + + def modifier_keyword(node) + seen_named = false + node.children.each do |child| + seen_named ||= child.named? + return child.kind if seen_named && !child.named? 
&& %w[if unless while until].include?(child.kind) + end + nil + rescue StandardError + nil + end + + def modifier_parts(node) + [node.named_children.first, node.named_children.last] + end + + def call_block(node) + node.named_children.find { |child| %w[block do_block].include?(child.kind) } + end + + def statement_call_with_block?(node) + %w[body_statement block_body statement].include?(node.kind) && + call_block(node) && + statement_block_call(node) + end + + def statement_block_call(node) + return node if dotted_call?(node) + return node if member_read_node?(node) + + block = call_block(node) + node.named_children.find do |child| + !same_ts_node?(child, block) && (CALL_KINDS.include?(child.kind) || member_read_node?(child)) + end + end + + def normalize_statement_call_with_block(node) + block = call_block(node) + call = normalize_call_without_block(statement_block_call(node), block) + args = normalize_block_parameters(block) + body = with_ruby_scope(block) do + dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) + end + wrap(:ITER, children: [call, scope(body, args: args, source: node)], source: node) + end + + def visibility_inline_def_call?(node) + return false unless node.kind == "call" + + message = node.named_children.first&.text.to_s + return false unless INLINE_DEF_WRAPPER_MIDS.include?(message) + + args = node.named_children.find { |child| child.kind == "argument_list" } + args&.text.to_s.lstrip.start_with?("def ") + end + + def visibility_inline_def_statement?(node, function) + INLINE_DEF_WRAPPER_MIDS.include?(function&.text.to_s) && node.text.to_s.include?("def ") + end + + def inline_def_from_argument_list(args) + return nil unless ruby? 
&& ts_node?(args) + + inline_def_from_source(args) + end + + def inline_def_from_statement(node) + source = node.named_children.find { |child| child.kind == "argument_list" } || node + inline_def_from_source(source) + end + + def inline_def_from_source(source) + return nil unless ruby? && ts_node?(source) + + body = inline_def_body(source) + receiver = inline_def_receiver(source) + normalized_body = with_ruby_scope(source, reset: true) do + elide_tail_returns(normalize_body(body)) + end + if receiver + name = inline_def_name_after_receiver(source, receiver) + return nil if name.to_s.empty? + + return wrap(:DEFS, children: [normalize_node(receiver), name.to_sym, scope(normalized_body, source: source)], + source: source) + end + + name = source.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) }&.text.to_s + return nil if name.to_s.empty? + + wrap(:DEFN, children: [name.to_sym, scope(normalized_body, source: source)], source: source) + end + + def inline_def_receiver(source) + return nil unless source.text.to_s.match?(/\bdef\s+[^.\s]+\./) + + source.named_children.find { |child| self_node?(child) || const_node?(child) } + end + + def inline_def_name_after_receiver(source, receiver) + index = source.named_children.index { |child| same_ts_node?(child, receiver) } + source.named_children[(index.to_i + 1)..]&.find { |child| IDENTIFIER_KINDS.include?(child.kind) }&.text.to_s + end + + def inline_def_body(node) + stack = node.named_children.reverse + until stack.empty? + child = stack.shift + return child if child.kind == "body_statement" + + stack.concat(child.named_children.reverse) + end + nil + end + + def literal_arguments_from_text(args) + text = args.text.to_s + return [normalize_heredoc_beginning(args)] if text.match?(/\A\s*<<[-~]?[A-Za-z_]\w*/) + + text.scan(/:([A-Za-z_]\w*[!?=]?)/).map do |name| + wrap(:LIT, children: [name.first.to_sym], source: args) + end + end + + def elide_tail_returns(node) + return node unless ruby? 
+ return node unless node.is_a?(Node) + return node if %i[DEFN DEFS CLASS MODULE SCLASS LAMBDA ITER].include?(node.type) + return node.children.first if node.type == :RETURN + + case node.type + when :BLOCK + children = node.children.dup + children[-1] = elide_tail_returns(children[-1]) if children.any? + node.children = children + when :SCOPE + children = node.children.dup + children[2] = elide_tail_returns(children[2]) + node.children = children + when :IF, :UNLESS + children = node.children.dup + children[1] = elide_tail_returns(children[1]) + children[2] = elide_tail_returns(children[2]) if children.size > 2 + node.children = children + when :CASE + children = node.children.dup + children[1] = elide_tail_returns(children[1]) + node.children = children + when :CASE2 + children = node.children.dup + children[0] = elide_tail_returns(children[0]) + node.children = children + when :WHEN + children = node.children.dup + children[1] = elide_tail_returns(children[1]) + children[2] = elide_tail_returns(children[2]) if children.size > 2 + node.children = children + when :RESCUE + children = node.children.dup + children[0] = elide_tail_returns(children[0]) + children[1] = elide_tail_returns(children[1]) + node.children = children + when :RESBODY + children = node.children.dup + children[1] = elide_tail_returns(children[1]) + children[2] = elide_tail_returns(children[2]) if children.size > 2 + node.children = children + end + + node + end + + def elide_implicit_nil_body(node) + return node unless ruby? + node = drop_trailing_nil_statement(node) + return nil if node.is_a?(Node) && node.type == :NIL + + node + end + + def prepend_inline_parameter_begin(function_node, body) + marker = inline_parameter_begin_marker(function_node) + return body unless marker + + children = body.is_a?(Node) && body.type == :BLOCK ? body.children.compact : [body].compact + return nil if children.empty? 
+ + if body.is_a?(Node) && body.type == :BLOCK + body.children = [marker] + children + body + else + wrap(:BLOCK, children: [marker] + children, source: function_node) + end + end + + def inline_parameter_begin_marker(function_node) + return nil unless ruby? + + params = named_field(function_node, "parameters") || + function_node.named_children.find { |child| child.kind == "method_parameters" } + return nil unless params + + semicolon = params.next_sibling + return nil unless semicolon && !semicolon.named? && semicolon.text == ";" + + Node.new( + type: :BEGIN, + children: [nil], + first_lineno: semicolon.start_point.row + 1, + first_column: semicolon.start_point.column, + last_lineno: semicolon.start_point.row + 1, + last_column: semicolon.start_point.column, + text: "" + ) + rescue StandardError + nil + end + + def drop_trailing_nil_statement(node) + return node unless node.is_a?(Node) && node.type == :BLOCK + + children = node.children.compact + children.pop while children.last.is_a?(Node) && children.last.type == :NIL + return nil if children.empty? + return children.first if children.size == 1 + + node.children = children + node + end + + def scalar_argument_list_value(node) + text = node.text.to_s.strip + return wrap(:YIELD, children: [nil], source: node) if ruby? && text == "yield" + return wrap(:NIL, children: [], source: node) if text == "nil" + return wrap(:TRUE, children: [], source: node) if text == "true" + return wrap(:FALSE, children: [], source: node) if text == "false" + return wrap(:LIT, children: [text.delete_prefix(":").to_sym], source: node) if text.match?(/\A:[A-Za-z_]\w*[!?=]?\z/) + if text.match?(/\A-?\d+\z/) + return wrap(:INTEGER, children: [text.to_i], source: node) + end + return nil unless bare_identifier_text?(text) + + if ruby? && !ruby_local_name?(text) + wrap(:VCALL, children: [text.to_sym], source: node) + else + wrap(:LVAR, children: [text], source: node) + end + end + + def local_or_call_for_name(name, source) + if ruby? 
&& !ruby_local_name?(name) + wrap(:VCALL, children: [name.to_sym], source: source) + else + wrap(:LVAR, children: [name], source: source) + end + end + + def symbol_literal_node?(node) + node.is_a?(Node) && node.type == :LIT && node.children.first.is_a?(Symbol) + end + + def command_arguments(args) + return [scalar_argument_list_value(args)].compact if args.named_children.empty? + return [normalize_infix_statement(args)] if infix_statement?(args) + return [normalize_dotted_expression(args)] if dotted_expression?(args) + + args.named_children.map { |child| normalize_node(child) }.compact + end + + def parent_named_child?(parent, node) + parent.named_children.any? { |child| same_ts_node?(child, node) } + end + + def same_ts_node?(left, right) + left.kind == right.kind && left.start_byte == right.start_byte && left.end_byte == right.end_byte + rescue StandardError + false + end + + def node_key(node) + [node.kind, node.start_byte, node.end_byte] + rescue StandardError + node.object_id + end + + def bare_identifier_text?(text) + text.to_s.strip.match?(/\A[A-Za-z_]\w*[!?=]?\z/) + end + + def hidden_match?(node) + node.kind == "expression_statement" && + node.text.to_s.lstrip.start_with?("match ") && + node.named_children.any? 
{ |child| child.kind == "match_block" } + end + + def kind_type(kind) + kind.to_s.upcase.gsub(/[^A-Z0-9]+/, "_").to_sym + end + + def ts_node?(node) + node && node.respond_to?(:kind) && node.respond_to?(:named_children) + end + end + + end +end diff --git a/gems/decomplex/lib/decomplex/ast/node.rb b/gems/decomplex/lib/decomplex/ast/node.rb new file mode 100644 index 000000000..44f618049 --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/node.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Decomplex + module Ast + Node = Struct.new( + :type, :children, :first_lineno, :first_column, :last_lineno, :last_column, + :text, + keyword_init: true + ) + + module_function + + def node?(node) + node.is_a?(Node) + end + end +end diff --git a/gems/decomplex/lib/decomplex/ast/semantic_node.rb b/gems/decomplex/lib/decomplex/ast/semantic_node.rb new file mode 100644 index 000000000..dfba884c3 --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/semantic_node.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Decomplex + module Ast + SemanticNode = Struct.new( + :type, :children, :span, :text, :language, :metadata, + keyword_init: true + ) do + def [](key) + metadata.fetch(key) + end + + def fetch(key, *fallback) + metadata.fetch(key, *fallback) + end + + def walk(&block) + return enum_for(:walk) unless block + + block.call(self) + children.each { |child| child.walk(&block) if child.respond_to?(:walk) } + end + end + + module_function + + def semantic_node?(node) + node.is_a?(SemanticNode) + end + end +end diff --git a/gems/decomplex/lib/decomplex/ast/semantic_normalizer.rb b/gems/decomplex/lib/decomplex/ast/semantic_normalizer.rb new file mode 100644 index 000000000..c56030a99 --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/semantic_normalizer.rb @@ -0,0 +1,136 @@ +# frozen_string_literal: true + +require_relative "semantic_node" + +module Decomplex + module Ast + class SemanticNormalizer + FACT_COLLECTIONS = { + owner_defs: :owner, + function_defs: 
:function, + call_sites: :call, + state_declarations: :state_declaration, + state_param_origins: :state_param_origin, + state_reads: :state_read, + state_writes: :state_write, + decision_sites: :decision, + branch_arms: :branch_arm + }.freeze + + attr_reader :document + + def initialize(document) + @document = document + end + + def normalize + SemanticNode.new( + type: :root, + children: semantic_facts, + span: root_span, + text: document.source.to_s, + language: document.language&.to_sym, + metadata: { + file: document.file, + language: document.language&.to_sym + } + ) + end + + private + + def semantic_facts + FACT_COLLECTIONS.flat_map do |collection, type| + Array(document.public_send(collection)).map { |fact| semantic_fact(type, fact) } + end.sort_by { |node| [node.span[0], node.span[1], node.type.to_s, node.text.to_s] } + end + + def semantic_fact(type, fact) + metadata = fact.to_h + source_text = source_text(metadata[:span]) + metadata[:enclosing_span] = enclosing_decision_span(metadata) if type == :decision + SemanticNode.new( + type: type, + children: [], + span: metadata[:span] || line_span(metadata[:line]), + text: source_text.empty? ? fact_text(type, metadata) : source_text, + language: document.language&.to_sym, + metadata: metadata.merge(language: document.language&.to_sym, source_text: source_text) + ) + end + + def fact_text(type, metadata) + case type + when :function + metadata[:signature] || metadata[:name].to_s + when :call + compact_text(metadata[:receiver], metadata[:message]).join(".") + when :decision + metadata[:predicate].to_s + when :branch_arm + metadata[:body].to_s + when :state_read, :state_write, :state_declaration, :state_param_origin + compact_text(metadata[:receiver], metadata[:field]).join(".") + else + metadata[:name].to_s + end + end + + def compact_text(*values) + values.compact.map(&:to_s).reject(&:empty?) 
+ end + + def root_span + last_line = document.lines.length + last_column = document.lines.last.to_s.length + [1, 0, [last_line, 1].max, last_column] + end + + def line_span(line) + line_number = line || 1 + [line_number, 0, line_number, 0] + end + + def enclosing_decision_span(metadata) + span = metadata[:span] + return span unless span + + line = span[0] + source_line = document.lines[line - 1].to_s + keyword_column = source_line.index(/\b(if|unless|while|until)\b/) + return span unless keyword_column && keyword_column <= span[1] + + end_line, end_column = matching_end_point(line, keyword_column) + [line, keyword_column, end_line, end_column] + end + + def matching_end_point(start_line, keyword_column) + depth = 0 + document.lines[(start_line - 1)..].to_a.each_with_index do |line_text, offset| + stripped = line_text.strip + depth += 1 if stripped.match?(/\A(?:if|unless|while|until)\b/) + if stripped == "end" && line_text.index(/\S/).to_i == keyword_column + depth -= 1 + return [start_line + offset, keyword_column + stripped.length] if depth <= 0 + end + end + [start_line, document.lines[start_line - 1].to_s.length] + end + + def source_text(span) + return "" unless span + + first_line, first_column, last_line, last_column = span + if first_line == last_line + return document.lines[first_line - 1].to_s[first_column...last_column].to_s + end + + parts = [] + parts << document.lines[first_line - 1].to_s[first_column..].to_s + parts.concat(document.lines[first_line...(last_line - 1)] || []) + parts << document.lines[last_line - 1].to_s[0...last_column].to_s + parts.join + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/ast/source_map.rb b/gems/decomplex/lib/decomplex/ast/source_map.rb new file mode 100644 index 000000000..101c4ff3e --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/source_map.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +require_relative "node" + +module Decomplex + module Ast + module_function + + # Exact source text of a 
node, trivial formatting normalised. + def slice(node, _lines) + return "" unless node?(node) + + node.text.to_s.strip.gsub(/\s+/, " ") + end + end +end diff --git a/gems/decomplex/lib/decomplex/decision_pressure.rb b/gems/decomplex/lib/decomplex/decision_pressure.rb index 7c2e9b8da..372a21155 100644 --- a/gems/decomplex/lib/decomplex/decision_pressure.rb +++ b/gems/decomplex/lib/decomplex/decision_pressure.rb @@ -1,190 +1,145 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex - # Decision-pressure: attribute every defensive type/nil guard to the - # canonical ROOT CONTRACT its subject comes from, then rank contracts + # Decision-pressure: attribute defensive type/nil guards to the + # canonical root contract their subject comes from, then rank contracts # by how many re-derived decisions they drive. - # - # Use-role discipline (Rapps & Weyuker 1985 c-use/p-use; McCabe / - # Cognitive Complexity count DECISIONS, not reads): a single blended - # "N defensive decisions" scalar is a category error -- it sums - # populations with OPPOSITE actions. This detector therefore splits, - # and the report NEVER presents one combined number: - # - # * c-use (`emit(x.full_type)`, `y = x.full_type`, `return - # x.full_type`) -- pure consumption, NOT a decision. Excluded by - # construction (never recorded). Not complexity. - # * ELIMINABLE guard (`x.nil?`, `is_a?`, `kind_of?`, - # `instance_of?`, `respond_to?`, `x&.m`, `x.acc rescue nil`) -- - # contract-eliminable: a stronger contract removes it. The - # actionable slop. -> tighten the contract / nil-kill (DELETE). - # * ESSENTIAL dispatch (`x.string?`, `.collection?`, - # `.heap_provenance?` -- a domain `?` query over a value that is - # legitimately a sum). NOT removable by typing; it IS the - # contract. Debt ONLY if the same dispatch is re-scattered, which - # is a DIFFERENT metric (Fat-Union / Missing-Abstractions). 
Shown - # as a per-contract context count, never summed into the headline. - # - # Pressure is decomplex-scoped: intra-procedural only (a local is - # resolved to the accessor it was assigned from IN THE SAME METHOD). - # Cross-procedure pressure is nil-kill's, by the recorded boundary. class DecisionPressure - GUARD_MIDS = %i[is_a? kind_of? instance_of? nil? respond_to?].freeze - TRANSIENT_NOARG_MIDS = %i[pop shift].freeze + GUARD_MIDS = %w[ + is_a? kind_of? instance_of? nil? respond_to? + is_none is_some is_null isNull + ].freeze + TRANSIENT_NOARG_MIDS = %w[pop shift].freeze Hit = Struct.new(:contract, :file, :defn, :line, :span, keyword_init: true) def self.scan(files) guard = [] dispatch = [] - files.each do |f| - root, lines = Ast.parse(f) - e = new(f, lines) - e.walk(root, [], {}) - guard.concat(e.guard_hits) - dispatch.concat(e.dispatch_hits) + files.each do |file| + document = Syntax.parse(file, parser: "tree_sitter") + assignment_maps = document.local_methods.to_h do |method| + [method.name, build_assignment_map(method)] + end + + document.call_sites.each do |call| + next if call.receiver.to_s.empty? 
+ + asgmap = assignment_maps.fetch(call.function, {}) + if eliminable_guard?(call) + contract = contract_of(call.receiver, asgmap) + guard << hit(contract, call) if contract + elsif essential_dispatch?(call) + contract = contract_of(call.receiver, asgmap) + dispatch << hit(contract, call) if contract + end + end + + guard.concat(rescue_nil_hits(document, assignment_maps)) end Report.new(guard, dispatch) end - attr_reader :guard_hits, :dispatch_hits + def self.eliminable_guard?(call) + GUARD_MIDS.include?(call.message.to_s) || call.safe_navigation + end - def initialize(file, lines) - @file = file - @lines = lines - @guard_hits = [] - @dispatch_hits = [] + def self.essential_dispatch?(call) + call.message.to_s.end_with?("?") end - def walk(node, defstack, asgmap) - return unless Ast.node?(node) + def self.hit(contract, call) + Hit.new( + contract: contract, + file: call.file, + defn: call.function, + line: call.line, + span: call.span + ) + end - if %i[DEFN DEFS].include?(node.type) - name = node.children[node.type == :DEFS ? 1 : 0].to_s - defstack = defstack + [name] - asgmap = build_asgmap(node) - end + def self.rescue_nil_hits(document, assignment_maps) + document.local_methods.flat_map do |method| + asgmap = assignment_maps.fetch(method.name, {}) + method.statements.filter_map do |statement| + next unless statement.source.match?(/\brescue\s+nil\b/) - record_decision(node, defstack, asgmap) - record_rescue_nil(node, defstack, asgmap) - node.children.each { |c| walk(c, defstack, asgmap) } - end + call = document.call_sites.find do |candidate| + candidate.function == method.name && inside_span?(candidate.span, statement.span) + end + next unless call + + contract = contract_of(call_expression(call), asgmap) + next unless contract - private - - # name => rhs-source-node, for `name = ` LASGNs in - # this method (intra-procedural only). First simple assignment wins. - def build_asgmap(defn_node) - map = {} - stack = Ast.body_stmts(defn_node).dup - until stack.empty? 
- n = stack.pop - next unless Ast.node?(n) - - if n.type == :LASGN - nm = n.children[0].to_s - src = n.children[1] - map[nm] ||= src if !map.key?(nm) && simple_source?(src) + Hit.new( + contract: contract, + file: method.file, + defn: method.name, + line: statement.line, + span: statement.span + ) end - n.children.each { |c| stack << c } end - map end - def simple_source?(n) - return false unless Ast.node?(n) + def self.build_assignment_map(method) + method.statements.each_with_object({}) do |statement, map| + next unless statement.writes.size == 1 - case n.type - when :IVAR then true - when :CALL, :QCALL - recv, mid, args = n.children - recv && (args.nil? || mid == :[]) - else false - end + name = statement.writes.first.to_s + map[name] ||= simple_source_contract(statement.source) + end.compact end - def hit(contract, defstack, node) - Hit.new(contract: contract, file: @file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) + def self.simple_source_contract(source) + match = source.to_s.match(/\A\s*[A-Za-z_]\w*\s*=\s*(.+?)\s*\z/m) + return nil unless match + + rhs = match[1].strip + return nil if rhs.match?(/\s(?:if|unless|rescue)\s|\?|:/) + + contract_of(rhs, {}) end - # At most ONE record per node. ELIMINABLE guard takes precedence - # over ESSENTIAL dispatch (a `?` that is also a GUARD_MID, or a - # safe-nav, is the eliminable kind). - def record_decision(node, defstack, asgmap) - return unless %i[CALL QCALL].include?(node.type) - - recv, mid, _args = node.children - return unless recv - - guard = - (node.type == :CALL && GUARD_MIDS.include?(mid)) || - node.type == :QCALL # safe-nav = implicit nil decision on recv - if guard - c = contract_of(recv, asgmap) - @guard_hits << hit(c, defstack, node) if c - return + def self.contract_of(receiver, assignment_map, depth = 0) + source = receiver.to_s.strip + return nil if source.empty? 
|| depth >= 8 + + mapped = assignment_map[source] + return mapped if mapped + + return source if source.start_with?("@") + + if (match = source.match(/\A(?:[A-Za-z_]\w*|self)\s*\[(.+)\]\z/)) + return "[#{match[1].strip}]" end - # essential dispatch: a domain `?` query over a contract. NOT a - # GUARD_MID (those are eliminable, handled above). Legitimate - # polymorphism -- counted separately, never as pressure. - return unless node.type == :CALL && mid.to_s.end_with?("?") + return "~local" if source.match?(/\A[A-Za-z_]\w*\z/) + + if source.include?(".") + member = source.split(".").last.to_s + member = member.sub(/\(.*\)\z/, "") + return nil if TRANSIENT_NOARG_MIDS.include?(member) - c = contract_of(recv, asgmap) - @dispatch_hits << hit(c, defstack, node) if c + return ".#{member}" unless member.empty? + end + + nil end - # `x.accessor rescue nil` -- a defensive nil-swallow that exists - # only because the receiver is loosely typed. Eliminable guard - # (the exact idiom typed contracts remove). Conservative: bare - # `rescue nil` wrapping a single contract-resolvable call. - def record_rescue_nil(node, defstack, asgmap) - return unless node.type == :RESCUE - - body, resb, = node.children - return unless Ast.node?(resb) && resb.type == :RESBODY - return unless resb.children[0].nil? # bare rescue (no class list) - - handler = resb.children[1] - nil_handler = handler.nil? || - (Ast.node?(handler) && handler.type == :NIL) - return unless nil_handler - return unless Ast.node?(body) && %i[CALL QCALL].include?(body.type) - - c = contract_of(body, asgmap) - @guard_hits << hit(c, defstack, node) if c + def self.call_expression(call) + [call.receiver, call.message].map(&:to_s).reject(&:empty?).join(".") end - # Canonical root contract of a subject node, resolving locals - # through the intra-method assignment map. 
- def contract_of(n, asgmap, depth = 0) - return nil unless Ast.node?(n) && depth < 8 - - case n.type - when :LVAR, :DVAR - nm = n.children[0].to_s - src = asgmap[nm] - src ? contract_of(src, asgmap, depth + 1) : "~local" - when :IVAR - n.children[0].to_s # already includes the leading @ - when :CALL, :QCALL - recv, mid, args = n.children - if mid == :[] - key = args && Ast.node?(args) ? args.children.compact.first : nil - kt = (Ast.node?(key) ? Ast.slice(key, @lines) : key.inspect) - "[#{kt}]" - elsif args.nil? && recv && !TRANSIENT_NOARG_MIDS.include?(mid) - ".#{mid}" # no-arg accessor: the contract - end - when :VCALL - ".#{n.children[0]}" - end + def self.inside_span?(inner, outer) + return false unless inner && outer + + starts_after_or_at = (inner[0] > outer[0]) || (inner[0] == outer[0] && inner[1] >= outer[1]) + ends_before_or_at = (inner[2] < outer[2]) || (inner[2] == outer[2] && inner[3] <= outer[3]) + starts_after_or_at && ends_before_or_at end class Report @@ -193,19 +148,6 @@ def initialize(guard_hits, dispatch_hits) @dispatch = dispatch_hits end - # Rows are keyed/driven by ELIMINABLE guards (the actionable - # slop). A contract with only ESSENTIAL dispatch and zero - # eliminable guards produces NO row -- legitimate polymorphism is - # not pressure and must not be surfaced as actionable. - # - # `decisions` == eliminable guard count (the headline number, - # back-compat). `essential` == count of essential dispatches on - # the SAME contract (context only; NEVER summed into decisions, - # and deliberately NOT added to sites/spans so downstream - # consumers see the eliminable signal unchanged). - # - # [{ contract:, decisions:, essential:, methods:, sites:[...], - # spans:{} }] ; ranked by eliminable decisions; "~local" last. 
def ranked ess = Hash.new(0) @dispatch.each { |h| ess[h.contract] += 1 } diff --git a/gems/decomplex/lib/decomplex/derived_state.rb b/gems/decomplex/lib/decomplex/derived_state.rb index 2ac1b2e40..b6c202432 100644 --- a/gems/decomplex/lib/decomplex/derived_state.rb +++ b/gems/decomplex/lib/decomplex/derived_state.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "local_flow" module Decomplex # Derived-state def-use staleness (intra-procedural, the design @@ -8,91 +8,37 @@ module Decomplex # # Plague: redundant state that drifts. `b = f(a)` makes b a derived # copy of a. If a is then reassigned later in the same method but b - # is NOT recomputed, every later use of b is stale -- the exact - # "field copied from elsewhere then used for similar decisions" bug. + # is NOT recomputed, every later use of b is stale. class DerivedState - Asgn = Struct.new(:name, :deps, :line, :span, keyword_init: true) + Asgn = Struct.new(:name, :deps, :line, :span, :statement_index, keyword_init: true) def self.scan(files) - out = [] - files.each do |f| - root, lines = Ast.parse(f) - new(f, lines).each_method(root) do |defn, stmts| - out.concat(analyze(f, defn, stmts)) - end - end - out.sort_by { |h| -h[:gap] } - end - - def initialize(file, lines) - @file = file - @lines = lines + LocalFlow.scan(files).flat_map do |method| + analyze(method.file, method.name, assignments(method)) + end.sort_by { |h| -h[:gap] } end - def each_method(node, defstack = [], &blk) - return unless Ast.node?(node) - - if %i[DEFN DEFS].include?(node.type) - name = node.children[node.type == :DEFS ? 1 : 0].to_s - yield name, Ast.body_stmts(node) - end - node.children.each { |c| each_method(c, defstack, &blk) } - end - - # RHS constructs whose nested LASGNs are BRANCH-LOCAL initialization - # of the binding being assigned -- not later method-scope sequential - # reassignments. 
Recursing into them is the dominant DSS false - # positive (`x = if c; y = ...; use y; end` flattens `y` into the - # ordered list, so `analyze` mis-reads it as "y reassigned after x"). - BRANCH_RHS = %i[IF CASE CASE2 CASE3 AND OR WHILE UNTIL - RESCUE ENSURE].freeze - - # Flatten statements (incl. inside simple blocks) to ordered LASGNs. - # - # Fail-safe scoping: when an LASGN's VALUE child is a branch - # construct, record the LASGN itself but DO NOT descend into the - # conditional RHS. A genuine method-scope reassignment is always a - # top-level statement (an LASGN whose parent is the method body, not - # the value child of another LASGN), so it still enters the list -> - # the real `b = f(a); a = ...; use b` desync is still caught (no - # false negative). Non-branch values still recurse (`a = b = c`). - def self.lasgns(stmts) - acc = [] - walk = lambda do |n| - return unless Ast.node?(n) - - if n.type == :LASGN - acc << n - val = n.children[1] - if Ast.node?(val) && BRANCH_RHS.include?(val.type) - # branch-local RHS: do not flatten its inner assignments - else - n.children.each { |c| walk.call(c) } - end - else - n.children.each { |c| walk.call(c) } + def self.assignments(method) + method.statements.flat_map do |statement| + statement.writes.map do |name| + Asgn.new( + name: name, + deps: dependencies_for(statement, name), + line: statement.line, + span: statement.span, + statement_index: statement.index + ) end end - stmts.each { |s| walk.call(s) } - acc end - def self.lvars(node, acc = []) - return acc unless Ast.node?(node) - - acc << node.children[0].to_s if node.type == :LVAR - node.children.each { |c| lvars(c, acc) } - acc + def self.dependencies_for(statement, name) + statement.dependencies.filter_map do |left, right| + right.to_s if left.to_s == name.to_s + end.uniq end - def self.analyze(file, defn, stmts) - asgns = lasgns(stmts).map do |n| - Asgn.new(name: n.children[0].to_s, - deps: lvars(n.children[1]).uniq, - line: n.first_lineno, - span: 
[n.first_lineno, n.first_column, - n.last_lineno, n.last_column]) - end + def self.analyze(file, defn, asgns) out = [] asgns.each_with_index do |b, i| next if b.deps.empty? @@ -100,13 +46,13 @@ def self.analyze(file, defn, stmts) b.deps.each do |a| next if a == b.name - # a reassigned strictly after b's definition? - reasn = asgns[(i + 1)..].find { |x| x.name == a } + reasn = asgns[(i + 1)..].find do |x| + x.name == a && x.statement_index > b.statement_index + end next unless reasn - # b recomputed at or after a's reassignment? recomputed = asgns[(i + 1)..].any? do |x| - x.name == b.name && x.line >= reasn.line + x.name == b.name && x.statement_index >= reasn.statement_index end next if recomputed diff --git a/gems/decomplex/lib/decomplex/false_simplicity.rb b/gems/decomplex/lib/decomplex/false_simplicity.rb index fd3f8ff32..d25791132 100644 --- a/gems/decomplex/lib/decomplex/false_simplicity.rb +++ b/gems/decomplex/lib/decomplex/false_simplicity.rb @@ -1,540 +1,79 @@ # frozen_string_literal: true -require_relative "ast" +require "set" require_relative "syntax" module Decomplex - # False simplicity: code whose local syntax understates its non-local - # behaviour -- hidden dynamic dispatch, hidden mutation, hidden - # global/context dependency, hidden IO/effects, callback/control - # inversion, runtime reflection, monkeypatch/reopen. Seven - # sub-detectors, one category, ranked support x scatter (same - # blast-radius thesis as Missing Abstractions: one trigger reinvented - # across N methods is one missing abstraction). + # False simplicity: code whose local syntax understates non-local behavior. # - # #8 (protocol-pair names: open/close, lock/unlock) is NOT here -- it - # is already Broken Protocols (SequenceMine, Engler co-call mining). - # - # Pure normalized syntax-tree matching. No dataflow, no CFG, no points-to. 
- # Language lexicons are provider data: Ruby's was mined from - # RuboCop/Reek/stdlib, while other languages use their own effectful - # runtime surfaces instead of inheriting Ruby's. - # See docs/agents/false-simplicity.md. + # The detector does not mine language grammar directly. Production scanning + # consumes Syntax::Document semantic effect sites and owner/function facts; + # language adapters own language-specific effect lexicons and syntax quirks. class FalseSimplicity Hit = Struct.new(:kind, :detail, :file, :defn, :line, :span, keyword_init: true) ClassRec = Struct.new(:name, :file, :line, :core, :span, keyword_init: true) - Lexicon = Struct.new( - :dispatch_mids, :meta_mids, :method_obj_mids, :io_consts, - :io_bare, :dir_context, :context_pairs, :context_bare, - :callback_set, :core_consts, - keyword_init: true - ) - - EMPTY_PAIRS = {}.freeze - COMMON_CALLBACK_SET = %w[ - transaction synchronize lock with_lock unlock mutex atomic subscribe - callback hook - ].freeze - RUBY_LEXICON = Lexicon.new( - dispatch_mids: %w[send __send__ public_send const_get constantize - instance_variable_get].freeze, - meta_mids: %w[define_method define_singleton_method alias_method - class_eval module_eval instance_eval class_exec - module_exec instance_exec eval const_set - instance_variable_set remove_method undef_method - prepend singleton_class binding].freeze, - method_obj_mids: %i[method public_method instance_method].freeze, - io_consts: %w[File IO Dir FileUtils Open3 Socket TCPSocket UDPSocket - TCPServer UNIXSocket Tempfile Pathname Marshal].freeze, - io_bare: %w[puts print warn gets readline readlines system - exec spawn fork sleep open abort exit exit!].freeze, - dir_context: %w[pwd getwd home].freeze, - context_pairs: { - "Time" => %w[now current], "Date" => %w[today current], - "DateTime" => %w[now current], "Process" => %w[pid ppid uid gid euid], - "Thread" => %w[current list main], "Fiber" => %w[current], - "Random" => %w[rand bytes], "GC" => %w[stat count], - 
"ObjectSpace" => %w[each_object count_objects] - }.freeze, - context_bare: %w[rand srand].freeze, - callback_set: %w[transaction synchronize lock with_lock unlock - mutex atomic reentrant subscribe callback hook].freeze, - core_consts: %w[String Symbol Integer Float Numeric Rational Complex - Array Hash Set Range Struct Object BasicObject Kernel - Module Class Comparable Enumerable Enumerator Proc Method - UnboundMethod NilClass TrueClass FalseClass Exception - StandardError RuntimeError ArgumentError TypeError - NameError NoMethodError IO File Dir Time Date DateTime - Regexp MatchData Thread Mutex Fiber Process Math GC - ObjectSpace Marshal Random Encoding].freeze - ).freeze - PYTHON_LEXICON = Lexicon.new( - dispatch_mids: %w[getattr setattr hasattr __getattr__ __setattr__ import_module].freeze, - meta_mids: %w[eval exec compile type globals locals vars setattr delattr].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[Path pathlib os sys subprocess socket shutil].freeze, - io_bare: %w[print input open exec eval].freeze, - dir_context: %w[getcwd home].freeze, - context_pairs: { - "time" => %w[time monotonic perf_counter], - "datetime" => %w[now today utcnow], - "random" => %w[random randint randrange choice] - }.freeze, - context_bare: %w[random randint randrange].freeze, - callback_set: COMMON_CALLBACK_SET, - core_consts: [].freeze - ).freeze - JS_LEXICON = Lexicon.new( - dispatch_mids: %w[eval Function call apply bind].freeze, - meta_mids: %w[eval Function defineProperty defineProperties setPrototypeOf].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[console Console fs process Deno Bun].freeze, - io_bare: %w[setTimeout setInterval fetch require import].freeze, - dir_context: [].freeze, - context_pairs: { - "Date" => %w[now], - "Math" => %w[random], - "performance" => %w[now] - }.freeze, - context_bare: [].freeze, - callback_set: COMMON_CALLBACK_SET, - core_consts: [].freeze - ).freeze - GO_LEXICON = Lexicon.new( - dispatch_mids: %w[Call 
CallSlice Method MethodByName ValueOf TypeOf].freeze, - meta_mids: %w[Call CallSlice MethodByName New MakeFunc].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[os io ioutil fs net http exec syscall].freeze, - io_bare: %w[panic print println recover].freeze, - dir_context: %w[Getwd UserHomeDir].freeze, - context_pairs: { - "time" => %w[Now Since Until], - "rand" => %w[Int Intn Float64 Read] - }.freeze, - context_bare: [].freeze, - callback_set: (COMMON_CALLBACK_SET + %w[Lock Unlock RLock RUnlock Do Go Add Done Wait]).uniq.freeze, - core_consts: [].freeze - ).freeze - RUST_LEXICON = Lexicon.new( - dispatch_mids: %w[downcast downcast_ref downcast_mut call call_mut call_once].freeze, - meta_mids: %w[transmute from_raw_parts from_raw_parts_mut].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[std tokio fs env process net io].freeze, - io_bare: %w[panic todo unimplemented unreachable].freeze, - dir_context: %w[current_dir home_dir].freeze, - context_pairs: { - "SystemTime" => %w[now], - "Instant" => %w[now] - }.freeze, - context_bare: [].freeze, - callback_set: (COMMON_CALLBACK_SET + %w[lock read write spawn await]).uniq.freeze, - core_consts: [].freeze - ).freeze - ZIG_LEXICON = Lexicon.new( - dispatch_mids: %w[field fieldParentPtr ptrCast alignCast call].freeze, - meta_mids: %w[typeInfo TypeOf ptrCast intFromPtr ptrFromInt eval].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[std os fs process net Thread Mutex Atomic].freeze, - io_bare: %w[panic unreachable].freeze, - dir_context: [].freeze, - context_pairs: { - "time" => %w[timestamp nanoTimestamp milliTimestamp] - }.freeze, - context_bare: [].freeze, - callback_set: (COMMON_CALLBACK_SET + %w[lock unlock spawn wait signal]).uniq.freeze, - core_consts: [].freeze - ).freeze - LUA_LEXICON = Lexicon.new( - dispatch_mids: %w[load loadfile dofile require rawget rawset].freeze, - meta_mids: %w[setmetatable getmetatable debug eval load loadfile].freeze, - method_obj_mids: 
%i[method].freeze, - io_consts: %w[io os debug package].freeze, - io_bare: %w[print error assert require collectgarbage].freeze, - dir_context: [].freeze, - context_pairs: { - "os" => %w[time clock date getenv], - "math" => %w[random] - }.freeze, - context_bare: [].freeze, - callback_set: COMMON_CALLBACK_SET, - core_consts: [].freeze - ).freeze - C_LEXICON = Lexicon.new( - dispatch_mids: %w[dlsym dlopen GetProcAddress].freeze, - meta_mids: %w[setjmp longjmp va_start va_arg].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[FILE DIR pthread mutex atomic].freeze, - io_bare: %w[printf fprintf fopen open read write close system exec abort exit assert].freeze, - dir_context: %w[getcwd getenv].freeze, - context_pairs: EMPTY_PAIRS, - context_bare: %w[rand time clock].freeze, - callback_set: (COMMON_CALLBACK_SET + %w[pthread_mutex_lock pthread_mutex_unlock]).uniq.freeze, - core_consts: [].freeze - ).freeze - CPP_LEXICON = Lexicon.new( - dispatch_mids: %w[dynamic_cast typeid any_cast get_if visit invoke].freeze, - meta_mids: %w[reinterpret_cast const_cast dlsym dlopen].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[std filesystem fstream iostream thread mutex atomic].freeze, - io_bare: %w[throw abort exit assert system].freeze, - dir_context: %w[current_path].freeze, - context_pairs: { - "chrono" => %w[now], - "random_device" => %w[operator()] - }.freeze, - context_bare: [].freeze, - callback_set: (COMMON_CALLBACK_SET + %w[lock unlock try_lock wait notify_one notify_all]).uniq.freeze, - core_consts: [].freeze - ).freeze - CSHARP_LEXICON = Lexicon.new( - dispatch_mids: %w[Invoke GetMethod GetProperty GetField Activator CreateInstance].freeze, - meta_mids: %w[Invoke GetType Reflection Emit DynamicMethod].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[Console File Directory Path Process Socket HttpClient Environment].freeze, - io_bare: %w[throw].freeze, - dir_context: %w[CurrentDirectory GetEnvironmentVariable].freeze, - context_pairs: 
{ - "DateTime" => %w[Now UtcNow Today], - "Guid" => %w[NewGuid], - "Random" => %w[Next NextDouble] - }.freeze, - context_bare: [].freeze, - callback_set: (COMMON_CALLBACK_SET + %w[Lock Monitor Enter Exit Wait Pulse]).uniq.freeze, - core_consts: [].freeze - ).freeze - JAVA_LEXICON = Lexicon.new( - dispatch_mids: %w[invoke getMethod getDeclaredMethod getField getDeclaredField forName].freeze, - meta_mids: %w[invoke setAccessible newInstance Proxy].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[System File Files Paths ProcessBuilder Socket HttpClient Thread Lock AtomicReference].freeze, - io_bare: %w[throw].freeze, - dir_context: %w[getProperty getenv].freeze, - context_pairs: { - "System" => %w[currentTimeMillis nanoTime getenv getProperty], - "Instant" => %w[now], - "UUID" => %w[randomUUID], - "Math" => %w[random] - }.freeze, - context_bare: [].freeze, - callback_set: (COMMON_CALLBACK_SET + %w[lock unlock wait notify notifyAll submit execute]).uniq.freeze, - core_consts: [].freeze - ).freeze - SWIFT_LEXICON = Lexicon.new( - dispatch_mids: %w[perform value setValue selector NSClassFromString].freeze, - meta_mids: %w[Mirror unsafeBitCast withUnsafePointer withUnsafeBytes].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[FileManager Process URLSession DispatchQueue Thread Lock NSLock].freeze, - io_bare: %w[print fatalError preconditionFailure assertionFailure].freeze, - dir_context: %w[currentDirectoryPath homeDirectoryForCurrentUser].freeze, - context_pairs: { - "Date" => %w[now], - "UUID" => %w[init] - }.freeze, - context_bare: [].freeze, - callback_set: (COMMON_CALLBACK_SET + %w[lock unlock async sync]).uniq.freeze, - core_consts: [].freeze - ).freeze - KOTLIN_LEXICON = Lexicon.new( - dispatch_mids: %w[invoke call callBy memberProperties declaredMemberFunctions].freeze, - meta_mids: %w[reflection javaClass Class forName setAccessible].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[System File Files Paths ProcessBuilder 
Socket HttpClient Thread Mutex AtomicReference].freeze, - io_bare: %w[println print error check require TODO].freeze, - dir_context: %w[getProperty getenv].freeze, - context_pairs: { - "System" => %w[currentTimeMillis nanoTime getenv getProperty], - "Instant" => %w[now], - "UUID" => %w[randomUUID], - "Random" => %w[nextInt nextLong nextDouble] - }.freeze, - context_bare: [].freeze, - callback_set: (COMMON_CALLBACK_SET + %w[lock unlock synchronized launch async await]).uniq.freeze, - core_consts: [].freeze - ).freeze - LANGUAGE_LEXICONS = { - ruby: RUBY_LEXICON, - python: PYTHON_LEXICON, - javascript: JS_LEXICON, - typescript: JS_LEXICON, - go: GO_LEXICON, - rust: RUST_LEXICON, - zig: ZIG_LEXICON, - lua: LUA_LEXICON, - c: C_LEXICON, - cpp: CPP_LEXICON, - csharp: CSHARP_LEXICON, - java: JAVA_LEXICON, - swift: SWIFT_LEXICON, - kotlin: KOTLIN_LEXICON - }.freeze - - # Compatibility aliases for tests and downstream code that inspect - # detector constants directly. - DISPATCH_MIDS = RUBY_LEXICON.dispatch_mids - META_MIDS = RUBY_LEXICON.meta_mids - METHOD_OBJ_MIDS = RUBY_LEXICON.method_obj_mids - IO_CONSTS = RUBY_LEXICON.io_consts - # bare `p`/`pp` deliberately excluded: single/double-letter, too - # often a local-var bareword (VCALL) to flag as Kernel#p. 
- IO_BARE = RUBY_LEXICON.io_bare - DIR_CONTEXT = RUBY_LEXICON.dir_context - CONTEXT_PAIRS = RUBY_LEXICON.context_pairs - CONTEXT_BARE = RUBY_LEXICON.context_bare - CALLBACK_SET = RUBY_LEXICON.callback_set - CORE = RUBY_LEXICON.core_consts def self.scan(files) hits = [] recs = [] - files.each do |f| - root, lines = Ast.parse(f) - e = new(f, lines, language: Syntax.language_for(f)) - e.walk(root, [], []) - hits.concat(e.hits) - recs.concat(e.classrecs) + files.each do |file| + document = Syntax.parse(file, parser: "tree_sitter") + hits.concat(hits_for_document(document)) + doc_recs, doc_hits = class_records_for_document(document) + recs.concat(doc_recs) + hits.concat(doc_hits) end Report.new(hits, recs) end - attr_reader :hits, :classrecs - - def initialize(file, lines, language: :ruby, lexicon: nil) - @file = file - @lines = lines - @language = language.to_sym - @lexicon = lexicon || self.class.lexicon_for(@language) - @hits = [] - @classrecs = [] - end - - def self.lexicon_for(language) - LANGUAGE_LEXICONS.fetch(language.to_sym) - end - - def walk(node, defs, cls) - return unless Ast.node?(node) - - case node.type - when :CLASS, :MODULE - return walk_class(node, defs, cls) - when :SCLASS - return unless @language == :ruby - - recv = node.children[0] - emit(:metaprogramming, "class << #{Ast.slice(recv, @lines)}", - dn(defs), node) unless recv.type == :SELF - when :DEFN, :DEFS - nm = (node.type == :DEFN ? 
node.children[0] : node.children[1]) - emit(:metaprogramming, "def #{nm}", dn(defs), node) \ - if @language == :ruby && %i[method_missing respond_to_missing?].include?(nm) - nd = Ast.def_push(node, defs) - return node.children.each { |c| walk(c, nd, cls) } - when :CALL, :FCALL, :VCALL, :OPCALL - classify_call(node, defs) - when :ATTRASGN - emit(:hidden_mutation, node.children[1].to_s, dn(defs), node) - when :OP_ASGN1, :OP_ASGN2 - emit(:hidden_mutation, "op-assign", dn(defs), node) - when :GVAR, :GASGN - emit(:context_dependency, node.children[0].to_s, dn(defs), node) if @language == :ruby - when :XSTR, :DXSTR - emit(:hidden_io, "backtick", dn(defs), node) if @language == :ruby - when :YIELD - emit(:dynamic_dispatch, "yield", dn(defs), node) if @language == :ruby - when :ITER - cm = callee_mid(node.children[0]) - emit(:callback_inversion, cm.to_s, dn(defs), node) \ - if cm && callback?(cm.to_s) && !@lexicon.meta_mids.include?(cm.to_s) - end - - node.children.each { |c| walk(c, defs, cls) } - end - - private - - def dn(defs) - defs.last || "(top-level)" - end - - # Takes the triggering node so line AND span come from one place. - def emit(kind, detail, defn, node) - @hits << Hit.new(kind: kind, detail: detail, file: @file, - defn: defn, line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) - end - - def walk_class(node, defs, cls) - cpath = node.children[0] - body = node.children[node.type == :CLASS ? 2 : 1] - simple = const_simple(cpath) - based = Ast.node?(cpath) && cpath.type == :COLON2 && - !cpath.children[0].nil? && !cpath.text.to_s.start_with?("::") - fqn = (cls + [const_text(cpath)]).join("::") - if has_def?(body) - core = cls.empty? 
&& !based && @lexicon.core_consts.include?(simple) - @classrecs << ClassRec.new(name: fqn, file: @file, - line: node.first_lineno, core: core, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) - emit(:monkeypatch, simple, simple, node) if core + def self.hits_for_document(document) + document.semantic_effect_sites.map do |site| + Hit.new(kind: site.kind, detail: site.detail, file: site.file, + defn: site.function || "(top-level)", line: site.line, + span: site.span) end - newcls = cls + [const_text(cpath)] - node.children.each { |c| walk(c, defs, newcls) } end - # Exactly one hit per call node, highest-signal kind first, so - # counts are not inflated by a node matching two lexicons. - def classify_call(call, defs) - recv, mid = - case call.type - when :CALL, :OPCALL then [call.children[0], call.children[1]] - else [nil, call.children[0]] - end - m = mid.to_s - - if (block_pass?(call) || block_literal_call?(call)) && - callback?(m) && !@lexicon.meta_mids.include?(m) - return emit(:callback_inversion, m, dn(defs), call) - end - return emit(:metaprogramming, m, dn(defs), call) if @lexicon.meta_mids.include?(m) - return emit(:dynamic_dispatch, m, dn(defs), call) if @lexicon.dispatch_mids.include?(m) - - if m == "call" && recv - return emit(:dynamic_dispatch, "method(...).call", dn(defs), call) \ - if method_obj?(recv) - return emit(:dynamic_dispatch, "#{Ast.slice(recv, @lines)}.call", - dn(defs), call) if var_recv?(recv) - end - - cp = const_recv(recv) - if cp - base = cp.sub(/\A::/, "").split("::").first - if base == "Dir" && @lexicon.dir_context.include?(m) - return emit(:context_dependency, "Dir.#{m}", dn(defs), call) - end - if @lexicon.io_consts.include?(base) || (@language == :ruby && cp.start_with?("Net::")) - return emit(:hidden_io, "#{cp}.#{m}", dn(defs), call) - end - if @language == :ruby - return emit(:hidden_io, "URI.open", dn(defs), call) \ - if base == "URI" && m == "open" - return emit(:context_dependency, "ENV", 
dn(defs), call) if cp == "ENV" - end - if @lexicon.context_pairs[base]&.include?(m) - return emit(:context_dependency, "#{base}.#{m}", dn(defs), call) - end - end - - if recv.nil? - return emit(:hidden_io, m, dn(defs), call) if @lexicon.io_bare.include?(m) - return emit(:context_dependency, m, dn(defs), call) \ - if @lexicon.context_bare.include?(m) - end - - if m.length > 1 && m.end_with?("!") && !%w[!= !~].include?(m) - return emit(:hidden_mutation, m, dn(defs), call) - end - emit(:hidden_mutation, "<<", dn(defs), call) \ - if call.type == :OPCALL && m == "<<" - end - - def callback?(str) - @lexicon.callback_set.include?(str) || - str =~ /\A(with_|around_|on_|before_|after_)/ || - str =~ /_hook\z/ - end - - def callee_mid(call) - return nil unless Ast.node?(call) - - case call.type - when :CALL, :OPCALL then call.children[1] - when :FCALL, :VCALL then call.children[0] - end - end - - def block_pass?(call) - args = - case call.type - when :CALL, :OPCALL then call.children[2] - when :FCALL then call.children[1] - end - return false unless Ast.node?(args) - # `f(&b)` -> args IS the BLOCK_PASS; `f(a, &b)` -> LIST[..., BLOCK_PASS]. - return true if args.type == :BLOCK_PASS - - args.type == :LIST && - args.children.any? { |c| Ast.node?(c) && c.type == :BLOCK_PASS } - end - - def block_literal_call?(call) - text = call.text.to_s - text.include?("{") || text.match?(/\bdo\b/) - end - - def method_obj?(recv) - Ast.node?(recv) && %i[CALL FCALL].include?(recv.type) && - @lexicon.method_obj_mids.include?( - recv.type == :CALL ? 
recv.children[1] : recv.children[0] - ) - end - - def var_recv?(recv) - Ast.node?(recv) && - %i[VCALL LVAR DVAR IVAR CVAR GVAR].include?(recv.type) - end - - def const_recv(recv) - return nil unless Ast.node?(recv) && - %i[CONST COLON2 COLON3].include?(recv.type) - - const_text(recv) - end - - def const_text(n) - return n.to_s unless Ast.node?(n) - - case n.type - when :CONST then n.children[0].to_s - when :COLON3 then "::#{n.children[0]}" - when :COLON2 - return "::#{n.children[1]}" if n.text.to_s.start_with?("::") - - b = n.children[0] - b ? "#{const_text(b)}::#{n.children[1]}" : n.children[1].to_s - else Ast.slice(n, @lines) - end - end - - def const_simple(n) - return n.to_s unless Ast.node?(n) - - case n.type - when :CONST, :COLON3 then n.children[0].to_s - when :COLON2 then n.children[1].to_s - else const_text(n) + def self.class_records_for_document(document) + function_owners = document.function_defs.map(&:owner).compact.to_set + core_names = core_owner_names(document.language) + recs = [] + hits = [] + document.owner_defs.each do |owner| + canonical = owner.name.to_s.sub(/\A::/, "") + next if canonical.empty? + next unless function_owners.include?(owner.name) || function_owners.include?(canonical) + + simple = canonical.split("::").last + core = !canonical.include?("::") && core_names.include?(simple) + rec = ClassRec.new(name: canonical, file: owner.file, line: owner.line, + core: core, span: owner.span) + recs << rec + next unless core + + hits << Hit.new(kind: :monkeypatch, detail: simple, file: owner.file, + defn: simple, line: owner.line, span: owner.span) end + [recs, hits] end - # A def reachable without crossing a nested namespace -- methods - # added to THIS class/module. SCLASS is descended (its defs attach - # to the enclosing object); CLASS/MODULE prune (separate namespace). 
- def has_def?(n) - return false unless Ast.node?(n) - return true if %i[DEFN DEFS].include?(n.type) - return false if %i[CLASS MODULE].include?(n.type) + def self.core_owner_names(language) + profile = Syntax.language_profile(language) + return [] unless profile.respond_to?(:effect_lexicon, true) - n.children.any? { |c| has_def?(c) } + lexicon = profile.send(:effect_lexicon) + lexicon&.core_consts.to_a + rescue ArgumentError + [] end # Groups hits by [kind, detail] and ranks by blast radius: # scatter = distinct (file, method) units, support = occurrences. # Cross-file project-class reopen (same FQN with methods in >=2 # files) becomes monkeypatch hits here; core reopens were already - # emitted per occurrence during the walk. + # emitted per occurrence during the scan. class Report def initialize(hits, classrecs) @hits = hits.dup @@ -542,10 +81,10 @@ def initialize(hits, classrecs) next if recs.first.core next if recs.map(&:file).uniq.size < 2 - recs.each do |r| - @hits << Hit.new(kind: :monkeypatch, detail: "reopen #{r.name}", - file: r.file, defn: r.name, line: r.line, - span: r.span) + recs.each do |rec| + @hits << Hit.new(kind: :monkeypatch, detail: "reopen #{rec.name}", + file: rec.file, defn: rec.name, line: rec.line, + span: rec.span) end end end @@ -553,14 +92,14 @@ def initialize(hits, classrecs) attr_reader :hits def findings - @hits.group_by { |h| [h.kind, h.detail] }.map do |(kind, detail), hs| - units = hs.map { |h| [h.file, h.defn] }.uniq - sites = hs.map { |h| "#{h.file}:#{h.defn}:#{h.line}" }.uniq + @hits.group_by { |hit| [hit.kind, hit.detail] }.map do |(kind, detail), hits| + units = hits.map { |hit| [hit.file, hit.defn] }.uniq + sites = hits.map { |hit| "#{hit.file}:#{hit.defn}:#{hit.line}" }.uniq spans = {} - hs.each { |h| spans["#{h.file}:#{h.defn}:#{h.line}"] ||= h.span } - { kind: kind, detail: detail, support: hs.size, + hits.each { |hit| spans["#{hit.file}:#{hit.defn}:#{hit.line}"] ||= hit.span } + { kind: kind, detail: detail, support: 
hits.size, scatter: units.size, at: sites.first, sites: sites, spans: spans } - end.sort_by { |h| [-h[:scatter], -h[:support], h[:kind].to_s, h[:detail]] } + end.sort_by { |hit| [-hit[:scatter], -hit[:support], hit[:kind].to_s, hit[:detail]] } end end end diff --git a/gems/decomplex/lib/decomplex/fat_union.rb b/gems/decomplex/lib/decomplex/fat_union.rb index 37d9d9d98..b98dd4584 100644 --- a/gems/decomplex/lib/decomplex/fat_union.rb +++ b/gems/decomplex/lib/decomplex/fat_union.rb @@ -1,147 +1,146 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex # Fat-union detector: Missing-Abstractions for product-vs-sum # decomposition. A `case when ClassA when ClassB ...` # dispatch where the arms read mostly the SAME members of - # (and/or members are read OUTSIDE the dispatch in the same - # method) is a union whose common core should be a struct, with a - # SMALL union for the genuinely-varying part. Every such fat union - # is a Neglected-Updates / Missing-Abstractions generator (the - # storage/provenance invariant-#16 shape at the type level). - # - # decomplex MEASURES and ranks the use-site cohesion evidence; the - # extraction is value-object work, nil-kill's owned territory - # (design.md nil-kill boundary). Output routes there. - # - # v1 scope (principle 4, exact before semantic): `case` over CLASS - # CONSTANTS only. `is_a?` if/elsif chains and `:kind`-tagged hashes - # are a documented future scope limit, reported absent not - # approximated. Zero deps, AST only, no points-to. + # is a union whose common core should be a struct, with a small union + # for the genuinely-varying part. 
class FatUnion - CONST_TYPES = %i[CONST COLON2 COLON3].freeze + CONSTANT_PATTERN = /\A[A-Z]\w*(?:(?:::|\.)[A-Z]\w*)*\z/ + IF_DISPATCH_PATTERN = /\A(?.+?)\s*(?:==|===)\s*(?[A-Z]\w*(?:(?:::|\.)[A-Z]\w*)*)\z/ Site = Struct.new(:variant_set, :arm_members, :outside, :file, :defn, :line, :span, keyword_init: true) def self.scan(files, min_variants: 3, min_common: 2, ratio: 0.6) - sites = [] - files.each do |f| - root, lines = Ast.parse(f) - e = new(f, lines) - e.walk(root, "(top-level)", nil) - sites.concat(e.sites) + sites = files.flat_map do |file| + document = Syntax.parse(file, parser: "tree_sitter") + new(document).sites end Report.new(sites, min_variants: min_variants, min_common: min_common, ratio: ratio) end - attr_reader :sites + attr_reader :document - def initialize(file, lines) - @file = file - @lines = lines - @sites = [] + def initialize(document) + @document = document end - # Carries the enclosing def NAME and NODE so "accessed outside the - # dispatch but in the same method" (the strongest common-core - # tell) is computable by pruning the case subtree. - def walk(node, defn, defn_node) - return unless Ast.node?(node) - - case node.type - when :DEFN then defn = node.children[0].to_s; defn_node = node - when :DEFS then defn = node.children[1].to_s; defn_node = node - when :CASE - s = record_case(node, defn, defn_node) - @sites << s if s - end - node.children.each { |c| walk(c, defn, defn_node) } + def sites + arms = document.branch_arms + case_sites = arms + .select { |arm| arm.kind == :case } + .group_by { |arm| [arm.file, arm.function, arm.decision_span, arm.predicate] } + .filter_map { |_key, case_arms| record_case(case_arms) } + case_sites + if_dispatch_sites(arms) end private - def record_case(node, defn, defn_node) - disc = node.children[0] - return nil unless Ast.node?(disc) # predicate-less = if-chain - - disc_txt = Ast.slice(disc, @lines) - arms = {} # "ClassName" => [member, ...] 
- whenn = node.children[1] - while Ast.node?(whenn) && whenn.type == :WHEN - consts = const_patterns(whenn.children[0]) - unless consts.empty? # class-constant dispatch only (v1 scope) - mem = subtree_members(whenn.children[1], disc_txt) - consts.each { |c| (arms[c] ||= []).concat(mem) } - end - whenn = whenn.children[2] + def record_case(arms) + predicate = arms.first.predicate.to_s + return nil if predicate.empty? + + arm_members = {} + arms.each do |arm| + variants = constant_patterns(arm.member) + next if variants.empty? + + members = members_inside(predicate, arm.function, arm.span) + variants.each { |variant| (arm_members[variant] ||= []).concat(members) } end - return nil if arms.size < 2 - - arms.transform_values!(&:uniq) - Site.new(variant_set: arms.keys.sort, arm_members: arms, - outside: outside_members(defn_node, node, disc_txt), - file: @file, defn: defn, line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) + return nil if arm_members.size < 2 + + arm_members.transform_values!(&:uniq) + Site.new( + variant_set: arm_members.keys.sort, + arm_members: arm_members, + outside: members_outside(predicate, arms.first.function, arms.first.decision_span), + file: arms.first.file, + defn: arms.first.function, + line: arms.first.decision_line, + span: arms.first.decision_span + ) end - # disc-members read in the enclosing method but NOT inside this - # case. Pruned by the case's LINE SPAN, not object identity; adapters - # are free to materialize fresh wrapper nodes per traversal. Empty - # for a top-level case (no enclosing method) -- documented limit. 
- def outside_members(defn_node, case_node, disc_txt) - return [] unless Ast.node?(defn_node) - - acc = [] - collect(defn_node, disc_txt, case_node.first_lineno, - case_node.last_lineno, acc) - acc.uniq + def if_dispatch_sites(arms) + arms.select { |arm| arm.kind == :if && arm.member == "then" } + .filter_map { |arm| [arm, if_dispatch_match(arm.predicate)] } + .reject { |_arm, match| match.nil? } + .group_by { |arm, match| [arm.file, arm.function, match[:subject]] } + .filter_map { |_key, matched| record_if_dispatch(matched) } + end + + def record_if_dispatch(matched) + predicate = matched.first[1][:subject] + arm_members = {} + matched.each do |arm, match| + members = members_inside(predicate, arm.function, arm.span) + (arm_members[match[:variant]] ||= []).concat(members) + end + return nil if arm_members.size < 2 + + arm_members.transform_values!(&:uniq) + Site.new( + variant_set: arm_members.keys.sort, + arm_members: arm_members, + outside: members_outside(predicate, matched.first[0].function, matched.first[0].decision_span), + file: matched.first[0].file, + defn: matched.first[0].function, + line: matched.first[0].decision_line, + span: matched.first[0].decision_span + ) end - def collect(node, disc_txt, cfl, cll, acc) - return unless Ast.node?(node) - # entire subtree lies within the case -> it is inside, skip. - return if node.first_lineno >= cfl && node.last_lineno <= cll + def if_dispatch_match(predicate) + source = predicate.to_s.strip + source = source[1...-1].strip if source.start_with?("(") && source.end_with?(")") + match = source.match(IF_DISPATCH_PATTERN) + return nil unless match - m = member_access(node, disc_txt) - acc << m if m - node.children.each { |c| collect(c, disc_txt, cfl, cll, acc) } + { subject: match[:subject].strip, variant: match[:variant].strip } end - def subtree_members(body, disc_txt) - acc = [] - stack = [body] - until stack.empty? 
- n = stack.pop - next unless Ast.node?(n) + def members_inside(predicate, function, span) + member_calls(predicate, function) + .select { |call| inside_span?(call.span, span) } + .map { |call| member_name(call) } + .uniq + end - m = member_access(n, disc_txt) - acc << m if m - n.children.each { |c| stack << c } - end - acc.uniq + def members_outside(predicate, function, decision_span) + member_calls(predicate, function) + .reject { |call| inside_span?(call.span, decision_span) } + .map { |call| member_name(call) } + .uniq end - # `.foo` / `.foo(..)` / ` << x` / `.foo = x` - # -> "foo" / "<<" / "foo". nil otherwise. - def member_access(n, disc_txt) - return nil unless %i[CALL OPCALL ATTRASGN].include?(n.type) + def member_calls(predicate, function) + document.call_sites.select do |call| + call.function == function && + call.receiver.to_s == predicate && + !call.message.to_s.empty? + end + end - recv, mid, = n.children - return nil unless Ast.node?(recv) && Ast.slice(recv, @lines) == disc_txt + def member_name(call) + call.message.to_s.sub(/=\z/, "") + end - mid.to_s.sub(/=\z/, "") + def constant_patterns(member) + member.to_s.split(/\s*,\s*/).map { |pattern| pattern.sub(/\Acase\s+/, "") } + .select { |pattern| pattern.match?(CONSTANT_PATTERN) } end - def const_patterns(plist) - return [] unless Ast.node?(plist) + def inside_span?(inner, outer) + return false unless inner && outer - plist.children.filter_map do |p| - Ast.slice(p, @lines) if Ast.node?(p) && CONST_TYPES.include?(p.type) - end + starts_after_or_at = (inner[0] > outer[0]) || (inner[0] == outer[0] && inner[1] >= outer[1]) + ends_before_or_at = (inner[2] < outer[2]) || (inner[2] == outer[2] && inner[3] <= outer[3]) + starts_after_or_at && ends_before_or_at end class Report @@ -159,18 +158,12 @@ def fat_unions v = vset.size next if v < @min_variants - # member -> distinct variant-classes accessing it, across - # every dispatch site of this variant-set. 
vcls = Hash.new { |h, k| h[k] = {} } outside = {} group.each do |s| s.arm_members.each { |cls, ms| ms.each { |m| vcls[m][cls] = true } } s.outside.each { |m| outside[m] = true } end - # member universe = accessed in an arm OR only outside the - # dispatch (a member read ONLY outside is the strongest - # 'belongs in the common struct' signal -- it must not be - # dropped just because no arm names it). keys = vcls.keys | outside.keys common = keys.select do |m| outside[m] || (vcls[m] && vcls[m].size >= v) diff --git a/gems/decomplex/lib/decomplex/flay_similarity.rb b/gems/decomplex/lib/decomplex/flay_similarity.rb index 4d8d298bd..8d9e6b56b 100644 --- a/gems/decomplex/lib/decomplex/flay_similarity.rb +++ b/gems/decomplex/lib/decomplex/flay_similarity.rb @@ -24,7 +24,7 @@ class FlaySimilarity IDENTIFIER_KINDS = %w[ identifier constant type_identifier field_identifier property_identifier - shorthand_property_identifier_pattern variable_name + shorthand_property_identifier_pattern simple_identifier variable_name ].freeze LITERAL_KINDS = %w[ string string_content string_literal interpreted_string_literal raw_string_literal @@ -34,22 +34,22 @@ class FlaySimilarity SKIP_CANDIDATE_KINDS = %w[ comment identifier constant type_identifier field_identifier property_identifier parameters formal_parameters parameter_list argument_list arguments - block_parameters method_parameters + block_parameters call_suffix function_value_parameters method_parameters value_argument scope_resolution ].freeze CLONE_CANDIDATE_KINDS = %w[ array assignment assignment_statement block case case_clause class - class_definition class_declaration do_block enum_declaration for - for_statement hash if if_statement match_expression match_statement - method method_definition module operator_assignment singleton_method + class_definition class_declaration compound_statement conjunction_expression control_structure_body + do_block enum_declaration for for_statement function_body hash if if_statement 
match_expression + match_statement method method_definition module operator_assignment singleton_method statements struct_declaration switch_case switch_expression switch_statement unless until while while_statement ].freeze BODY_KINDS = %w[ body block body_statement declaration_list statement_block compound_statement - suite do_block + function_body statements suite do_block ].freeze - CALL_KINDS = %w[call call_expression method_invocation invocation_expression].freeze + CALL_KINDS = %w[call call_expression function_call method_call method_invocation invocation_expression].freeze def self.scan(files, mass: DEFAULT_MASS, fuzzy: DEFAULT_FUZZY) new(files, mass: mass, fuzzy: fuzzy).scan @@ -236,7 +236,7 @@ def site_for(candidate) end def uniq_sites(candidates) - candidates.uniq { |candidate| [candidate.file, candidate.line, candidate.node_name] } + candidates.uniq { |candidate| [candidate.file, candidate.line, candidate.span, candidate.node_name] } end def fuzzy_signatures(candidate) @@ -341,12 +341,12 @@ def fingerprint_call(node, active) end def call_message(node) - return nil unless node.children.any? { |child| %w[argument_list arguments].include?(child.kind) } + return nil unless node.children.any? { |child| %w[argument_list arguments call_suffix].include?(child.kind) } callee = named_field(node, "function") || named_field(node, "callee") return callee_message(callee) if callee - argument_node = node.children.find { |child| %w[argument_list arguments].include?(child.kind) } + argument_node = node.children.find { |child| %w[argument_list arguments call_suffix].include?(child.kind) } named_before_args = node.named_children.select do |child| argument_node.nil? 
|| child.start_byte < argument_node.start_byte end @@ -356,11 +356,18 @@ def call_message(node) def callee_message(node) return nil unless ts_node?(node) return node.text if IDENTIFIER_KINDS.include?(node.kind) + return navigation_suffix_message(node) if %w[navigation_expression directly_assignable_expression].include?(node.kind) leaf = node.named_children.reverse.find { |child| IDENTIFIER_KINDS.include?(child.kind) } leaf&.text end + def navigation_suffix_message(node) + suffix = node.named_children.reverse.find { |child| child.kind == "navigation_suffix" } + leaf = suffix&.named_children&.reverse&.find { |child| IDENTIFIER_KINDS.include?(child.kind) } + leaf&.text + end + def terminal_token(node) kind = node.kind.to_s return "id" if IDENTIFIER_KINDS.include?(kind) diff --git a/gems/decomplex/lib/decomplex/inconsistent_rename_clone.rb b/gems/decomplex/lib/decomplex/inconsistent_rename_clone.rb index 59c91cb9b..0a8bcd730 100644 --- a/gems/decomplex/lib/decomplex/inconsistent_rename_clone.rb +++ b/gems/decomplex/lib/decomplex/inconsistent_rename_clone.rb @@ -1,86 +1,62 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "local_flow" module Decomplex # Narrow clone bug detector: a pasted block was renamed, but one # occurrence kept the old spelling. This is intentionally not a # general Type-2/Type-3 clone detector; the structural similarity # scanner owns that broader signal. - # - # The important false-positive guard is cross-method evidence. Local - # branch symmetry inside one method often has the same skeleton with - # different receiver/container variables, but that is not a pasted - # rename bug. 
class InconsistentRenameClone Block = Struct.new(:skeleton, :names, :file, :defn, :line, :span, keyword_init: true) - HOLE_TYPES = %i[LVAR DVAR IVAR LASGN DASGN IASGN].freeze MIN_TOKENS = 8 def self.scan(files) - blocks = [] - files.each do |f| - root, = Ast.parse(f) - new(f).collect(root, [], blocks) - end - Report.new(blocks).inconsistent_renames - end - - def initialize(file) - @file = file - end - - def collect(node, defstack, blocks) - return unless Ast.node?(node) + blocks = LocalFlow.scan(files).filter_map do |method| + next if method.statements.size < 3 - defstack = Ast.def_push(node, defstack) - if node.type == :BLOCK - stmts = node.children.compact - add_block(stmts, defstack, blocks) if stmts.size >= 3 + new.add_block(method) end - node.children.each { |child| collect(child, defstack, blocks) } + Report.new(blocks).inconsistent_renames end - private - - def add_block(stmts, defstack, blocks) + def add_block(method) skeleton = [] names = [] - stmts.each { |stmt| tokenize(stmt, skeleton, names) } - return if skeleton.size < MIN_TOKENS - - blocks << Block.new(skeleton: skeleton, names: names, file: @file, - defn: defstack.last || "(top-level)", - line: stmts.first.first_lineno, - span: [stmts.first.first_lineno, - stmts.first.first_column, - stmts.last.last_lineno, - stmts.last.last_column]) + method.statements.each { |statement| tokenize(statement.source, skeleton, names) } + return nil if skeleton.size < MIN_TOKENS + + Block.new( + skeleton: skeleton, + names: names, + file: method.file, + defn: method.name, + line: method.statements.first.line, + span: [ + method.statements.first.span[0], + method.statements.first.span[1], + method.statements.last.span[2], + method.statements.last.span[3] + ] + ) end - def tokenize(node, skeleton, names) - return unless Ast.node?(node) - - case node.type - when *HOLE_TYPES - skeleton << :ID - names << node.children[0].to_s - when :VCALL - skeleton << :ID - names << node.children[0].to_s - when :CALL, :FCALL - skeleton << 
node.type - mid = node.children[node.type == :CALL ? 1 : 0] - skeleton << :MID - names << mid.to_s - when :LIT, :STR, :SYM, :INTEGER, :FLOAT - skeleton << node.type - else - skeleton << node.type + private + + def tokenize(source, skeleton, names) + source.to_s.scan(/[A-Za-z_]\w*[!?=]?|@\w+|\d+(?:\.\d+)?|:[A-Za-z_]\w*|\"[^\"]*\"|'[^']*'|\S/) do |token| + case token + when /\A[@A-Za-z_]\w*[!?=]?\z/ + skeleton << :ID + names << token.delete_prefix("@").delete_suffix("=") + when /\A(?::[A-Za-z_]\w*|\d+(?:\.\d+)?|\"[^\"]*\"|'[^']*')\z/ + skeleton << :LIT + else + skeleton << token + end end - node.children.each { |child| tokenize(child, skeleton, names) } end class Report diff --git a/gems/decomplex/lib/decomplex/local_flow.rb b/gems/decomplex/lib/decomplex/local_flow.rb index 10c9ef2c6..135aae474 100644 --- a/gems/decomplex/lib/decomplex/local_flow.rb +++ b/gems/decomplex/lib/decomplex/local_flow.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true require "set" -require_relative "ast" +require_relative "syntax" module Decomplex # Conservative intra-procedural local data-flow support for function-level @@ -19,222 +19,50 @@ class LocalFlow :before_index, :after_index, :line, :kind, :text, keyword_init: true ) - OWNER_TYPES = %i[CLASS MODULE].freeze - METHOD_TYPES = %i[DEFN DEFS].freeze - SKIP_NESTED_TYPES = %i[CLASS MODULE DEFN DEFS LAMBDA].freeze - LOCAL_READ_TYPES = %i[LVAR DVAR].freeze - LOCAL_WRITE_TYPES = %i[LASGN DASGN].freeze - def self.scan(files) files.flat_map do |file| - root, lines = Ast.parse(file) - new(file, lines).scan(root) - end - end - - def initialize(file, lines) - @file = file - @lines = lines - end - - def scan(root) - out = [] - collect_methods(root, [], out) - out - end - - private - - def collect_methods(node, owners, out) - return unless Ast.node?(node) - - if OWNER_TYPES.include?(node.type) - owner = full_owner_name(owners, node) - owner_methods(node).each { |method| out << method_summary(method, owner) } - collect_nested_owners(node, owners + 
[owner_segment(node)], out) - elsif METHOD_TYPES.include?(node.type) && owners.empty? - out << method_summary(node, "(top-level)") - else - node.children.each { |child| collect_methods(child, owners, out) } - end - end - - def collect_nested_owners(node, owners, out) - return unless Ast.node?(node) - return if METHOD_TYPES.include?(node.type) - - node.children.each do |child| - next unless Ast.node?(child) - - if OWNER_TYPES.include?(child.type) - collect_methods(child, owners, out) - else - collect_nested_owners(child, owners, out) + Syntax.parse(file, parser: "tree_sitter").local_methods.map do |method| + method_summary(method) end end end - def method_summary(node, owner) - statements = Ast.body_stmts(node).each_with_index.map do |stmt, index| - statement_summary(stmt, index) - end + private_class_method def self.method_summary(method) MethodSummary.new( - id: "#{owner}##{method_name(node)}", - owner: owner, - name: method_name(node), - file: @file, - line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], - node: node, - statements: statements, - boundaries: structural_boundaries(statements) + id: method.id, + owner: method.owner, + name: method.name, + file: method.file, + line: method.line, + span: method.span, + node: method.node, + statements: method.statements.map { |statement| statement_summary(statement) }, + boundaries: method.boundaries.map { |boundary| boundary_summary(boundary) } ) end - def statement_summary(node, index) + private_class_method def self.statement_summary(statement) Statement.new( - index: index, - line: node.first_lineno, - end_line: node.last_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], - source: Ast.slice(node, @lines), - reads: local_reads(node).to_set, - writes: local_writes(node).to_set, - dependencies: assignment_dependencies(node), - co_uses: co_use_edges(node) + index: statement.index, + line: statement.line, + end_line: 
statement.end_line, + span: statement.span, + source: statement.source, + reads: statement.reads.to_set, + writes: statement.writes.to_set, + dependencies: statement.dependencies, + co_uses: statement.co_uses ) end - def structural_boundaries(statements) - statements.each_cons(2).filter_map do |left, right| - boundary = source_boundary(left.end_line + 1, right.line - 1) - next unless boundary - - Boundary.new( - before_index: left.index, - after_index: right.index, - line: boundary[:line], - kind: boundary[:kind], - text: boundary[:text] - ) - end - end - - def source_boundary(first_line, last_line) - return nil if first_line > last_line - - blank = nil - (first_line..last_line).each do |line_number| - text = @lines[line_number - 1].to_s - stripped = text.strip - if stripped.start_with?("#") - return { - line: line_number, - kind: :comment, - text: stripped, - } - end - blank ||= { line: line_number, kind: :blank, text: stripped } if stripped.empty? - end - blank - end - - def owner_methods(owner_node) - body = owner_body(owner_node) - return [] unless Ast.node?(body) - - owner_statements(body).flat_map do |stmt| - next [] unless Ast.node?(stmt) - - if METHOD_TYPES.include?(stmt.type) - [stmt] - elsif visibility_call?(stmt) - inline_methods(stmt) - else - [] - end - end - end - - def inline_methods(stmt) - args = stmt.children[1] - return [] unless Ast.node?(args) - - args.children.compact.select { |arg| Ast.node?(arg) && METHOD_TYPES.include?(arg.type) } - end - - def owner_body(owner_node) - scope = owner_node.children[owner_node.type == :CLASS ? 2 : 1] - return nil unless Ast.node?(scope) && scope.type == :SCOPE - - scope.children[2] - end - - def owner_statements(body) - body.type == :BLOCK ? 
body.children.compact : [body] - end - - def visibility_call?(node) - node.type == :FCALL && %i[public protected private].include?(node.children[0]) - end - - def method_name(node) - if node.type == :DEFS - receiver = node.children[0] - prefix = Ast.node?(receiver) && receiver.type == :SELF ? "self" : Ast.slice(receiver, @lines) - "#{prefix}.#{node.children[1]}" - else - node.children[0].to_s - end - end - - def full_owner_name(owners, node) - (owners + [owner_segment(node)]).join("::") - end - - def owner_segment(node) - text = Ast.slice(node.children[0], @lines) - text.empty? ? "(anonymous)" : text - end - - def local_reads(node) - reads = [] - walk_local(node) do |child| - reads << child.children[0].to_s if LOCAL_READ_TYPES.include?(child.type) - end - reads - end - - def local_writes(node) - writes = [] - walk_local(node) do |child| - writes << child.children[0].to_s if LOCAL_WRITE_TYPES.include?(child.type) - end - writes - end - - def assignment_dependencies(node) - deps = [] - walk_local(node) do |child| - next unless LOCAL_WRITE_TYPES.include?(child.type) - - lhs = child.children[0].to_s - rhs = child.children[1] - local_reads(rhs).uniq.each { |read| deps << [lhs, read] unless lhs == read } - end - deps.uniq - end - - def co_use_edges(node) - local_reads(node).uniq.combination(2).map { |left, right| [left, right] } - end - - def walk_local(node, &block) - return unless Ast.node?(node) - return if SKIP_NESTED_TYPES.include?(node.type) - - yield node - node.children.each { |child| walk_local(child, &block) } + private_class_method def self.boundary_summary(boundary) + Boundary.new( + before_index: boundary.before_index, + after_index: boundary.after_index, + line: boundary.line, + kind: boundary.kind, + text: boundary.text + ) end end end diff --git a/gems/decomplex/lib/decomplex/mutability_pressure.rb b/gems/decomplex/lib/decomplex/mutability_pressure.rb index 394e2d50d..b91eaadd1 100644 --- a/gems/decomplex/lib/decomplex/mutability_pressure.rb +++ 
b/gems/decomplex/lib/decomplex/mutability_pressure.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true -require_relative "ast" require_relative "state_mesh" module Decomplex @@ -178,4 +177,4 @@ def classify_and_rank end end end -end \ No newline at end of file +end diff --git a/gems/decomplex/lib/decomplex/operational_discontinuity.rb b/gems/decomplex/lib/decomplex/operational_discontinuity.rb index d1500353f..4e487ddc2 100644 --- a/gems/decomplex/lib/decomplex/operational_discontinuity.rb +++ b/gems/decomplex/lib/decomplex/operational_discontinuity.rb @@ -12,7 +12,7 @@ class OperationalDiscontinuity DEFAULT_MAX_CONTINUING = 1 DEFAULT_MIN_SCORE = 12 DEFAULT_HIGH_CONFIDENCE_MIN_SCORE = 20 - PHASE_COMMENT_PATTERN = /\A#\s*(?:\d+[a-z]?\s*[.)]|(?:phase|step|stage)\b)/i + PHASE_COMMENT_PATTERN = %r{\A(?:#|//|--)\s*(?:\d+[a-z]?\s*[.)]|(?:phase|step|stage)\b)}i GRAMMAR_METHOD_PATTERN = /\Aparse(?:_|$)/ RangeInfo = Struct.new(:first, :last, keyword_init: true) diff --git a/gems/decomplex/lib/decomplex/ordered_protocol_mine.rb b/gems/decomplex/lib/decomplex/ordered_protocol_mine.rb index 073fa85a8..cb9c81bd8 100644 --- a/gems/decomplex/lib/decomplex/ordered_protocol_mine.rb +++ b/gems/decomplex/lib/decomplex/ordered_protocol_mine.rb @@ -1,213 +1,54 @@ # frozen_string_literal: true require "set" -require_relative "ast" +require_relative "syntax" module Decomplex # ImplicitControlFlow finds internal call order where order is state-dependent, # e.g. `prepare; validate` when `prepare` writes state that `validate` reads. # Generic call-order repetition is intentionally ignored. 
class ImplicitControlFlow - MethodEffect = Struct.new(:file, :owner, :name, :line, :reads, :writes, keyword_init: true) + MethodEffect = Syntax::ProtocolMethodEffect Call = Struct.new(:mid, :file, :owner, :defn, :line, :span, :reads, :writes, keyword_init: true) MethodSequence = Struct.new(:file, :owner, :defn, :line, :calls, keyword_init: true) - Path = Struct.new(:calls, :terminal, keyword_init: true) - PATH_LIMIT = 64 - - DECLARATIVE_MIDS = %w[ - abstract! alias_method any attr_accessor attr_reader attr_writer bind - cast checked enum extend final include interface! let must must_because - nilable override overridable params prepend private private_class_method - protected public require require_relative requires_ancestor sealed! sig - type_member type_template untyped unsafe void - ].freeze - TEST_DSL_MIDS = %w[ - a_kind_of after around before be be_a be_an be_empty be_falsey be_nil - be_truthy change contain_exactly context describe eq eql equal expect - have_attributes have_key have_received it match not_to raise_error - receive subject to - ].freeze + + DECLARATIVE_MIDS = Syntax::RUBY_PROTOCOL_DECLARATIVE_MIDS + TEST_DSL_MIDS = Syntax::RUBY_PROTOCOL_TEST_DSL_MIDS IGNORED_MIDS = (DECLARATIVE_MIDS + TEST_DSL_MIDS).freeze - OPTIONAL_DIAGNOSTIC_MIDS = %w[ - error! fixable! read_interpolated_string warn! - ].freeze - MUTATING_MIDS = %w[ - << []= add append clear collect! compact! concat declare delete delete_if - each_key= fill filter! keep_if mark merge! move push reject! replace - resolve shift stamp store unshift update write - ].freeze - NON_MUTATING_OPERATOR_MIDS = %w[! 
!= !~].freeze - MUTATING_SUFFIXES = %w[!].freeze + OPTIONAL_DIAGNOSTIC_MIDS = Syntax::RUBY_PROTOCOL_OPTIONAL_DIAGNOSTIC_MIDS + MUTATING_MIDS = Syntax::RUBY_PROTOCOL_MUTATING_MIDS + NON_MUTATING_OPERATOR_MIDS = Syntax::RUBY_PROTOCOL_NON_MUTATING_OPERATOR_MIDS + MUTATING_SUFFIXES = Syntax::RUBY_PROTOCOL_MUTATING_SUFFIXES def self.scan(files) - parsed = files.each_with_object({}) do |file, out| - out[file] = Ast.parse(file) - end - effect_index = EffectIndex.build(parsed) - sequences = [] - parsed.each do |file, (root, lines)| - miner = new(file, lines, effect_index) - miner.walk(root, []) - sequences.concat(miner.sequences) - end + documents = files.map { |file| Syntax.parse(file, parser: "tree_sitter") } + effect_index = EffectIndex.new(documents.flat_map(&:protocol_method_effects)) + sequences = documents.flat_map { |document| sequences_for_document(document, effect_index) } Report.new(sequences) end - attr_reader :sequences - - def initialize(file, lines, effect_index) - @file = file - @lines = lines - @effect_index = effect_index - @sequences = [] - end - - def walk(node, owners) - return unless Ast.node?(node) - - case node.type - when :CLASS, :MODULE - owners = owners + [owner_name(node)] - when :DEFN, :DEFS - record_method_paths(node, owners.join("::")) - return - end - - node.children.each { |child| walk(child, owners) } - end - - private - - def record_method_paths(node, owner) - defn = method_name(node) - method_paths(node).each do |path| - calls = path.calls.map { |call| call_for(call, owner, defn) } + def self.sequences_for_document(document, effect_index) + document.protocol_call_paths.filter_map do |path| + calls = path.calls.map { |call| call_for_path(call, path, effect_index) } next if calls.count { |call| stateful_call?(call) } < 2 - @sequences << MethodSequence.new( - file: @file, - owner: owner, - defn: defn, - line: node.first_lineno, + MethodSequence.new( + file: path.file, + owner: path.owner, + defn: path.name, + line: path.line, calls: calls ) 
end end - def method_paths(node) - paths_for_statements(Ast.body_stmts(node)) - end - - def paths_for_statements(statements) - statements.compact.each_with_object([empty_path]) do |statement, paths| - next if Ast.node?(statement) && statement.type == :BEGIN - - statement_paths = paths_for(statement) - paths.replace(append_statement_paths(paths, statement_paths)) - end - end - - def append_statement_paths(paths, statement_paths) - combine_path_lists(paths, statement_paths) - end - - def combine_path_lists(left_paths, right_paths) - combined = left_paths.flat_map do |path| - if path.terminal - [path] - else - right_paths.map do |right_path| - Path.new(calls: path.calls + right_path.calls, terminal: right_path.terminal) - end - end - end - combined.first(PATH_LIMIT) - end - - def paths_for(node) - return [empty_path] unless Ast.node?(node) - - case node.type - when :BLOCK - paths_for_statements(node.children) - when :SCOPE - paths_for(scope_body(node)) - when :IF, :UNLESS - branch_paths(node) - when :CASE, :CASE2 - case_paths(node) - when :RETURN, :BREAK, :NEXT, :REDO, :RETRY - generic_paths(node).map { |path| Path.new(calls: path.calls, terminal: true) } - else - generic_paths(node) - end - end - - def branch_paths(node) - condition = node.children[0] - positive = node.children[1] - negative = node.children[2] - alternatives = paths_for(positive) + (negative ? 
paths_for(negative) : [empty_path]) - combine_path_lists(paths_for(condition), alternatives) - end - - def case_paths(node) - condition, first_when = case_parts(node) - combine_path_lists(paths_for(condition), when_paths(first_when)) - end - - def case_parts(node) - return [nil, node.children[0]] if node.type == :CASE2 - - [node.children[0], node.children[1]] - end - - def when_paths(node) - return [empty_path] unless Ast.node?(node) - - return paths_for(node) unless node.type == :WHEN - - patterns = node.children[0] - body = node.children[1] - next_node = node.children[2] - current_branch = combine_path_lists(paths_for(patterns), paths_for(body)) - (current_branch + when_paths(next_node)).first(PATH_LIMIT) - end - - def generic_paths(node) - return [empty_path] unless Ast.node?(node) - return [empty_path] if %i[CLASS MODULE DEFN DEFS LAMBDA].include?(node.type) - - child_paths = node.children.each_with_object([empty_path]) do |child, paths| - paths.replace(combine_path_lists(paths, paths_for(child))) - end - - internal_mid = internal_protocol_call(node) - return child_paths unless internal_mid - - combine_path_lists([Path.new(calls: [raw_call(internal_mid, node)], terminal: false)], child_paths) - end - - def raw_call(mid, node) - Call.new( - mid: mid, - file: @file, - owner: nil, - defn: nil, - line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], - reads: [], - writes: [] - ) - end - - def call_for(call, owner, defn) - effect = @effect_index.effect_for(owner, call.mid) + def self.call_for_path(call, path, effect_index) + effect = effect_index.effect_for(path.owner, call.mid) Call.new( mid: call.mid, - file: call.file, - owner: owner, - defn: defn, + file: path.file, + owner: path.owner, + defn: path.name, line: call.line, span: call.span, reads: effect ? 
effect.reads : [], @@ -215,60 +56,11 @@ def call_for(call, owner, defn) ) end - def stateful_call?(call) + def self.stateful_call?(call) !(call.reads + call.writes).empty? end - def empty_path - Path.new(calls: [], terminal: false) - end - - def scope_body(node) - node.children[2] - end - - def owner_name(node) - name = node.children[0] - res = Ast.node?(name) ? Ast.slice(name, @lines) : name.to_s - res.empty? ? "(anonymous)" : res - end - - def method_name(node) - node.children[node.type == :DEFS ? 1 : 0].to_s - end - - def internal_protocol_call(node) - mid = call_mid(node) - return nil unless mid - return nil if IGNORED_MIDS.include?(mid) - return nil unless internal_receiver?(node) - - mid - end - - def call_mid(node) - case node.type - when :CALL, :OPCALL, :ATTRASGN then node.children[1].to_s - when :FCALL, :VCALL then node.children[0].to_s - end - end - - def internal_receiver?(node) - return true if %i[FCALL VCALL].include?(node.type) - - receiver = node.children[0] - Ast.node?(receiver) && receiver.type == :SELF - end - class EffectIndex - def self.build(parsed) - effects = [] - parsed.each do |file, (root, lines)| - effects.concat(EffectCollector.new(file, lines).scan(root)) - end - new(effects) - end - def initialize(effects) @by_owner_name = effects.to_h { |effect| [[effect.owner, effect.name], effect] } @by_name = effects.group_by(&:name) @@ -291,182 +83,6 @@ def effect_stateful?(effect) end end - class EffectCollector - def initialize(file, lines) - @file = file - @lines = lines - end - - def scan(root) - out = [] - walk(root, [], out) - out - end - - private - - def walk(node, owners, out) - return unless Ast.node?(node) - - case node.type - when :CLASS, :MODULE - owners = owners + [owner_name(node)] - when :DEFN, :DEFS - out << method_effect(node, owners.join("::")) - return - end - - node.children.each { |child| walk(child, owners, out) } - end - - def method_effect(node, owner) - reads = Set.new - writes = Set.new - collect_state_access(node, 
reads, writes) - MethodEffect.new( - file: @file, - owner: owner, - name: method_name(node), - line: node.first_lineno, - reads: reads.to_a.sort, - writes: writes.to_a.sort - ) - end - - def collect_state_access(node, reads, writes) - return unless Ast.node?(node) - return if %i[CLASS MODULE DEFN DEFS LAMBDA].include?(node.type) && !%i[DEFN DEFS].include?(node.type) - - case node.type - when :IASGN - writes << normalize_state(node.children[0].to_s) - when :LASGN - collect_index_write(node, writes) - when :IVAR - reads << normalize_state(node.children[0].to_s) - when :ATTRASGN - collect_attr_write(node, writes) - when :CALL, :OPCALL - collect_bare_reader_comparison(node, reads) - collect_receiver_mutation(node, writes) - collect_self_reader(node, reads) - when :VCALL, :FCALL - collect_self_reader(node, reads) - end - - node.children.each { |child| collect_state_access(child, reads, writes) } - end - - def collect_attr_write(node, writes) - receiver, mid = node.children - attr = mid.to_s.sub(/=$/, "") - if mid == :[]= - writes << state_receiver_token(receiver) if state_receiver_token(receiver) - elsif self_receiver?(receiver) - writes << normalize_state(attr) - elsif (receiver_token = state_receiver_token(receiver)) - writes << "#{receiver_token}.#{attr}" - end - end - - def collect_index_write(node, writes) - name = node.children[0].to_s - return unless name.match?(/\A@?[A-Za-z_]\w*\[/) - - writes << normalize_state(name.sub(/\[.*\]\z/, "")) - end - - def collect_bare_reader_comparison(node, reads) - receiver, mid = node.children - return unless %w[== != === < <= > >=].include?(mid.to_s) - return unless Ast.node?(receiver) && receiver.type == :LVAR - - reads << normalize_state(receiver.children[0].to_s) - end - - def collect_receiver_mutation(node, writes) - receiver, mid = node.children - return unless mutating_mid?(mid.to_s) - - token = state_receiver_token(receiver) - writes << token if token - end - - def collect_self_reader(node, reads) - mid = call_mid(node) - 
return unless mid - return if mutating_mid?(mid) - return if IGNORED_MIDS.include?(mid) - return unless no_args?(node) - return if node.type == :CALL && !self_receiver?(node.children[0]) - - reads << normalize_state(mid) - end - - def mutating_mid?(mid) - return false if NON_MUTATING_OPERATOR_MIDS.include?(mid) - - MUTATING_MIDS.include?(mid) || MUTATING_SUFFIXES.any? { |suffix| mid.end_with?(suffix) } - end - - def no_args?(node) - case node.type - when :CALL, :OPCALL - node.children[2].nil? - when :VCALL - true - when :FCALL - node.children[1].nil? - else - false - end - end - - def state_receiver_token(node) - return nil unless Ast.node?(node) - - case node.type - when :IVAR - normalize_state(node.children[0].to_s) - when :SELF - "self" - when :VCALL, :FCALL - normalize_state(node.children[0].to_s) - when :LVAR - normalize_state(node.children[0].to_s) - when :CALL - return nil unless no_args?(node) - - normalize_state(node.children[1].to_s) - else - nil - end - end - - def self_receiver?(node) - Ast.node?(node) && node.type == :SELF - end - - def call_mid(node) - case node.type - when :CALL, :OPCALL, :ATTRASGN then node.children[1].to_s - when :FCALL, :VCALL then node.children[0].to_s - end - end - - def owner_name(node) - Ast.slice(node.children[0], @lines).to_s.empty? ? "(anonymous)" : Ast.slice(node.children[0], @lines) - end - - def method_name(node) - node.children[node.type == :DEFS ? 
1 : 0].to_s - end - - def normalize_state(name) - name.to_s.sub(/\A@/, "").sub(/=\z/, "") - end - end - class Report def initialize(sequences) @sequences = sequences diff --git a/gems/decomplex/lib/decomplex/oversized_predicate.rb b/gems/decomplex/lib/decomplex/oversized_predicate.rb index 366f3cafa..358aeb0b4 100644 --- a/gems/decomplex/lib/decomplex/oversized_predicate.rb +++ b/gems/decomplex/lib/decomplex/oversized_predicate.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex # Flags boolean predicates with too many independent condition atoms. @@ -11,14 +11,13 @@ module Decomplex # AST preserves the same AND/OR tree either way. class OversizedPredicate LIMIT = 3 - PREDICATE_NODES = %i[IF WHILE UNTIL].freeze def self.scan(files, limit: LIMIT) findings = [] files.each do |file| - root, lines = Ast.parse(file) - new(file, lines, limit).tap do |scanner| - scanner.walk(root, []) + document = Syntax.parse(file, parser: "tree_sitter") + new(file, limit).tap do |scanner| + scanner.collect(document) findings.concat(scanner.findings) end end @@ -29,55 +28,40 @@ def self.scan(files, limit: LIMIT) attr_reader :findings - def initialize(file, lines, limit) + def initialize(file, limit) @file = file - @lines = lines @limit = limit @findings = [] end - def walk(node, defstack) - return unless Ast.node?(node) - - defstack = Ast.def_push(node, defstack) - record_predicate(node, defstack) - node.children.each { |child| walk(child, defstack) } + def collect(document) + document.decision_sites.each { |site| record_predicate(site) } end private - def record_predicate(node, defstack) - return unless PREDICATE_NODES.include?(node.type) - return if predicate_helper?(defstack.last) - - cond = node.children[0] - return unless Ast.node?(cond) + def record_predicate(site) + return if predicate_helper?(site.function) - atoms = condition_atoms(cond) + atoms = condition_atoms(site.predicate) return unless atoms.size > @limit - 
defn = defstack.last || "" - at = "#{@file}:#{defn}:#{node.first_lineno}" + defn = site.function || "" + at = "#{@file}:#{defn}:#{site.line}" @findings << { at: at, count: atoms.size, - predicate: Ast.slice(cond, @lines), - atoms: atoms.map { |atom| Ast.slice(atom, @lines) }, - spans: { at => [node.first_lineno, node.first_column, node.last_lineno, node.last_column] }, + predicate: site.predicate, + atoms: atoms, + spans: { at => site.enclosing_span || site.span }, } end - def condition_atoms(node) - return [] unless Ast.node?(node) - - case node.type - when :AND, :OR - node.children.flat_map { |child| condition_atoms(child) } - when :NOT - condition_atoms(node.children[0]) - else - [node] - end + def condition_atoms(predicate) + predicate.to_s + .split(/\s*(?:&&|\|\||\band\b|\bor\b)\s*/) + .map { |atom| atom.gsub(/[()]/, "").strip } + .reject(&:empty?) end def predicate_helper?(name) diff --git a/gems/decomplex/lib/decomplex/path_condition.rb b/gems/decomplex/lib/decomplex/path_condition.rb index 24aee0223..300c37ff6 100644 --- a/gems/decomplex/lib/decomplex/path_condition.rb +++ b/gems/decomplex/lib/decomplex/path_condition.rb @@ -1,90 +1,31 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex - # Path-condition normal form. `if x; if y; act` and `if x && y; act` - # and `act if x && y` all reduce to the same guarded action with - # path condition {x, y}. Mining the PATH CONDITION (not the syntactic - # if) is what makes nested control flow and flat conjunction the same - # decision -- the user's "is `if x / if y` related to `if x && y`?". - # - # A site = an effectful leaf (call / assignment) reached under >= 2 - # guard atoms. Scatter = the same guard set reached in >= 2 (file, - # def) units. Neglected = a guarded action that is a high-support - # guard set minus exactly one atom. + # Path-condition normal form. 
`if x; if y; act` and + # `act if x && y` reduce to the same guarded action with path + # condition {x, y}. class PathCondition Site = Struct.new(:guards, :action, :file, :defn, :line, :span, keyword_init: true) def self.scan(files) - sites = [] - files.each do |f| - root, lines = Ast.parse(f) - e = new(f, lines) - e.walk(root, [], []) - sites.concat(e.sites) + sites = files.flat_map do |file| + Syntax.parse(file, parser: "tree_sitter").path_condition_sites.map do |site| + Site.new( + guards: site.guards, + action: site.action, + file: site.file, + defn: site.function, + line: site.line, + span: site.span + ) + end end Report.new(sites) end - attr_reader :sites - - def initialize(file, lines) - @file = file - @lines = lines - @sites = [] - end - - # guards: array of [text, negated?] atoms currently in scope. - def walk(node, defstack, guards) - return unless Ast.node?(node) - - defstack = Ast.def_push(node, defstack) - - case node.type - when :IF, :UNLESS - cond, a, b = node.children - atoms = cond_atoms(cond) - then_g = node.type == :IF ? atoms : negate(atoms) - else_g = node.type == :IF ? negate(atoms) : atoms - walk(a, defstack, guards + then_g) if a - walk(b, defstack, guards + else_g) if b - # the condition itself may contain nested constructs - walk(cond, defstack, guards) - return - when :CALL, :FCALL, :VCALL, :ATTRASGN, :LASGN, :IASGN, :OPCALL - record(node, defstack, guards) if guards.size >= 2 - end - - node.children.each { |c| walk(c, defstack, guards) } - end - - private - - def cond_atoms(cond) - Ast.flatten_and(cond).map do |a| - t = Ast.slice(a, @lines) - text, neg = Ast.canon_polarity(t) - [text, neg] - end - end - - def negate(atoms) - atoms.map { |t, n| [t, !n] } - end - - def record(node, defstack, guards) - members = guards.map { |t, n| (n ? "!" 
: "") + t }.uniq.sort - return if members.size < 2 - - @sites << Site.new(guards: members, action: Ast.slice(node, @lines)[0, 80], - file: @file, defn: defstack.last || "(top-level)", - line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) - end - class Report def initialize(sites) @sites = sites diff --git a/gems/decomplex/lib/decomplex/predicate_alias.rb b/gems/decomplex/lib/decomplex/predicate_alias.rb index 9832d35c6..092768ca5 100644 --- a/gems/decomplex/lib/decomplex/predicate_alias.rb +++ b/gems/decomplex/lib/decomplex/predicate_alias.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true -require_relative "ast" require_relative "syntax" module Decomplex @@ -22,49 +21,20 @@ class PredicateAlias def self.scan(files) preds = [] files.each do |f| - root, lines = Ast.parse(f) - new(f, lines).tap { |p| p.walk(root) }.preds.each { |p| preds << p } + Syntax.parse(f, parser: "tree_sitter").predicate_defs.each do |predicate| + preds << Pred.new( + name: predicate.name, + body: predicate.body, + file: predicate.file, + defn: predicate.name, + line: predicate.line, + span: predicate.span + ) + end end Report.new(preds) end - attr_reader :preds - - def initialize(file, lines) - @file = file - @lines = lines - @preds = [] - end - - def walk(node) - return unless Ast.node?(node) - - record_def(node) if node.type == :DEFN - node.children.each { |c| walk(c) } - end - - private - - # Single-expression boolean-ish method: `def x?(...) end`. - # The scope node's body is one statement (not a BLOCK of many). - def record_def(node) - name = node.children[0].to_s - scope = node.children[1] - return unless Ast.node?(scope) && scope.type == :SCOPE - - body = scope.children[2] - return unless Ast.node?(body) - return if body.type == :BLOCK # multi-statement => not a pure predicate - - txt = Ast.slice(body, @lines) - return if txt.empty? 
|| txt.length > 200 - - @preds << Pred.new(name: name, body: txt, file: @file, - defn: name, line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) - end - class Report def initialize(preds) @preds = preds diff --git a/gems/decomplex/lib/decomplex/redundant_nil_guard.rb b/gems/decomplex/lib/decomplex/redundant_nil_guard.rb index 98ff88a42..b6a7456a8 100644 --- a/gems/decomplex/lib/decomplex/redundant_nil_guard.rb +++ b/gems/decomplex/lib/decomplex/redundant_nil_guard.rb @@ -1,16 +1,11 @@ # frozen_string_literal: true require "set" -require_relative "ast" +require_relative "syntax" module Decomplex - # Redundant nil-guard detector. Finds nil checks or safe-navigation - # performed after the same stable subject is already proven non-nil - # on the current intra-method path. - # - # Conservative by design: local variables and no-arg accessor-style - # subjects only, no loop reasoning, no interprocedural facts. Local - # reassignment invalidates the proof. + # Finds nil checks or safe-navigation performed after the same stable subject + # is already proven non-nil on the current intra-method path. 
class RedundantNilGuard Finding = Struct.new(:file, :defn, :line, :span, :local, :guard, :proof, keyword_init: true) do @@ -22,72 +17,65 @@ def to_h Flow = Struct.new(:known, :terminated, keyword_init: true) NilFact = Struct.new(:local, :non_nil_when_true, keyword_init: true) - TERMINATING_CALLS = %i[raise fail abort exit exit!].freeze + TERMINATING_CALLS = %w[raise fail abort exit exit!].freeze def self.scan(files) files.flat_map do |file| - root, lines = Ast.parse(file) - new(file, lines).tap { |scanner| scanner.walk(root, []) }.findings + document = Syntax.parse(file, parser: "tree_sitter") + new(document).scan end.sort_by { |f| [f.file, f.line, f.local, f.guard] }.map(&:to_h) end - attr_reader :findings + attr_reader :document, :findings - def initialize(file, lines) - @file = file - @lines = lines + def initialize(document) + @document = document @findings = [] end - def walk(node, defstack) - return unless Ast.node?(node) - - if %i[DEFN DEFS].include?(node.type) - name = node.children[node.type == :DEFS ? 
1 : 0].to_s - process_block(Ast.body_stmts(node), defstack + [name], Set.new) - return + def scan + document.function_defs.each do |function| + process_block(method_statements(function.body), function.name, Set.new) end - - node.children.each { |child| walk(child, defstack) } + findings end private - def process_block(stmts, defstack, known) + def process_block(stmts, function, known) current = known.dup stmts.each do |stmt| - flow = process_stmt(stmt, defstack, current) + flow = process_stmt(stmt, function, current) current = flow.known return flow if flow.terminated end Flow.new(known: current, terminated: false) end - def process_stmt(node, defstack, known) - return Flow.new(known: known.dup, terminated: false) unless Ast.node?(node) + def process_stmt(node, function, known) + return Flow.new(known: known.dup, terminated: false) unless ts_node?(node) - case node.type - when :IF, :UNLESS - process_branch(node, defstack, known) - when :LASGN - inspect_node(node.children[1], defstack, known) + if if_node?(node) + process_branch(node, function, known) + elsif assignment_node?(node) + inspect_node(assignment_rhs(node), function, known) next_known = known.dup - next_known.delete(node.children[0].to_s) + next_known.delete(assignment_lhs_name(node).to_s) Flow.new(known: next_known, terminated: false) else - inspect_node(node, defstack, known) + inspect_node(node, function, known) Flow.new(known: known.dup, terminated: terminating?(node)) end end - def process_branch(node, defstack, known) - cond, then_body, else_body = node.children - inspect_node(cond, defstack, known) + def process_branch(node, function, known) + cond = branch_condition(node) + inspect_node(cond, function, known) - then_known = known_for_branch(node.type, true, cond, known) - else_known = known_for_branch(node.type, false, cond, known) - then_flow = process_block(stmts_for(then_body), defstack, then_known) - else_flow = process_block(stmts_for(else_body), defstack, else_known) + then_known = 
known_for_branch(node, true, cond, known) + else_known = known_for_branch(node, false, cond, known) + then_flow = process_block(stmts_for(branch_then_body(node)), function, then_known) + else_flow = process_block(stmts_for(branch_else_body(node)), function, else_known) if then_flow.terminated && else_flow.terminated Flow.new(known: Set.new, terminated: true) @@ -100,46 +88,42 @@ def process_branch(node, defstack, known) end end - def known_for_branch(node_type, body_branch, cond, known) + def known_for_branch(node, body_branch, cond, known) next_known = known.dup - cond_true_branch = - if node_type == :IF - body_branch - else - !body_branch - end + cond_true_branch = unless_node?(node) ? !body_branch : body_branch branch_nil_facts(cond, cond_true_branch).each { |fact| next_known.add(fact.local) } next_known end - def inspect_node(node, defstack, known) - return unless Ast.node?(node) + def inspect_node(node, function, known) + return unless ts_node?(node) - recorded = record_redundant(node, defstack, known) - return if %i[DEFN DEFS].include?(node.type) - return if recorded && node.type == :OPCALL + recorded = record_redundant(node, function, known) + return if recorded && safe_navigation_call?(node) + return if method_like_node?(node) - node.children.each { |child| inspect_node(child, defstack, known) } + node.children.each { |child| inspect_node(child, function, known) } end - def record_redundant(node, defstack, known) + def record_redundant(node, function, known) local = redundant_nil_subject(node, known) return false unless local @findings << Finding.new( - file: @file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, + file: document.file, + defn: function, + line: line(node), span: span(node), local: local, - guard: Ast.slice(node, @lines), + guard: normalize_text(node.text), proof: "#{local} is already proven non-nil on this path" ) true end def redundant_nil_subject(node, known) - return qcall_subject(node, known) if node.type == :QCALL + 
subject = safe_navigation_subject(node) + return subject if subject && known.include?(subject) fact = nil_fact(node) return nil unless fact && known.include?(fact.local) @@ -148,37 +132,37 @@ def redundant_nil_subject(node, known) end def nil_fact(node) - return nil unless Ast.node?(node) + return nil unless ts_node?(node) + return nil_fact(node.named_children.first) if parenthesized_wrapper?(node) - case node.type - when :CALL - recv, mid, args = node.children - return nil unless mid == :nil? && args.nil? - - subject = subject_key(recv) - subject ? NilFact.new(local: subject, non_nil_when_true: false) : nil - when :OPCALL - recv, mid, args = node.children - return negated_nil_fact(recv) if mid == :! - return comparison_nil_fact(recv, mid, args) if %i[== !=].include?(mid) + if nil_predicate_call?(node) + subject = subject_key(call_receiver_node(node)) + return subject ? NilFact.new(local: subject, non_nil_when_true: false) : nil + end + if non_nil_predicate_call?(node) + subject = subject_key(call_receiver_node(node)) + return subject ? NilFact.new(local: subject, non_nil_when_true: true) : nil + end - nil - else - nil + if unary_not?(node) + return negated_nil_fact(node.named_children.first) end + + comparison_nil_fact(node) end def branch_nil_facts(node, cond_truth) - return [] unless Ast.node?(node) + return [] unless ts_node?(node) + return branch_nil_facts(node.named_children.first, cond_truth) if parenthesized_wrapper?(node) - if node.type == :AND + if boolean_and?(node) return [] unless cond_truth - return Ast.flatten_and(node).flat_map { |child| branch_nil_facts(child, true) } + return flatten_boolean_and(node).flat_map { |child| branch_nil_facts(child, true) } end - if node.type == :OPCALL && node.children[1] == :! 
- return branch_nil_facts(node.children[0], !cond_truth) + if unary_not?(node) + return branch_nil_facts(node.named_children.first, !cond_truth) end safe_receiver = safe_nav_receiver_fact(node) @@ -192,9 +176,7 @@ def branch_nil_facts(node, cond_truth) end def safe_nav_receiver_fact(node) - return nil unless Ast.node?(node) && node.type == :QCALL - - subject = subject_key(node.children[0]) + subject = safe_navigation_subject(node) subject ? NilFact.new(local: subject, non_nil_when_true: true) : nil end @@ -213,77 +195,348 @@ def negated_nil_fact(node) non_nil_when_true: !fact.non_nil_when_true) end - def comparison_nil_fact(recv, mid, args) - subject = subject_key(recv) - return nil unless subject && nil_arg?(args) + def comparison_nil_fact(node) + return nil unless ts_node?(node) && node.kind == "binary" + + operator = direct_operator(node) + return nil unless %w[== !=].include?(operator) + + left, right = node.named_children + subject = nil + if nil_literal?(right) + subject = subject_key(left) + elsif nil_literal?(left) + subject = subject_key(right) + end + return nil unless subject + + NilFact.new(local: subject, non_nil_when_true: operator == "!=") + end + + def method_statements(node) + body = method_body_node(node) + return [] unless body + + stmts_for(body) + end + + def method_body_node(node) + return nil unless ts_node?(node) + + case node.kind + when "method", "singleton_method", "argument_list", "function_definition", "function_item", + "function_declaration", "method_declaration" + node.named_children.reverse.find do |child| + %w[body_statement block compound_statement function_body statement_block].include?(child.kind) + end + when "body_statement", "block", "compound_statement", "function_body", "statement_block" + if method_like_node?(node) + node.named_children.reverse.find do |child| + %w[body_statement block compound_statement function_body statement_block].include?(child.kind) + end + else + node + end + end + end + + def stmts_for(node) + 
return [] unless ts_node?(node) + return [node] if if_node?(node) + return [node] if assignment_node?(node) + return [node] if call_node?(node) + + named = node.named_children.reject { |child| child.kind == "comment" } + if named.size == 1 && %w[statements statement_list].include?(named.first.kind) + return [named.first] if if_node?(named.first) + + named = named.first.named_children.reject { |child| child.kind == "comment" } + end + return [node] if named.empty? && !node.text.to_s.strip.empty? + + named + end + + def if_node?(node) + return false unless ts_node?(node) + return true if %w[if unless if_statement if_expression if_modifier unless_modifier].include?(node.kind) && node.named_children.any? + return true if node.kind == "expression_statement" && node.text.to_s.lstrip.start_with?("if ") + return false unless %w[body_statement block statements statement_list].include?(node.kind) + + first_token = node.children.first + return true if first_token && !first_token.named? && %w[if unless].include?(first_token.kind.to_s) + + seen_named = false + node.children.any? do |child| + seen_named ||= child.named? + seen_named && !child.named? && %w[if unless].include?(child.kind.to_s) + end + end + + def unless_node?(node) + node.kind.to_s.include?("unless") || first_token_kind(node) == "unless" + end + + def modifier_if_node?(node) + return true if %w[if_modifier unless_modifier].include?(node.kind) + return false unless %w[body_statement block statements statement_list].include?(node.kind) + + seen_named = false + node.children.any? do |child| + seen_named ||= child.named? + seen_named && !child.named? && %w[if unless].include?(child.kind.to_s) + end + end + + def branch_condition(node) + modifier_if_node?(node) ? 
node.named_children.last : node.named_children.first + end + + def branch_then_body(node) + if modifier_if_node?(node) + node.named_children.first + else + node.named_children.find { |child| child.kind == "then" } || node.named_children[1] + end + end + + def branch_else_body(node) + return nil if modifier_if_node?(node) + + node.named_children.find { |child| %w[else elsif].include?(child.kind) } || node.named_children[2] + end + + def assignment_node?(node) + ts_node?(node) && (%w[assignment assignment_expression assignment_statement].include?(node.kind) || flat_assignment_statement?(node)) + end + + def assignment_lhs_name(node) + assignment_lhs(node)&.text + end + + def assignment_lhs(node) + node.named_children.first if assignment_node?(node) + end + + def assignment_rhs(node) + node.named_children[1] if assignment_node?(node) + end + + def flat_assignment_statement?(node) + return false unless ts_node?(node) && node.kind == "body_statement" + + node.children.count { |child| !child.named? && child.text == "=" } == 1 && + node.named_children.size >= 2 + end + + def nil_predicate_call?(node) + call_node?(node) && %w[nil? is_none is_null isNull].include?(call_message(node).to_s) + end + + def non_nil_predicate_call?(node) + call_node?(node) && %w[is_some isSome present?].include?(call_message(node).to_s) + end - NilFact.new(local: subject, non_nil_when_true: mid == :!=) + def safe_navigation_call?(node) + ts_node?(node) && node.kind == "call" && + node.children.any? { |child| !child.named? && child.text == "&." 
} end - def qcall_subject(node, known) - recv = node.children[0] - subject = subject_key(recv) - subject if subject && known.include?(subject) + def safe_navigation_subject(node) + return nil unless safe_navigation_call?(node) + + subject_key(call_receiver_node(node)) + end + + def call_receiver_node(node) + return nil unless call_node?(node) + + if adjacent_field_call?(node) + return named_field(node, "object") || named_field(node, "receiver") || + named_field(node, "expression") || named_field(node, "operand") || + node.named_children.first + end + + if %w[call call_expression function_call invocation_expression method_invocation method_call].include?(node.kind) + if node.kind == "call" + names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } + return names.first if names.size >= 2 + end + + if %w[invocation_expression method_invocation].include?(node.kind) + names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } + return names.first if names.size >= 2 + end + + callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first + if field_like_node?(callee) + return named_field(callee, "object") || named_field(callee, "receiver") || + named_field(callee, "expression") || named_field(callee, "operand") || + callee.named_children.first + end + end + + node.named_children.first + end + + def call_message(node) + return nil unless call_node?(node) + + if adjacent_field_call?(node) + field = named_field(node, "field") || named_field(node, "property") || + named_field(node, "name") || named_field(node, "suffix") || + node.named_children.last + return field&.text.to_s.sub(/\A[.?]+/, "") + end + + if %w[call call_expression function_call invocation_expression method_invocation method_call].include?(node.kind) + if node.kind == "call" + names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } + return 
names.last.text if names.size >= 2 + end + + if %w[invocation_expression method_invocation].include?(node.kind) + names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } + return names[1].text if names.size >= 2 + end + + callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first + if field_like_node?(callee) + field = named_field(callee, "field") || named_field(callee, "property") || + named_field(callee, "name") || named_field(callee, "suffix") || + callee.named_children.last + return field&.text.to_s.sub(/\A[.?]+/, "") + end + return callee.text if %w[identifier simple_identifier].include?(callee&.kind) + end + + node.named_children.reverse.find { |child| %w[identifier simple_identifier].include?(child.kind) }&.text + end + + def call_has_arguments?(node) + ts_node?(node) && + (node.named_children.any? { |child| %w[argument_list arguments call_suffix].include?(child.kind) } || + %w[argument_list arguments call_suffix].include?(next_sibling(node)&.kind)) end def subject_key(node) - return nil unless Ast.node?(node) - - case node.type - when :LVAR, :DVAR - node.children[0].to_s - when :VCALL - node.children[0].to_s - when :CALL - recv, mid, args = node.children - return nil unless args.nil? && stable_reader_name?(mid) - return "self.#{mid}" if recv&.type == :SELF - - recv_key = subject_key(recv) - recv_key ? "#{recv_key}.#{mid}" : nil + return nil unless ts_node?(node) + + case node.kind + when "identifier", "simple_identifier" + node.text + when "self", "this" + "self" + when "call", "call_expression", "function_call", "method_invocation", "invocation_expression", "method_call" + return nil if call_has_arguments?(node) + + receiver = call_receiver_node(node) + message = call_message(node) + return nil unless message && stable_reader_name?(message) + return "self.#{message}" if receiver&.kind == "self" + + recv_key = subject_key(receiver) + recv_key ? 
"#{recv_key}.#{message}" : nil else nil end end - def stable_reader_name?(mid) - name = mid.to_s - !(name.end_with?("=", "!") || name == "[]") + def stable_reader_name?(name) + text = name.to_s + !(text.end_with?("=", "!") || text == "[]") end - def local_name(node) - return nil unless Ast.node?(node) && %i[LVAR DVAR].include?(node.type) + def nil_literal?(node) + ts_node?(node) && node.kind == "nil" + end - node.children[0].to_s + def unary_not?(node) + ts_node?(node) && node.kind == "unary" && + node.children.any? { |child| !child.named? && child.text == "!" } end - def nil_arg?(args) - return false unless Ast.node?(args) + def parenthesized_wrapper?(node) + ts_node?(node) && %w[condition_clause parenthesized_expression parenthesized_statements].include?(node.kind) && + node.named_children.size == 1 + end - args.children.any? { |child| Ast.node?(child) && child.type == :NIL } + def boolean_and?(node) + ts_node?(node) && node.kind == "binary" && direct_operator(node) == "&&" end - def stmts_for(node) - return [] unless Ast.node?(node) + def flatten_boolean_and(node) + return [node] unless boolean_and?(node) - node.type == :BLOCK ? node.children.compact : [node] + node.named_children.flat_map { |child| flatten_boolean_and(child) } + end + + def direct_operator(node) + node.children.find { |child| !child.named? 
&& !%w[( )].include?(child.text.to_s) }&.text.to_s end def terminating?(node) - return false unless Ast.node?(node) - return true if %i[RETURN NEXT BREAK].include?(node.type) - return false unless %i[FCALL VCALL CALL].include?(node.type) + return false unless ts_node?(node) + return true if %w[return break next].include?(node.kind) + return true if node.text.to_s.strip.match?(/\A(?:return|break|next)\b/) + return true if node.kind == "identifier" && TERMINATING_CALLS.include?(node.text.to_s) + + call_node?(node) && TERMINATING_CALLS.include?(call_message(node).to_s) + end + + def method_like_node?(node) + ts_node?(node) && %w[method singleton_method function_definition function_item function_declaration method_declaration].include?(node.kind) + end + + def call_node?(node) + ts_node?(node) && + (%w[call argument_list call_expression function_call invocation_expression method_invocation method_call].include?(node.kind) || + adjacent_field_call?(node)) + end + + def adjacent_field_call?(node) + field_like_node?(node) && %w[argument_list arguments call_suffix].include?(next_sibling(node)&.kind) + end + + def next_sibling(node) + node.next_sibling + rescue StandardError + nil + end + + def first_token_kind(node) + node.children.find { |child| !child.named? 
}&.kind.to_s + end - mid = if node.type == :CALL - node.children[1] - else - node.children[0] - end - TERMINATING_CALLS.include?(mid) + def line(node) + node.start_point.row + 1 end def span(node) - [node.first_lineno, node.first_column, node.last_lineno, node.last_column] + [node.start_point.row + 1, node.start_point.column, node.end_point.row + 1, node.end_point.column] + end + + def normalize_text(text) + text.to_s.lines.map(&:strip).reject(&:empty?).join(" ") + end + + def named_field(node, name) + node.child_by_field_name(name) + rescue StandardError + nil + end + + def field_like_node?(node) + ts_node?(node) && + %w[ + attribute directly_assignable_expression dot_index_expression expression_list field field_access + field_expression member_access_expression member_expression navigation_expression scoped_identifier + selector_expression variable_list + ].include?(node.kind) + end + + def ts_node?(node) + node && node.respond_to?(:kind) && node.respond_to?(:children) end end end diff --git a/gems/decomplex/lib/decomplex/semantic_alias.rb b/gems/decomplex/lib/decomplex/semantic_alias.rb index 1a8ca6aff..d3c60352a 100644 --- a/gems/decomplex/lib/decomplex/semantic_alias.rb +++ b/gems/decomplex/lib/decomplex/semantic_alias.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex # Semantic predicate alias. 
The exact-text PredicateAlias misses the @@ -26,73 +26,53 @@ def self.scan(files) preds = [] uses = [] files.each do |f| - root, lines = Ast.parse(f) - e = new(f, lines) - e.walk(root, []) - preds.concat(e.preds) - uses.concat(e.uses) + document = Syntax.parse(f, parser: "tree_sitter") + document.predicate_defs.each do |predicate| + next unless semantic_predicate_definition?(predicate) + + preds << Pred.new( + name: predicate.name, + canon: canon(predicate.body), + file: predicate.file, + line: predicate.line, + span: predicate.span + ) + end + document.comparison_sites.each do |comparison| + uses << Use.new( + canon: canon(comparison.source), + file: comparison.file, + defn: comparison.function, + line: comparison.line, + raw: comparison.source, + span: comparison.span + ) + end end Report.new(preds, uses) end - attr_reader :preds, :uses - - def initialize(file, lines) - @file = file - @lines = lines - @preds = [] - @uses = [] - end - - def walk(node, defstack) - return unless Ast.node?(node) - - defstack = Ast.def_push(node, defstack) - record_pred(node) if node.type == :DEFN - if %i[CALL OPCALL].include?(node.type) && comparison?(node) - c = canon(Ast.slice(node, @lines)) - @uses << Use.new(canon: c, file: @file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, - raw: Ast.slice(node, @lines), - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) - end - node.children.each { |ch| walk(ch, defstack) } + def self.semantic_predicate_definition?(predicate) + predicate.name.to_s.end_with?("?") || + predicate.body.to_s.match?(/(?:==|!=|&&|\|\||\band\b|\bor\b)/) end # Canonical predicate form: drop a leading `!`, strip a leading # receiver chain (`a.b.`, `@`, `self.`) before the final # `name OP value`, collapse spaces. Pure syntactic folding. 
def self.canon(text) - t, = Ast.canon_polarity(text) + t, = canon_polarity(text) t = t.sub(/\Aself\./, "").sub(/\A@/, "") # strip a single receiver hop: `recv.attr == :v` -> `attr == :v` t = t.sub(/\A[A-Za-z_]\w*(?:\([^)]*\))?\.(?=[A-Za-z_]\w*\s*(==|!=|\.))/, "") t.gsub(/\s+/, " ").strip end - private - - def canon(text) = self.class.canon(text) - - def comparison?(node) - mid = node.children[node.type == :OPCALL ? 1 : 1] - %i[== != nil?].include?(mid) || - (node.type == :CALL && node.children[1] == :nil?) - end - - def record_pred(node) - name = node.children[0].to_s - return unless name.end_with?("?") - - stmts = Ast.body_stmts(node) - return unless stmts.size == 1 + def self.canon_polarity(text) + source = text.to_s.strip + return [source[1..].to_s.strip, true] if source.start_with?("!") - @preds << Pred.new(name: name, canon: canon(Ast.slice(stmts.first, @lines)), - file: @file, line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) + [source, false] end class Report @@ -122,7 +102,7 @@ def reification_misses @uses.filter_map do |u| ps = bycanon[u.canon] next unless ps && !ps.empty? - next if u.defn.end_with?("?") && ps.any? { |p| p.name == u.defn } + next if ps.any? { |p| p.name == u.defn } { predicate: ps.first.name, canon: u.canon, at: "#{u.file}:#{u.defn}:#{u.line}", diff --git a/gems/decomplex/lib/decomplex/sequence_mine.rb b/gems/decomplex/lib/decomplex/sequence_mine.rb index 99994d3c4..87f60680f 100644 --- a/gems/decomplex/lib/decomplex/sequence_mine.rb +++ b/gems/decomplex/lib/decomplex/sequence_mine.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex # Guarded-pair / protocol mining (Engler "Bugs as Deviant Behavior", @@ -10,8 +10,8 @@ module Decomplex # deviant -- the "similar path, one missing the step" plague that is # the literal shape of bugs #1/#2/#9. 
# - # Unit = the SET of distinct call message-names in a method (FCALL / - # CALL mid). Domain-agnostic (Engler): no name heuristics, mine all + # Unit = the SET of distinct semantic call message-names in a method. + # Domain-agnostic (Engler): no name heuristics, mine all # pairs, rank by support, accept FP. Same proven shape as co_update, # over calls instead of assigned attributes. class SequenceMine @@ -45,9 +45,9 @@ class SequenceMine def self.scan(files) calls = [] files.each do |f| - root, lines = Ast.parse(f) - e = new(f) - e.walk(root, []) + document = Syntax.parse(f, parser: "tree_sitter") + e = new(f, document) + e.collect calls.concat(e.calls) end Report.new(calls) @@ -55,49 +55,54 @@ def self.scan(files) attr_reader :calls - def initialize(file) + def initialize(file, document) @file = file + @document = document @calls = [] end - def walk(node, defstack) - return unless Ast.node?(node) - - defstack = Ast.def_push(node, defstack) - if %i[CALL FCALL VCALL].include?(node.type) - mid = node.children[node.type == :CALL ? 
1 : 0] - if protocol_event?(node, mid.to_s) - @calls << Call.new(mid: mid.to_s, file: @file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) + def collect + @document.call_sites.each do |call| + mid = call.message.to_s + nested_protocol_events(call).each do |nested_mid| + @calls << Call.new(mid: nested_mid, file: @file, + defn: call.function || "(top-level)", + line: call.line, + span: call.span) + end + if protocol_event?(call, mid) + @calls << Call.new(mid: mid, file: @file, + defn: call.function || "(top-level)", + line: call.line, + span: call.span) end end - node.children.each { |c| walk(c, defstack) } end private - def protocol_event?(node, mid) + def protocol_event?(call, mid) return false if IGNORED_MIDS.include?(mid) - return false if passive_reader_call?(node, mid) + return false if passive_reader_call?(call, mid) true end - def passive_reader_call?(node, mid) + def passive_reader_call?(call, mid) return false if zero_arg_action_name?(mid) - case node.type - when :CALL - node.children[2].nil? - when :VCALL - true - when :FCALL - node.children[1].nil? - else - false + return false unless call.arguments.to_a.empty? 
+ + true + end + + def nested_protocol_events(call) + return [] unless IGNORED_MIDS.include?(call.message.to_s) + + candidates = call.arguments.to_a + candidates += source_text(call.span).scan(/\b[a-z_]\w*[!?]?\b/) + candidates.uniq.select do |candidate| + !IGNORED_MIDS.include?(candidate) && zero_arg_action_name?(candidate) end end @@ -110,6 +115,21 @@ def zero_arg_action_name?(mid) end end + def source_text(span) + return "" unless span + + first_line, first_column, last_line, last_column = span + if first_line == last_line + return @document.lines[first_line - 1].to_s[first_column...last_column].to_s + end + + parts = [] + parts << @document.lines[first_line - 1].to_s[first_column..].to_s + parts.concat(@document.lines[first_line...(last_line - 1)] || []) + parts << @document.lines[last_line - 1].to_s[0...last_column].to_s + parts.join + end + class Report # No frequency blocklist: a pervasive protocol (alloc_mark + # cleanup in every method) is exactly the high-frequency case we diff --git a/gems/decomplex/lib/decomplex/state_branch_density.rb b/gems/decomplex/lib/decomplex/state_branch_density.rb index 4deb8cbf6..03a8ef636 100644 --- a/gems/decomplex/lib/decomplex/state_branch_density.rb +++ b/gems/decomplex/lib/decomplex/state_branch_density.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true require "set" -require_relative "ast" require_relative "syntax" module Decomplex @@ -9,260 +8,97 @@ module Decomplex # object-owned state. This is the "state + control flow" surface: # branch decisions over ivars, globals, or receiver attributes. class StateBranchDensity - BRANCH_TYPES = %i[IF UNLESS WHILE UNTIL].freeze - NOISE_MIDS = %i[! 
!= == === < <= > >= [] []= to_s inspect class].freeze Decision = Struct.new(:file, :defn, :line, :span, :predicate, :state_refs, keyword_init: true) def self.scan(files) - decisions = [] - parsed = files.to_h { |file| [file, Ast.parse(file)] } - global_immutable_readers = Hash.new { |h, k| h[k] = Set.new } - global_immutable_reader_types = Hash.new { |h, k| h[k] = {} } - global_type_aliases = {} - parsed.each_value do |_root, lines| - scanner = new(nil, lines) - scanner.immutable_struct_readers(lines).each do |name, readers| - global_immutable_readers[name].merge(readers) + documents = files.to_h do |file| + [file, Syntax.parse(file, parser: "tree_sitter")] + end + immutable_readers = Hash.new { |h, k| h[k] = Set.new } + immutable_reader_types = Hash.new { |h, k| h[k] = {} } + type_aliases = {} + + documents.each_value do |document| + document.immutable_struct_readers.each do |name, readers| + immutable_readers[name].merge(readers) end - scanner.immutable_struct_reader_types(lines).each do |name, readers| - global_immutable_reader_types[name].merge!(readers) + document.immutable_struct_reader_types.each do |name, readers| + immutable_reader_types[name].merge!(readers) end - global_type_aliases.merge!(scanner.type_aliases(lines)) + type_aliases.merge!(document.type_aliases) end - parsed.each do |file, (root, lines)| - scanner = new( + + decisions = documents.flat_map do |file, document| + new( file, - lines, - immutable_readers: global_immutable_readers, - immutable_reader_types: global_immutable_reader_types, - type_aliases: global_type_aliases - ) - scanner.walk(root, []) - decisions.concat(scanner.decisions) + document, + immutable_readers: immutable_readers, + immutable_reader_types: immutable_reader_types, + type_aliases: type_aliases + ).decisions end Report.new(decisions) end attr_reader :decisions - def initialize(file, lines, immutable_readers: nil, immutable_reader_types: nil, type_aliases: nil) + def initialize(file, document, immutable_readers:, 
immutable_reader_types:, type_aliases:) @file = file - @lines = lines - @decisions = [] - @totals = Hash.new(0) - @immutable_readers = immutable_readers || immutable_struct_readers(lines) - @immutable_reader_types = immutable_reader_types || immutable_struct_reader_types(lines) - @type_aliases = type_aliases || type_aliases(lines) - @method_param_types = method_param_types(lines) - end - - def walk(node, defstack) - return unless Ast.node?(node) - - defstack = Ast.def_push(node, defstack) - record_branch(node, defstack) - node.children.each { |child| walk(child, defstack) } - end - - def record_branch(node, defstack) - cond = - case node.type - when *BRANCH_TYPES - node.children[0] - when :CASE - node.children[0] - else - nil - end - return unless Ast.node?(cond) - - defn = defstack.last || "(top-level)" - @totals[[@file, defn]] += 1 - refs = state_refs(cond, defn) - return if refs.empty? - - @decisions << Decision.new( - file: @file, - defn: defn, - line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column], - predicate: Ast.slice(cond, @lines), - state_refs: refs.uniq.sort + @document = document + @decisions = semantic_decisions( + immutable_readers: immutable_readers, + immutable_reader_types: immutable_reader_types, + type_aliases: type_aliases ) end - def state_refs(node, defn) - refs = [] - collect_state_refs(node, refs, defn) - refs - end - - def collect_state_refs(node, refs, defn) - return unless Ast.node?(node) - - case node.type - when :IVAR - refs << node.children[0].to_s - when :GVAR - refs << node.children[0].to_s - when :CALL, :QCALL, :OPCALL - recv, mid, args = node.children - if state_attr_read?(recv, mid, args, defn) - refs << "#{Ast.slice(recv, @lines)}.#{mid}" - end - end - node.children.each { |child| collect_state_refs(child, refs, defn) } - end - - def state_attr_read?(recv, mid, args, defn) - return false unless recv - return false if NOISE_MIDS.include?(mid) - return false unless args.nil? 
|| empty_arg_list?(args) - return false if immutable_struct_const_read?(recv, mid, defn) - - # `user.admin?`, `user.name`, `@cart.empty?`, `config.enabled` - # are state-derived decisions. `a == 0` has no no-arg receiver - # read and is deliberately not counted. - true - end - - def immutable_struct_const_read?(recv, mid, defn) - owner_type = immutable_receiver_type(recv, defn) - return false unless owner_type - - immutable_reader?(owner_type, mid) - end - - def immutable_receiver_type(recv, defn) - return false unless Ast.node?(recv) - - if %i[CALL QCALL OPCALL].include?(recv.type) - recv_recv, recv_mid, recv_args = recv.children - return immutable_reader_result_type(recv_recv, recv_mid, recv_args, defn) - end - return false unless recv.type == :LVAR - - param_types = @method_param_types[defn] - return false unless param_types - - param_types[recv.children[0].to_s] - end - - def immutable_reader?(type_name, mid) - return false unless type_name - - resolved_type_name = resolve_type_alias(type_name) - readers = if @immutable_readers.key?(resolved_type_name) - @immutable_readers[resolved_type_name] - else - @immutable_readers[resolved_type_name.split("::").last] - end - readers&.include?(mid) || false - end - - def immutable_reader_result_type(recv, mid, args, defn) - return nil unless args.nil? || empty_arg_list?(args) + private - owner_type = immutable_receiver_type(recv, defn) - return nil unless owner_type - - resolved_type_name = resolve_type_alias(owner_type) - reader_types = if @immutable_reader_types.key?(resolved_type_name) - @immutable_reader_types[resolved_type_name] - else - @immutable_reader_types[resolved_type_name.split("::").last] - end - reader_types[mid] - end - - def empty_arg_list?(args) - Ast.node?(args) && args.type == :LIST && args.children.compact.empty? 
- end - - def immutable_struct_readers(lines) - readers = Hash.new { |h, k| h[k] = Set.new } - class_stack = [] - lines.each do |line| - if (match = line.match(/\A\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b/)) - class_stack << match[1] - next - end - if class_stack.any? && (match = line.match(/\A\s*const\s+:([A-Za-z_]\w*)\b/)) - readers[class_stack.last].add(match[1].to_sym) - next - end - class_stack.pop if class_stack.any? && line.match?(/\A\s*end\s*(?:#.*)?\z/) + def semantic_decisions(immutable_readers:, immutable_reader_types:, type_aliases:) + branch_decisions = @document.branch_decisions( + immutable_readers: immutable_readers, + immutable_reader_types: immutable_reader_types, + type_aliases: type_aliases + ) + filter_wrapper_decisions(branch_decisions).map do |decision| + Decision.new( + file: @file, + defn: decision.function, + line: decision.line, + span: decision.span, + predicate: decision.predicate, + state_refs: decision.state_refs.uniq.sort + ) end - readers end - def immutable_struct_reader_types(lines) - reader_types = Hash.new { |h, k| h[k] = {} } - class_stack = [] - lines.each do |line| - if (match = line.match(/\A\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b/)) - class_stack << match[1] - next - end - if class_stack.any? && (match = line.match(/\A\s*const\s+:([A-Za-z_]\w*)\s*,\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\b/)) - reader_types[class_stack.last][match[1].to_sym] = match[2] - next - end - class_stack.pop if class_stack.any? 
&& line.match?(/\A\s*end\s*(?:#.*)?\z/) + def filter_wrapper_decisions(decisions) + decisions.reject do |decision| + wrapper_predicate?(decision.predicate) && nested_state_decision?(decision, decisions) end - reader_types end - def type_aliases(lines) - aliases = {} - lines.each do |line| - if (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*([A-Z]\w*(?:::[A-Z]\w*)*)\s*\}/)) - aliases[match[1]] = match[2] - elsif (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*([A-Z]\w*(?:::[A-Z]\w*)*)\b/)) - aliases[match[1]] = match[2] - end - end - aliases + def wrapper_predicate?(predicate) + predicate.to_s.match?(/\A(?:if|unless|while|until)\b/) end - def resolve_type_alias(type_name) - seen = Set.new - current = type_name - loop do - break current if seen.include?(current) - - seen.add(current) - target = @type_aliases[current] || @type_aliases[current.split("::").last] - break current unless target + def nested_state_decision?(decision, decisions) + decisions.any? do |candidate| + next false if candidate.equal?(decision) + next false unless candidate.function == decision.function + next false unless encloses?(decision.span, candidate.span) - current = target + (Array(candidate.state_refs) - Array(decision.state_refs)).empty? end end - def method_param_types(lines) - types_by_method = {} - pending_sig = +"" - lines.each do |line| - pending_sig << line if pending_sig_active?(line, pending_sig) - if (match = line.match(/\A\s*def\s+([A-Za-z_]\w*[!?=]?)(?:\s|\(|$)/)) - types_by_method[match[1]] = sig_param_types(pending_sig) - pending_sig = +"" - end - end - types_by_method - end - - def pending_sig_active?(line, pending_sig) - !pending_sig.empty? 
|| line.match?(/\A\s*sig\b/) - end - - def sig_param_types(sig_source) - match = sig_source.match(/params\s*\((.*?)\)/m) - return {} unless match + def encloses?(outer, inner) + return false unless outer && inner - match[1].scan(/([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)/).to_h + starts_before = outer[0] < inner[0] || (outer[0] == inner[0] && outer[1] <= inner[1]) + ends_after = outer[2] > inner[2] || (outer[2] == inner[2] && outer[3] >= inner[3]) + starts_before && ends_after end class Report diff --git a/gems/decomplex/lib/decomplex/state_mesh.rb b/gems/decomplex/lib/decomplex/state_mesh.rb index ddf10e0ef..1ac809619 100644 --- a/gems/decomplex/lib/decomplex/state_mesh.rb +++ b/gems/decomplex/lib/decomplex/state_mesh.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" require_relative "semantic_alias" require "json" @@ -12,9 +12,9 @@ module Decomplex # organized by dir -> file -> function. # # Phases: - # 1. Discover state fields (ATTRASGN + IASGN with >= min_writes) - # 2. Find all write sites (reusing CoUpdate's walk logic) - # 3. Find all read sites (new walker: CALL/IVAR matching field names) + # 1. Discover state fields from Syntax state-write facts + # 2. Find all write sites from Syntax state-write facts + # 3. Find all read sites from Syntax state-read facts # 4. Find re-derivation sites via SemanticAlias reification misses # 5. Compute messiness per field # 6. Render hierarchical JSON graph @@ -32,16 +32,14 @@ class StateMesh # `custom_fields` overrides field discovery with an explicit list. # `min_writes` is the threshold for auto-discovered fields (default 2). 
def self.scan(files, min_writes: 2, custom_fields: nil) - src_map = {} - files.each do |f| - root, lines = Ast.parse(f) - src_map[f] = { root: root, lines: lines } + documents = files.to_h do |file| + [file, Syntax.parse(file, parser: "tree_sitter")] end - new(src_map, min_writes: min_writes, custom_fields: custom_fields) + new(documents, min_writes: min_writes, custom_fields: custom_fields) end - def initialize(src_map, min_writes: 2, custom_fields: nil) - @src_map = src_map + def initialize(documents, min_writes: 2, custom_fields: nil) + @documents = documents @min_writes = min_writes @custom_fields = custom_fields @writes = [] @@ -52,46 +50,19 @@ def initialize(src_map, min_writes: 2, custom_fields: nil) # ---- Phase 1+2: discover fields and walk write sites --------------- def discover_fields! - @src_map.each do |file, data| - walk_writes(data[:root], data[:lines], [], file) - end - end - - def walk_writes(node, lines, defstack, file) - return unless Ast.node?(node) - - case node.type - when :CLASS, :MODULE - defstack = defstack + [node.children[0].to_s] - when :DEFN then defstack = defstack + [node.children[0].to_s] - when :DEFS then defstack = defstack + [node.children[1].to_s] - when :ATTRASGN - recv, msg, = node.children - if msg == :[]= - node.children.each { |c| walk_writes(c, lines, defstack, file) } - return + @documents.each do |file, document| + document.state_writes.each do |write| + @writes << Write.new( + attr: write.field, + norm: normalize(write.field), + recv: write.receiver, + file: file, + defn: write.function, + line: write.line, + span: write.span + ) end - attr = msg.to_s.sub(/=$/, "") - norm = normalize(attr) - span = [node.first_lineno, node.first_column, - node.last_lineno, node.last_column] - @writes << Write.new(attr: attr, norm: norm, - recv: recv_slice(node.children[0], lines), - file: file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, span: span) - when :IASGN - attr = node.children[0].to_s # "@storage" - norm 
= normalize(attr) - span = [node.first_lineno, node.first_column, - node.last_lineno, node.last_column] - @writes << Write.new(attr: attr, norm: norm, recv: "self", - file: file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, span: span) end - - node.children.each { |c| walk_writes(c, lines, defstack, file) } end # ---- Phase 3: walk read sites ------------------------------------- @@ -100,53 +71,23 @@ def find_reads! # Build the set of normalized field names we care about. field_norms = known_field_norms - @src_map.each do |file, data| - walk_reads(data[:root], data[:lines], [], file, field_norms) - end - end - - def walk_reads(node, lines, defstack, file, field_norms) - return unless Ast.node?(node) - - case node.type - when :CLASS, :MODULE - defstack = defstack + [node.children[0].to_s] - when :DEFN then defstack = defstack + [node.children[0].to_s] - when :DEFS then defstack = defstack + [node.children[1].to_s] - when :CALL, :OPCALL, :FCALL - # CALL(recv, :method, args) - attribute reads have no args - # FCALL(:method, args) - attribute reads have no args - recv = node.type == :CALL || node.type == :OPCALL ? node.children[0] : nil - mid = node.type == :CALL || node.type == :OPCALL ? node.children[1] : node.children[0] - args = node.type == :CALL || node.type == :OPCALL ? node.children[2] : node.children[1] - - # Skip if called with arguments (it's a method call, not attr read) - if args.nil? || (Ast.node?(args) && args.type == :LIST && args.children.compact.empty?) - name = mid.to_s - if field_norms.include?(name) - span = [node.first_lineno, node.first_column, - node.last_lineno, node.last_column] - @reads << Read.new(attr: name, norm: name, - recv: recv_slice(recv, lines), - file: file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, span: span) - end - end - when :IVAR - name = node.children[0].to_s # e.g. 
"@storage" - norm = normalize(name) - if field_norms.include?(norm) - span = [node.first_lineno, node.first_column, - node.last_lineno, node.last_column] - @reads << Read.new(attr: name, norm: norm, recv: "self", - file: file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, span: span) + @documents.each do |file, document| + document.state_reads.each do |read| + norm = normalize(read.field) + next unless field_norms.include?(norm) + next if write_target_read?(read) + + @reads << Read.new( + attr: read.field, + norm: norm, + recv: read.receiver, + file: file, + defn: read.function, + line: read.line, + span: read.span + ) end end - - node.children.each { |c| walk_reads(c, lines, defstack, file, field_norms) } end # ---- Phase 4: re-derivation sites --------------------------------- @@ -157,7 +98,7 @@ def find_re_derivations!(reification_misses = nil) # Accept pre-computed misses (for testing) or compute them. if reification_misses.nil? - files = @src_map.keys + files = @documents.keys sa = SemanticAlias.scan(files) reification_misses = sa.reification_misses end @@ -433,13 +374,21 @@ def known_field_norms end end - def recv_slice(node, lines) - return "?" unless Ast.node?(node) + def write_target_read?(read) + @writes.any? do |write| + write.file == read.file && + write.defn == read.function && + write.recv == read.receiver && + write.attr == read.field && + write.line == read.line && + same_start?(write.span, read.span) + end + end + + def same_start?(write_span, read_span) + return false unless write_span && read_span - sl = node.first_lineno - el = node.last_lineno - t = sl == el ? lines[sl - 1][node.first_column...node.last_column] : lines[sl - 1][node.first_column..] 
- t.to_s.strip.gsub(/\s+/, " ") + write_span[0] == read_span[0] && write_span[1] == read_span[1] end end end diff --git a/gems/decomplex/lib/decomplex/structural_topology.rb b/gems/decomplex/lib/decomplex/structural_topology.rb index cdd53c120..964ef7561 100644 --- a/gems/decomplex/lib/decomplex/structural_topology.rb +++ b/gems/decomplex/lib/decomplex/structural_topology.rb @@ -1,11 +1,11 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex # StructuralTopology is Decomplex's conservative static model of method - # ownership and direct internal calls over the normalized Tree-sitter AST. - # It deliberately resolves only same-owner bare/self calls; dynamic dispatch + # ownership and direct internal calls over Syntax structural facts. It + # deliberately resolves only same-owner bare/self calls; dynamic dispatch # belongs to higher-recall detectors. class StructuralTopology Method = Struct.new(:id, :owner, :name, :file, :line, :span, :visibility, keyword_init: true) @@ -14,25 +14,16 @@ class StructuralTopology keyword_init: true ) - VISIBILITY_MIDS = %i[public protected private].freeze - OWNER_TYPES = %i[CLASS MODULE].freeze - METHOD_TYPES = %i[DEFN DEFS].freeze - SKIP_NESTED_TYPES = %i[CLASS MODULE DEFN DEFS LAMBDA].freeze - CONDITIONAL_TYPES = %i[IF UNLESS CASE CASE2].freeze - ITERATION_TYPES = %i[ITER FOR WHILE UNTIL].freeze - def self.scan(files) - methods = [] - parsed = files.each_with_object({}) do |file, out| - out[file] = Ast.parse(file) + documents = files.to_h do |file| + [file, Syntax.parse(file, parser: "tree_sitter")] end - parsed.each do |file, (root, lines)| - methods.concat(MethodCollector.new(file, lines).scan(root)) + methods = documents.flat_map do |file, document| + MethodFacts.new(file, document).methods end - - edges = parsed.flat_map do |file, (root, lines)| - EdgeCollector.new(file, lines, methods).scan(root) + edges = documents.flat_map do |file, document| + EdgeFacts.new(file, document, 
methods).edges end Graph.new(methods, edges) @@ -102,237 +93,61 @@ def call_sites(id) end end - class MethodCollector - def initialize(file, lines) + class MethodFacts + def initialize(file, document) @file = file - @lines = lines - end - - def scan(root) - out = [] - top_level_methods(root).each { |method| out << method } - walk(root, [], out) - out - end - - private - - def top_level_methods(root) - methods_from_statements(top_level_statements(root), top_level_owner) - end - - def walk(node, owners, out) - return unless Ast.node?(node) - - if OWNER_TYPES.include?(node.type) - owner = full_owner_name(owners, node) - owner_methods(node, owner).each { |method| out << method } - node.children.each { |child| walk(child, owners + [owner_segment(node)], out) } - else - node.children.each { |child| walk(child, owners, out) } - end - end - - def owner_methods(owner_node, owner) - body = owner_body(owner_node) - return [] unless Ast.node?(body) - - methods_from_statements(owner_statements(body), owner) - end - - def methods_from_statements(statements, owner) - methods = [] - visibility = :public - statements.each do |stmt| - next unless Ast.node?(stmt) - - if bare_visibility_marker?(stmt) - visibility = stmt.children[0].to_sym - elsif visibility_call?(stmt) - visibility = handle_visibility_call(stmt, owner, visibility, methods) - elsif METHOD_TYPES.include?(stmt.type) - methods << method_record(stmt, owner, visibility) - end - end - methods - end - - def handle_visibility_call(stmt, owner, current_visibility, methods) - visibility = stmt.children[0].to_sym - args = stmt.children[1] - return visibility unless Ast.node?(args) - - each_arg(args) do |arg| - if METHOD_TYPES.include?(arg.type) - methods << method_record(arg, owner, visibility) - elsif (name = literal_method_name(arg)) - method = methods.reverse.find { |row| row.name == name } - method.visibility = visibility if method - end - end - - current_visibility - end - - def owner_body(owner_node) - scope = 
owner_node.children[owner_node.type == :CLASS ? 2 : 1] - return nil unless Ast.node?(scope) && scope.type == :SCOPE - - scope.children[2] - end - - def owner_statements(body) - body.type == :BLOCK ? body.children.compact : [body] - end - - def top_level_statements(root) - return [] unless Ast.node?(root) - - root.children.compact.flat_map do |child| - Ast.node?(child) && child.type == :BLOCK ? child.children.compact : [child] - end - end - - def bare_visibility_marker?(node) - node.type == :VCALL && VISIBILITY_MIDS.include?(node.children[0]) - end - - def visibility_call?(node) - node.type == :FCALL && VISIBILITY_MIDS.include?(node.children[0]) - end - - def each_arg(args) - args.children.compact.each do |arg| - yield arg if Ast.node?(arg) - end - end - - def literal_method_name(node) - return node.children[0].to_s if node.type == :LIT && node.children[0].is_a?(Symbol) - return node.children[0].to_s if %i[STR DSTR].include?(node.type) - - nil - end - - def method_record(node, owner, visibility) - name = method_name(node) - Method.new( - id: "#{owner}##{name}", - owner: owner, - name: name, - file: @file, - line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], - visibility: node.type == :DEFS ? :public : visibility - ) - end - - def method_name(node) - if node.type == :DEFS - receiver = node.children[0] - prefix = Ast.node?(receiver) && receiver.type == :SELF ? 
"self" : Ast.slice(receiver, @lines) - "#{prefix}.#{node.children[1]}" - else - node.children[0].to_s + @document = document + end + + def methods + @document.function_defs.map do |function| + owner = owner_for_fact(function) + Method.new( + id: "#{owner}##{function.name}", + owner: owner, + name: function.name, + file: @file, + line: function.line, + span: function.span, + visibility: function.visibility || :public + ) end end - def full_owner_name(owners, node) - (owners + [owner_segment(node)]).join("::") - end - - def top_level_owner - "(top-level:#{@file})" - end + private - def owner_segment(node) - text = Ast.slice(node.children[0], @lines) - text.empty? ? "(anonymous)" : text + def owner_for_fact(fact) + TopLevelOwner.new(@file, @document).owner_for(fact) end end - class EdgeCollector - def initialize(file, lines, methods) + class EdgeFacts + def initialize(file, document, methods) @file = file - @lines = lines + @document = document @method_by_id = methods.to_h { |method| [method.id, method] } + @owner_mapper = TopLevelOwner.new(file, document) end - def scan(root) - out = [] - top_level_methods(root).each do |method_node| - method = @method_by_id["#{top_level_owner}##{method_name(method_node)}"] - collect_calls(method_node, method, [], out) if method + def edges + @document.call_sites.filter_map do |call| + edge_for_call(call) end - walk(root, [], out) - out end private - def top_level_methods(root) - top_level_statements(root).select { |stmt| Ast.node?(stmt) && METHOD_TYPES.include?(stmt.type) } - end - - def walk(node, owners, out) - return unless Ast.node?(node) - - if OWNER_TYPES.include?(node.type) - owner = (owners + [owner_segment(node)]).join("::") - owner_methods(node).each do |method_node| - method = @method_by_id["#{owner}##{method_name(method_node)}"] - collect_calls(method_node, method, [], out) if method - end - node.children.each { |child| walk(child, owners + [owner_segment(node)], out) } - else - node.children.each { |child| walk(child, 
owners, out) } - end - end - - def owner_methods(owner_node) - body = owner_body(owner_node) - return [] unless Ast.node?(body) - - owner_statements(body).flat_map do |stmt| - next [] unless Ast.node?(stmt) - - if METHOD_TYPES.include?(stmt.type) - [stmt] - elsif visibility_call?(stmt) - inline_methods(stmt) - else - [] - end - end - end + def edge_for_call(call) + return nil unless call.receiver.to_s == "self" - def inline_methods(stmt) - args = stmt.children[1] - return [] unless Ast.node?(args) + owner = @owner_mapper.owner_for(call) + caller = @method_by_id["#{owner}##{call.function}"] + return nil unless caller - args.children.compact.select { |arg| Ast.node?(arg) && METHOD_TYPES.include?(arg.type) } - end - - def collect_calls(node, caller, context_stack, out) - return unless Ast.node?(node) - return if SKIP_NESTED_TYPES.include?(node.type) && !METHOD_TYPES.include?(node.type) - - context_stack = context_stack + [:conditional] if CONDITIONAL_TYPES.include?(node.type) - context_stack = context_stack + [:iterates] if ITERATION_TYPES.include?(node.type) - - if (edge = internal_edge(node, caller, context_stack)) - out << edge unless edge.caller == edge.callee - end - - node.children.each { |child| collect_calls(child, caller, context_stack, out) } - end - - def internal_edge(node, caller, context_stack) - call = internal_call_name(node, caller) - return nil unless call - - callee = @method_by_id["#{caller.owner}##{call[:name]}"] + callee_name = scoped_name(caller, call.message) + callee = @method_by_id["#{owner}##{callee_name}"] return nil unless callee + return nil if caller.id == callee.id Edge.new( caller: caller.id, @@ -340,70 +155,74 @@ def internal_edge(node, caller, context_stack) caller_name: caller.name, callee_name: callee.name, file: @file, - line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], - type: edge_type(context_stack), - kind: call[:kind], - confidence: call[:confidence] + line: 
call.line, + span: call.span, + type: edge_type(call.control), + kind: call_kind(call), + confidence: :high ) end - def internal_call_name(node, caller) - case node.type - when :FCALL, :VCALL - { name: scoped_name(caller, node.children[0]), kind: :bare_internal, confidence: :high } - when :CALL, :OPCALL - receiver, mid = node.children - return nil unless Ast.node?(receiver) && receiver.type == :SELF - - { name: scoped_name(caller, mid), kind: :direct_self, confidence: :high } - end + def scoped_name(caller, message) + caller.name.to_s.start_with?("self.") ? "self.#{message}" : message.to_s end - def scoped_name(caller, mid) - caller.name.start_with?("self.") ? "self.#{mid}" : mid.to_s + def edge_type(control) + %i[conditional iterates].include?(control) ? control : :always end - def edge_type(context_stack) - context_stack.last || :always + def call_kind(call) + source_text(call.span).lstrip.start_with?("self.") ? :direct_self : :bare_internal end - def owner_body(owner_node) - scope = owner_node.children[owner_node.type == :CLASS ? 2 : 1] - return nil unless Ast.node?(scope) && scope.type == :SCOPE + def source_text(span) + return "" unless span - scope.children[2] + first_line, first_column, last_line, last_column = span + if first_line == last_line + return @document.lines[first_line - 1].to_s[first_column...last_column].to_s + end + + parts = [] + parts << @document.lines[first_line - 1].to_s[first_column..].to_s + parts.concat(@document.lines[first_line...(last_line - 1)] || []) + parts << @document.lines[last_line - 1].to_s[0...last_column].to_s + parts.join end + end - def owner_statements(body) - body.type == :BLOCK ? 
body.children.compact : [body] + class TopLevelOwner + def initialize(file, document) + @file = file + @document = document end - def top_level_statements(root) - return [] unless Ast.node?(root) + def owner_for(fact) + owner = fact.owner.to_s + return owner unless owner == file_owner + return owner if enclosed_by_matching_owner?(fact) - root.children.compact.flat_map do |child| - Ast.node?(child) && child.type == :BLOCK ? child.children.compact : [child] - end + top_level_owner end - def visibility_call?(node) - node.type == :FCALL && VISIBILITY_MIDS.include?(node.children[0]) - end + private - def method_name(node) - if node.type == :DEFS - receiver = node.children[0] - prefix = Ast.node?(receiver) && receiver.type == :SELF ? "self" : Ast.slice(receiver, @lines) - "#{prefix}.#{node.children[1]}" - else - node.children[0].to_s + def enclosed_by_matching_owner?(fact) + @document.owner_defs.any? do |owner| + owner.name.to_s == fact.owner.to_s && encloses?(owner.span, fact.span) end end - def owner_segment(node) - text = Ast.slice(node.children[0], @lines) - text.empty? ? "(anonymous)" : text + def encloses?(outer, inner) + return false unless outer && inner + + starts_before = outer[0] < inner[0] || (outer[0] == inner[0] && outer[1] <= inner[1]) + ends_after = outer[2] > inner[2] || (outer[2] == inner[2] && outer[3] >= inner[3]) + starts_before && ends_after + end + + def file_owner + File.basename(@file.to_s, File.extname(@file.to_s)) end def top_level_owner diff --git a/gems/decomplex/lib/decomplex/superfluous_state.rb b/gems/decomplex/lib/decomplex/superfluous_state.rb index 49abcd3e7..93a0c7505 100644 --- a/gems/decomplex/lib/decomplex/superfluous_state.rb +++ b/gems/decomplex/lib/decomplex/superfluous_state.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true require "set" -require_relative "ast" module Decomplex # SuperfluousState -- fields that could be eliminated entirely. 
diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index cb557320a..042d9cb13 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -2,7 +2,6 @@ require "set" require "rbconfig" -require_relative "ast" module Decomplex module Syntax @@ -10,17 +9,27 @@ module Syntax :params, :signature, :kind, keyword_init: true) OwnerDef = Struct.new(:file, :name, :kind, :line, :span, keyword_init: true) CallSite = Struct.new(:receiver, :message, :file, :function, :owner, :line, :span, - :conditional, :arguments, :control, keyword_init: true) + :conditional, :arguments, :control, :safe_navigation, :block, + keyword_init: true) StateDeclaration = Struct.new(:field, :owner, :type, :file, :line, :span, keyword_init: true) StateParamOrigin = Struct.new(:field, :receiver, :owner, :param, :file, :function, :line, :span, keyword_init: true) - DecisionSite = Struct.new(:kind, :members, :file, :function, :line, :span, :predicate, keyword_init: true) + DecisionSite = Struct.new(:kind, :members, :file, :function, :line, :span, :predicate, + :enclosing_span, keyword_init: true) StateRead = Struct.new(:field, :receiver, :file, :function, :line, :span, :owner, keyword_init: true) StateWrite = Struct.new(:field, :receiver, :file, :function, :line, :span, :owner, keyword_init: true) BranchDecision = Struct.new(:file, :function, :line, :span, :predicate, :state_refs, keyword_init: true) BranchArm = Struct.new(:file, :function, :kind, :line, :span, :decision_line, :decision_span, :predicate, :member, :body, keyword_init: true) + PredicateDef = Struct.new(:file, :name, :owner, :body, :line, :span, keyword_init: true) + ComparisonSite = Struct.new(:file, :function, :line, :span, :source, :operator, keyword_init: true) + LocalMethod = Struct.new(:id, :owner, :name, :file, :line, :span, :node, + :statements, :boundaries, keyword_init: true) + LocalStatement = Struct.new(:index, :line, :end_line, :span, :source, 
:reads, + :writes, :dependencies, :co_uses, keyword_init: true) + LocalBoundary = Struct.new(:before_index, :after_index, :line, :kind, :text, keyword_init: true) + PathConditionSite = Struct.new(:guards, :action, :file, :function, :line, :span, keyword_init: true) LanguageLexicon = Struct.new( :type_guard_patterns, :diagnostic_patterns, :trivial_patterns, :nil_literal_patterns, @@ -370,10 +379,11 @@ def generated_prelude?(_document, _node) def call_target(document, node) case node.kind - when "call_expression", "method_invocation", "invocation_expression" + when "call_expression", "method_invocation", "invocation_expression", "function_call", "method_call" generic_call_target(document, node) when "attribute", "selector_expression", "field", "field_access", "member_expression", - "member_access_expression", "field_expression", "expression_list" + "member_access_expression", "field_expression", "expression_list", + "dot_index_expression", "variable_list", "identifier", "simple_identifier" adjacent_argument_call_target(node) end end @@ -391,62 +401,47 @@ def state_target(lhs) end end - class RubySyntaxAdapter < TreeSitterLanguageAdapter - def function_name(node) - case node.kind - when "body_statement" - hidden_ruby_method_name(node) - when "singleton_method" - name = named_field(node, "name")&.text || - node.named_children.reverse.find do |child| - %w[identifier field_identifier property_identifier].include?(child.kind) - end&.text - name && "self.#{name}" - when "argument_list" - inline_def_name(node) - else - super - end - end + class RubySyntaxAdapter < TreeSitterLanguageAdapter; end + class PythonSyntaxAdapter < TreeSitterLanguageAdapter def visibility(_document, node) - return ruby_inline_def_visibility(node) if inline_def_argument_list?(node) + name = function_name(node).to_s + return :private if name.start_with?("_") && !name.start_with?("__") - ruby_method_visibility(node) + :public end - def owner_name_from_declaration(document, node) - return 
hidden_ruby_owner_name(node) if hidden_ruby_owner_declaration?(node) + def call_target(document, node) + python_adjacent_call_target(node) || super + end + def local_methods(document) super end - def owner_kind(node) - return hidden_ruby_owner_kind(node) if hidden_ruby_owner_declaration?(node) + private - super - end + def python_function_body_statements(node) + body = named_field(node, "body") || + node.named_children.find { |child| child.kind == "block" } + return [] unless body - def call_target(document, node) - case node.kind - when "call" - ruby_call_target(node) - when "body_statement" - ruby_bare_body_call_target(node) - when "identifier" - ruby_bare_call_target(node) - else - super - end + body.named_children.reject { |child| child.kind == "comment" } end - end - class PythonSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - name = function_name(node).to_s - return :private if name.start_with?("_") && !name.start_with?("__") + def python_adjacent_call_target(node) + return nil unless %w[identifier].include?(node.kind) - :public + args = next_sibling(node) + return nil unless args&.kind == "argument_list" + + { + receiver: "self", + message: node.text, + arguments: args.named_children.map { |child| normalize_text(child.text) } + } + rescue StandardError + nil end end @@ -502,6 +497,10 @@ def generated_prelude?(document, node) end class ZigSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(_document, node) + modifier_visibility(node) || :private + end + def state_declaration(node) return zig_container_field_declaration(node) if node.kind == "container_field" @@ -573,481 +572,528 @@ def c_visibility(node) end end - class RubySyntaxAdapter - def function_params(node) - return hidden_ruby_method_params(node) if hidden_ruby_method_definition?(node) + class TreeSitterLanguageAdapter + BRANCH_KINDS = %w[if unless if_statement if_modifier unless_modifier if_expression + while until while_statement for for_statement + case 
switch_statement expression_switch_statement switch_expression + match_statement match_expression when_expression].freeze + COMPARISON_OPERATORS = %w[== !=].freeze + NOISE_MESSAGES = %w[! != == === < <= > >= [] []= to_s inspect class].freeze - params = super - if inline_def_argument_list?(node) - params = node.named_children.find { |child| child.kind == "method_parameters" } - &.named_children - &.filter_map { |param| parameter_name(param) } - &.uniq || params - end - params + def initial_stack(document) + [{ file_owner: file_owner(document.file), language: document.language }] end - def function_signature(document, node) - if hidden_ruby_method_definition?(node) - return normalize_text(hidden_ruby_method_signature(document, node)) - end - - signature = preceding_ruby_signature(document, node) - return signature unless signature.empty? - - super + def push_context(document, stack, node) + next_stack = push_owner_context(document, stack, node) + name = function_name(node) + next_stack = name ? next_stack + [function_context(node, next_stack)] : next_stack + control = control_context(node) + control ? 
next_stack + [{ control: control }] : next_stack end - def state_declaration(node) - ruby_t_let_state_declaration(node) || super + def structural_facts_for_node(document, node, stack) + out = { + function_defs: [], + owner_defs: [], + call_sites: [], + state_declarations: [], + state_param_origins: [], + state_reads: [], + state_writes: [] + } + record_function_def(document, node, stack, out[:function_defs]) + record_owner_def(document, node, stack, out[:owner_defs]) + record_call_site(document, node, stack, out[:call_sites]) + record_state_declaration(document, node, stack, out[:state_declarations]) + record_state_param_origin(document, node, stack, out[:state_param_origins]) + record_state_read(document, node, stack, out[:state_reads]) + record_state_write(document, node, stack, out[:state_writes]) + out end - def state_read_target(node) - ruby_state_variable_target(node) || super + def after_structural_facts(document, out) + record_implicit_state_accesses(document, out) if implicit_state_accesses? end - def state_target(lhs) - ruby_state_variable_target(lhs) || super + def decision_site_facts(document, node, stack) + out = [] + record_decision_site(document, node, stack, out) + out end - private - - def inline_def_argument_list?(node) - ts_node?(node) && node.kind == "argument_list" && node.children.first&.kind.to_s == "def" + def branch_decision_facts(document, node, stack, immutable_readers:, immutable_reader_types:, type_aliases:) + out = [] + record_branch_decision( + document, + node, + stack, + out, + immutable_readers: immutable_readers, + immutable_reader_types: immutable_reader_types, + type_aliases: type_aliases, + method_param_types: method_param_types(document) + ) + out end - def inline_def_name(node) - return nil unless inline_def_argument_list?(node) - - receiver_index = node.named_children.index { |child| child.kind == "self" || child.kind == "constant" } - search = receiver_index ? node.named_children[(receiver_index + 1)..] 
: node.named_children - name = search&.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) }&.text - receiver_index ? "self.#{name}" : name + def branch_arm_facts(document, node, stack) + out = [] + record_branch_arm(document, node, stack, out) + out end - def hidden_ruby_method_definition?(node) - ts_node?(node) && node.kind == "body_statement" && node.children.first&.kind.to_s == "def" - end + def comparison_site_facts(document, node, stack) + target = comparison_target(node) + return [] unless target - def hidden_ruby_method_name(node) - return nil unless hidden_ruby_method_definition?(node) + [ + ComparisonSite.new( + file: document.file, + function: current_function(stack), + line: line(node), + span: span(node), + source: target[:source], + operator: target[:operator] + ) + ] + end - receiver_index = node.named_children.index { |child| child.kind == "self" || child.kind == "constant" } - search = receiver_index ? node.named_children[(receiver_index + 1)..] : node.named_children - name = search&.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) }&.text - receiver_index ? "self.#{name}" : name + def implicit_state_accesses? + false end - def hidden_ruby_method_params(node) - params = node.named_children.find { |child| child.kind == "method_parameters" } + def function_params(node) + params = if node.kind == "method_declaration" + lists = node.named_children.select { |child| child.kind == "parameter_list" } + lists.size > 1 ? 
lists[1] : lists.first + else + named_field(node, "parameters") || + node.named_children.find do |child| + %w[parameters formal_parameters function_value_parameters parameter_list].include?(child.kind) + end + end + params ||= node.named_children.select { |child| child.kind == "parameter" } if node.kind == "function_declaration" return [] unless params - params.named_children.filter_map { |param| parameter_name(param) }.uniq + Array(params.respond_to?(:named_children) ? params.named_children : params).filter_map do |param| + parameter_name(param) + end.uniq end - def hidden_ruby_method_signature(document, node) - body = node.named_children.find { |child| child.kind == "body_statement" } - end_byte = body ? body.start_byte : node.end_byte - document.source.byteslice(node.start_byte, end_byte - node.start_byte).to_s.strip.sub(/;+\z/, "") + def function_signature(document, node) + body = named_field(node, "body") + text = + if body + document.source.byteslice(node.start_byte, body.start_byte - node.start_byte).to_s.strip + else + line_text(document, node).strip + end + normalize_text(text.empty? ? 
line_text(document, node) : text) rescue StandardError - line_text(document, node).strip + normalize_text(line_text(document, node)) end - def hidden_ruby_owner_declaration?(node) - return false unless ts_node?(node) - return false unless node.kind == "body_statement" - - %w[class module].include?(node.children.first&.kind.to_s) + def method_param_types(_document) + {} end - def hidden_ruby_owner_name(node) - node.named_children.find { |child| %w[constant identifier type_identifier].include?(child.kind) }&.text + def predicate_def(_document, function_def) + body = generic_predicate_body(function_def.body) + return nil unless body + + PredicateDef.new( + file: function_def.file, + name: function_def.name, + owner: function_def.owner, + body: body, + line: function_def.line, + span: function_def.span + ) end - def hidden_ruby_owner_kind(node) - node.children.first&.kind.to_s == "module" ? :module : :class + def local_methods(document) + document.function_defs.map do |function_def| + statements = generic_function_body_statements(function_def.body) + local_names = generic_local_names(function_def, statements) + local_statements = statements.each_with_index.map do |statement, index| + generic_local_statement(statement, index, local_names) + end + owner = function_def.owner.to_s == file_owner(document.file) ? 
"(top-level)" : function_def.owner + + LocalMethod.new( + id: "#{owner}##{function_def.name}", + owner: owner, + name: function_def.name, + file: function_def.file, + line: function_def.line, + span: function_def.span, + node: function_def.body, + statements: local_statements, + boundaries: generic_structural_boundaries(document, local_statements) + ) + end end - def ruby_method_visibility(node) - modifier_visibility(node) + def path_condition_sites(document) + out = [] + document.function_defs.each do |function_def| + generic_function_body_statements(function_def.body).each do |statement| + generic_path_walk(document, statement, function_def.name, [], out) + end + end + out end - def ruby_inline_def_visibility(node) - parent = parent_node(node) - return nil unless parent&.kind == "call" + private - target = ruby_call_target(parent) - visibility = target && target[:receiver] == "self" && target[:message]&.to_sym - %i[private protected public].include?(visibility) ? visibility : nil - end + def generic_predicate_body(node) + body = generic_function_body_node(node) + return nil unless body - def ruby_call_target(node) - receiver = named_field(node, "receiver") - method = named_field(node, "method") - message = method&.text || first_named_text(node, %w[identifier constant]) - message ||= normalize_text(node.text) if receiver.nil? && ruby_simple_call_text?(node.text) - return nil unless message + statement = generic_function_body_statements(node).last || body + source = normalize_text(statement.text) + source = source.sub(/\Areturn\s+/, "").sub(/;\z/, "").strip + return nil if source.empty? || source.length > 200 + return nil unless source.match?(/\A(?:true|false)\z|\b(?:true|false|null|nil)\b|(?:==|!=|&&|\|\||\band\b|\bor\b)/i) - { - receiver: receiver ? 
normalize_text(receiver.text) : "self", - message: message, - arguments: ruby_argument_texts(node) - } + source end - def ruby_bare_call_target(node) - return nil unless ruby_bare_call_identifier?(node) + def generic_function_body_node(node) + return nil unless ts_node?(node) - { - receiver: "self", - message: node.text, - arguments: [] - } + named_field(node, "body") || + node.named_children.reverse.find do |child| + %w[block body body_statement function_body statement_block compound_statement declaration_list].include?(child.kind) + end end - def ruby_bare_body_call_target(node) - return nil if hidden_ruby_method_definition?(node) || hidden_ruby_owner_declaration?(node) + def generic_function_body_statements(node) + body = generic_function_body_node(node) + return [] unless body - explicit = ruby_explicit_receiver_body_call_target(node) - return explicit if explicit + named = body.named_children.reject { |child| comment_node?(child) } + if named.size == 1 && %w[statements statement_list].include?(named.first.kind) + return [named.first] if branch_node?(named.first) - message = node.text.to_s.strip - return nil unless ruby_simple_call_text?(message) - return nil if %w[true false nil self].include?(message) + named = named.first.named_children.reject { |child| comment_node?(child) } + end + return [] if named.empty? && body.text.to_s.strip.empty? + return [body] if branch_node?(body) + return [body] if generic_assignment_statement?(body) + return [body] if named.empty? 
- { - receiver: "self", - message: message, - arguments: [] - } + named end - def ruby_explicit_receiver_body_call_target(node) - receiver, message = node.named_children - return nil unless receiver && message - return nil unless %w[self constant identifier].include?(receiver.kind) - return nil unless %w[identifier constant].include?(message.kind) + def generic_local_names(function_def, statements) + names = Set.new(function_def.params.to_a.map(&:to_s)) + statements.each do |statement| + names.merge(generic_local_writes(statement)) + end + names + end - { - receiver: normalize_text(receiver.text), - message: message.text, - arguments: [] - } + def generic_local_statement(node, index, local_names) + reads = generic_local_reads(node, local_names).uniq + writes = generic_local_writes(node).uniq + LocalStatement.new( + index: index, + line: line(node), + end_line: span(node)[2], + span: span(node), + source: normalize_text(node.text), + reads: reads.to_set, + writes: writes.to_set, + dependencies: generic_assignment_dependencies(node, local_names), + co_uses: reads.combination(2).map { |left, right| [left, right] } + ) end - def ruby_simple_call_text?(text) - text.to_s.strip.match?(/\A[a-z_]\w*[!?=]?\z/) + def generic_local_reads(node, local_names) + reads = [] + generic_walk_local(node) do |child| + name = generic_local_identifier_text(child) + next unless name + next unless local_names.include?(name) + next if generic_local_write_node?(child) + next if generic_declaration_name?(child) + next if generic_member_name?(child) + next if generic_call_name?(child) + + reads << name + end + reads end - def ruby_bare_call_identifier?(node) - parent = parent_node(node) - return false unless parent - return false if ruby_declaration_name?(node, parent) - return false if %w[method_parameters block_parameters argument_list assignment].include?(parent.kind) - if parent.kind == "call" - return false if named_field(parent, "receiver") + def generic_local_writes(node) + writes = [] + 
if (name = generic_local_declaration_name(node)) + writes << name + end + writes.concat(generic_assignment_lhs_names(node)) - first = parent.named_children.first - return first == node && next_sibling(node)&.kind == "argument_list" + generic_walk_local(node) do |child| + next unless generic_identifier?(child) + next unless generic_local_write_node?(child) + + writes << child.text.to_s end - return false if next_sibling(node)&.text == "=" || prev_sibling(node)&.text == "=" - return false if next_sibling(node)&.text == "." || prev_sibling(node)&.text == "." + writes + end + + def generic_assignment_dependencies(node, local_names) + lhs_names = generic_local_writes(node) + return [] if lhs_names.empty? - %w[body_statement then else elsif ensure rescue].include?(parent.kind) || - next_sibling(node)&.kind == "argument_list" + reads = generic_local_reads(node, local_names) - lhs_names + lhs_names.product(reads).reject { |left, right| left == right }.uniq end - def ruby_declaration_name?(node, parent) - return true if hidden_ruby_method_definition?(parent) - return true if hidden_ruby_owner_declaration?(parent) - return true if %w[method singleton_method class module].include?(parent.kind) + def generic_structural_boundaries(document, statements) + statements.each_cons(2).filter_map do |left, right| + boundary = generic_source_boundary(document, left.end_line + 1, right.line - 1) + next unless boundary - false + LocalBoundary.new( + before_index: left.index, + after_index: right.index, + line: boundary[:line], + kind: boundary[:kind], + text: boundary[:text] + ) + end end - def ruby_argument_texts(node) - args = named_field(node, "arguments") || node.named_children.find { |child| child.kind == "argument_list" } - return [] unless args + def generic_source_boundary(document, first_line, last_line) + return nil if first_line > last_line - values = args.named_children.map { |child| normalize_text(child.text) } - return values unless values.empty? 
+ blank = nil + (first_line..last_line).each do |line_number| + text = document.lines[line_number - 1].to_s + stripped = text.strip + return { line: line_number, kind: :comment, text: stripped } if stripped.start_with?("#", "//", "--") - text = args.text.to_s.strip - text = text[1...-1] if text.start_with?("(") && text.end_with?(")") - text.split(/\s*,\s*/).map { |arg| normalize_text(arg) }.reject(&:empty?) + blank ||= { line: line_number, kind: :blank, text: stripped } if stripped.empty? + end + blank end - def ruby_t_let_state_declaration(node) - lhs = named_field(node, "left") || node.named_children.first - rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] - target = state_target(lhs) - return nil unless target && target[:receiver] == "self" && target[:field].to_s.start_with?("@") - return nil unless rhs&.kind == "call" - - receiver = named_field(rhs, "receiver") || rhs.named_children.first - method = named_field(rhs, "method") || rhs.named_children.find { |child| child.kind == "identifier" } - return nil unless receiver&.text == "T" && method&.text == "let" + def generic_walk_local(node, &block) + return unless ts_node?(node) - args = named_field(rhs, "arguments") || rhs.named_children.find { |child| child.kind == "argument_list" } - type = args&.named_children&.[](1)&.text - return nil if type.to_s.empty? + stack = [node] + until stack.empty? 
+ current = stack.pop + next unless ts_node?(current) + next if current != node && generic_nested_local_scope?(current) - { field: target[:field], type: normalize_text(type) } + yield current + current.named_children.reverse_each { |child| stack << child } + end end - def skip_state_write_node?(node) - node.kind == "operator_assignment" || - (assignment_lhs?(node) && next_sibling(node)&.text.to_s != "=" && !ruby_instance_variable_node?(node)) + def generic_nested_local_scope?(node) + function_name(node) || owner_name_from_declaration(nil, node) end - def skip_state_write_target?(target) - super || target[:field].to_s.start_with?("$") + def generic_identifier?(node) + ts_node?(node) && %w[identifier simple_identifier field_identifier property_identifier].include?(node.kind) end - def state_write_source_node(node) - assignment_lhs?(node) ? (parent_node(node) || node) : super - end + def generic_local_identifier_text(node) + return node.text.to_s if generic_identifier?(node) + return nil unless ts_node?(node) + return nil unless %w[argument pattern directly_assignable_expression value_argument].include?(node.kind) + return nil unless node.named_children.empty? - def direct_state_ref(node) - node.text if ruby_state_variable_node?(node) + text = node.text.to_s + simple_identifier_text?(text) ? text : nil end - def hidden_if?(node) - return false unless ts_node?(node) - return false unless %w[expression_statement block body_statement].include?(node.kind) - - %w[if unless].include?(first_token_kind(node)) + def generic_assignment_statement?(node) + ts_node?(node) && + (%w[assignment assignment_expression augmented_assignment assignment_statement operator_assignment].include?(node.kind) || + node.children.any? { |child| !child.named? 
&& %w[= += -= *= /= %=].include?(child.text.to_s) }) end - def hidden_modifier_if?(node) - return false unless ts_node?(node) - return false unless node.kind == "body_statement" + def generic_local_write_node?(node) + return false unless generic_identifier?(node) + + parent = parent_node(node) + return false unless parent + return false if generic_member_name?(node) + return true if generic_declaration_name?(node) - seen_named = false - node.children.any? do |child| - seen_named ||= child.named? - seen_named && !child.named? && %w[if unless].include?(child.kind) + if %w[assignment assignment_expression augmented_assignment assignment_statement operator_assignment].include?(parent.kind) + lhs = named_field(parent, "left") || parent.named_children.first + return lhs == node end - end - def modifier_condition(node) - node.named_children.last + assignment_lhs?(node) end - def hidden_case?(node) - return false unless ts_node?(node) - return false unless %w[body_statement block_body argument_list].include?(node.kind) + def generic_declaration_name?(node) + parent = parent_node(node) + return false unless parent - first_token_kind(node) == "case" + generic_local_declaration_name_node(parent) == node end - def hidden_match?(node) - node.kind == "expression_statement" && - first_token_kind(node) == "match" && - node.named_children.any? { |child| child.kind == "match_block" } + def generic_local_declaration_name(node) + generic_local_declaration_name_node(node)&.text end - def case_pattern_texts(patterns) - texts = super - return texts unless texts.any? { |text| text.start_with?("*") } - - out = [] - pending_plain = [] - texts.each_with_index do |text, index| - if text.start_with?("*") - out << pending_plain.join(", ") unless pending_plain.empty? - pending_plain = [] - out << if texts.size == 1 || index.positive? 
- text.delete_prefix("*") - else - text - end - else - pending_plain << text + def generic_local_declaration_name_node(node) + return nil unless ts_node?(node) + return nil unless %w[ + declaration init_declarator let_declaration lexical_declaration local_variable_declaration + property_declaration short_var_declaration variable_declaration variable_declarator + ].include?(node.kind) + + if node.kind == "short_var_declaration" + left = node.named_children.find { |child| child.kind == "expression_list" } + if left + identifier = left.named_children.find { |child| generic_identifier?(child) } + return identifier if identifier end + return left if simple_identifier_text?(left&.text) end - out << pending_plain.join(", ") unless pending_plain.empty? - out - end - - def ruby_state_variable_target(node) - return nil unless ruby_state_variable_node?(node) - { receiver: "self", field: node.text } - end + variable = node.named_children.find { |child| child.kind == "variable_declaration" } + return variable if simple_identifier_text?(variable&.text) - def ruby_state_variable_node?(node) - ts_node?(node) && %w[instance_variable global_variable].include?(node.kind) - end + declaration_assignment = node.named_children.find { |child| child.kind == "assignment_statement" } + if declaration_assignment + lhs = declaration_assignment.named_children.first + identifier = lhs&.named_children&.find { |child| generic_identifier?(child) } + return identifier if identifier + return lhs if simple_identifier_text?(lhs&.text) + end - def ruby_instance_variable_node?(node) - ts_node?(node) && node.kind == "instance_variable" + named_field(node, "pattern") || + named_field(node, "name") || + node.named_children.find { |child| child.kind == "pattern" } || + node.named_children.find { |child| child.kind == "variable_declaration" }&.named_children&.find { |child| generic_identifier?(child) } || + node.named_children.find { |child| child.kind == "expression_list" }&.named_children&.find { |child| 
generic_identifier?(child) } || + node.named_children.find { |child| generic_identifier?(child) } end - def preceding_ruby_signature(document, node) - cursor = line(node) - 2 - lines = document.lines - cursor -= 1 while cursor >= 0 && lines[cursor].to_s.strip.empty? - return "" if cursor.negative? + def generic_assignment_lhs_names(node) + return [] unless ts_node?(node) + return [] unless %w[assignment assignment_expression assignment_statement augmented_assignment operator_assignment].include?(node.kind) - stripped = lines[cursor].to_s.strip - if stripped == "end" - start = cursor - while start >= 0 - text = lines[start].to_s.strip - return normalize_text(lines[start..cursor].join("\n")) if text == "sig do" - return "" if start != cursor && text.match?(/\A(?:def|class|module)\b/) - - start -= 1 - end - return "" if start.negative? - end - - return normalize_text(stripped) if stripped.start_with?("sig ") - return "" unless stripped == "}" || stripped.end_with?("}") + lhs = named_field(node, "left") || node.named_children.first + return [] unless ts_node?(lhs) + return [lhs.text] if generic_identifier?(lhs) + return [lhs.text] if simple_identifier_text?(lhs.text) - start = cursor - while start >= 0 - text = lines[start].to_s.strip - return normalize_text(lines[start..cursor].join("\n")) if text.start_with?("sig ") - return "" if text.match?(/\A(?:def|class|module)\b/) + lhs.named_children.filter_map { |child| child.text if generic_identifier?(child) } + end - start -= 1 - end - "" + def simple_identifier_text?(text) + text.to_s.match?(/\A[A-Za-z_]\w*\z/) end - def method_param_types(document) - types_by_method = {} - pending_sig = +"" - document.lines.each do |line| - pending_sig << line if pending_sig_active?(line, pending_sig) - if (match = line.match(/\A\s*def\s+([A-Za-z_]\w*[!?=]?)(?:\s|\(|$)/)) - types_by_method[match[1]] = sig_param_types(pending_sig) - pending_sig = +"" - end + def generic_member_name?(node) + parent = parent_node(node) + if parent&.kind == 
"navigation_suffix" + owner = parent_node(parent) + return true if owner && field_like_node?(owner) end - types_by_method - end + return false unless parent && field_like_node?(parent) - def pending_sig_active?(line, pending_sig) - !pending_sig.empty? || line.match?(/\A\s*sig\b/) + field = named_field(parent, "field") || named_field(parent, "property") || + named_field(parent, "name") || named_field(parent, "suffix") || + parent.named_children.last + field == node end - def sig_param_types(sig_source) - match = sig_source.match(/params\s*\((.*?)\)/m) - return {} unless match + def generic_call_name?(node) + parent = parent_node(node) + return false unless parent - match[1].scan(/([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)/).to_h + %w[call_expression method_invocation invocation_expression].include?(parent.kind) && + (named_field(parent, "function") == node || parent.named_children.first == node) end - end - class TreeSitterLanguageAdapter - BRANCH_KINDS = %w[if unless if_statement if_modifier unless_modifier if_expression - while until while_statement for for_statement - case switch_statement expression_switch_statement switch_expression - match_statement match_expression when_expression].freeze - NOISE_MESSAGES = %w[! != == === < <= > >= [] []= to_s inspect class].freeze + def generic_path_walk(document, node, function, guards, out) + return unless ts_node?(node) + return if generic_nested_local_scope?(node) - def initial_stack(document) - [{ file_owner: file_owner(document.file), language: document.language }] - end + if branch_node?(node) + condition = generic_branch_condition(node) + atoms = generic_path_condition_atoms(condition) + generic_branch_body_nodes(node).each do |child| + generic_path_walk(document, child, function, guards + atoms, out) + end + return + end - def push_context(document, stack, node) - next_stack = push_owner_context(document, stack, node) - name = function_name(node) - next_stack = name ? 
next_stack + [function_context(node, next_stack)] : next_stack - control = control_context(node) - control ? next_stack + [{ control: control }] : next_stack - end + if guards.size >= 2 && generic_path_action_node?(node) + out << PathConditionSite.new( + guards: guards.uniq.sort, + action: normalize_text(node.text), + file: document.file, + function: function, + line: line(node), + span: span(node) + ) + return + end - def structural_facts_for_node(document, node, stack) - out = { - function_defs: [], - owner_defs: [], - call_sites: [], - state_declarations: [], - state_param_origins: [], - state_reads: [], - state_writes: [] - } - record_function_def(document, node, stack, out[:function_defs]) - record_owner_def(document, node, stack, out[:owner_defs]) - record_call_site(document, node, stack, out[:call_sites]) - record_state_declaration(document, node, stack, out[:state_declarations]) - record_state_param_origin(document, node, stack, out[:state_param_origins]) - record_state_read(document, node, stack, out[:state_reads]) - record_state_write(document, node, stack, out[:state_writes]) - out + node.named_children.each { |child| generic_path_walk(document, child, function, guards, out) } end - def after_structural_facts(document, out) - record_implicit_state_accesses(document, out) if implicit_state_accesses? + def generic_branch_condition(node) + named_field(node, "condition") || named_field(node, "value") || + named_field(node, "subject") || node.named_children.first end - def decision_site_facts(document, node, stack) - out = [] - record_decision_site(document, node, stack, out) - out + def generic_branch_body_nodes(node) + bodies = [ + named_field(node, "consequence"), + named_field(node, "body"), + named_field(node, "alternative") + ].compact + bodies = node.named_children.drop(1) if bodies.empty? + bodies.flat_map do |body| + children = body.named_children.reject { |child| comment_node?(child) } + children.empty? ? 
[body] : children + end end - def branch_decision_facts(document, node, stack, immutable_readers:, immutable_reader_types:, type_aliases:) - out = [] - record_branch_decision( - document, - node, - stack, - out, - immutable_readers: immutable_readers, - immutable_reader_types: immutable_reader_types, - type_aliases: type_aliases, - method_param_types: method_param_types(document) - ) - out + def comment_node?(node) + node.kind.to_s.include?("comment") end - def branch_arm_facts(document, node, stack) - out = [] - record_branch_arm(document, node, stack, out) - out - end + def generic_path_condition_atoms(condition) + return [] unless ts_node?(condition) - def implicit_state_accesses? - false + if boolean_container?(condition) && boolean_and?(condition) + flatten_boolean_and(condition).map { |child| decision_member_text(child) }.uniq.sort + else + [decision_member_text(condition)] + end end - def function_params(node) - params = if node.kind == "method_declaration" - node.named_children.select { |child| child.kind == "parameter_list" }[1] - else - named_field(node, "parameters") || - node.named_children.find { |child| %w[parameters formal_parameters parameter_list].include?(child.kind) } - end - return [] unless params + def generic_path_action_node?(node) + return false unless ts_node?(node) + return false if branch_node?(node) - params.named_children.filter_map do |param| - parameter_name(param) - end.uniq + generic_assignment_statement?(node) || + %w[call call_expression expression_statement return_statement identifier simple_identifier].include?(node.kind) end - def function_signature(document, node) - body = named_field(node, "body") - text = - if body - document.source.byteslice(node.start_byte, body.start_byte - node.start_byte).to_s.strip - else - line_text(document, node).strip - end - normalize_text(text.empty? ? 
line_text(document, node) : text) - rescue StandardError - normalize_text(line_text(document, node)) - end + def comparison_target(node) + return nil unless %w[binary binary_expression].include?(node.kind) - def method_param_types(_document) - {} - end + operator = direct_operator(node) + return nil unless COMPARISON_OPERATORS.include?(operator) - private + { source: normalize_text(node.text), operator: operator } + end def push_owner_context(document, stack, node) owner = owner_name_from_declaration(document, node) @@ -1112,6 +1158,8 @@ def line_text(document, node) def control_context(node) return :iterates if %w[while until while_statement for for_statement for_in_statement loop_expression do_block].include?(node.kind) + return :iterates if node.kind == "expression_statement" && node.text.to_s.lstrip.match?(/\A(?:for|while|loop)\b/) + return :iterates if node.kind == "labeled_statement" && node.text.to_s.lstrip.start_with?("for ") return :conditional if branch_node?(node) nil @@ -1140,9 +1188,10 @@ def record_decision_site(document, node, stack, out) function: current_function(stack), line: line(node), span: span(node), - predicate: decision_predicate(node) + predicate: decision_predicate(node), + enclosing_span: span(node) ) - when "body_statement", "block_body", "argument_list" + when "body_statement", "block", "block_body", "argument_list", "statements" return unless hidden_case?(node) return if node.named_children.any? 
{ |child| child.kind == "case" } return if predicate_less_case?(node) @@ -1157,7 +1206,8 @@ def record_decision_site(document, node, stack, out) function: current_function(stack), line: line(node), span: span(node), - predicate: decision_predicate(node) + predicate: decision_predicate(node), + enclosing_span: span(node) ) when "expression_statement" return unless hidden_match?(node) @@ -1172,7 +1222,8 @@ def record_decision_site(document, node, stack, out) function: current_function(stack), line: line(node), span: span(node), - predicate: decision_predicate(node) + predicate: decision_predicate(node), + enclosing_span: span(node) ) end end @@ -1201,10 +1252,23 @@ def record_conjunction_decision(document, node, stack, out) function: current_function(stack), line: conjunction_span(node)[0], span: conjunction_span(node), - predicate: normalize_text(node.text) + predicate: normalize_text(node.text), + enclosing_span: decision_enclosing_span(node) ) end + def decision_enclosing_span(node) + parent = parent_node(node) + seen = Set.new + while ts_node?(parent) && !seen.include?(node_key(parent)) + seen << node_key(parent) + return span(parent) if branch_node?(parent) || %w[while until].include?(parent.kind) + + parent = parent_node(parent) + end + span(node) + end + def record_function_def(document, node, stack, out) name = function_name(node) return unless name @@ -1243,17 +1307,20 @@ def record_call_site(document, node, stack, out) target = normalize_target_receiver(target, stack) return if noise_call?(target) + source_node = target[:source_node] || node out << CallSite.new( receiver: target[:receiver], message: target[:message], file: document.file, function: current_function(stack), owner: current_owner(document, stack), - line: line(node), - span: span(node), + line: line(source_node), + span: span(source_node), conditional: conditional_context?(stack), arguments: target[:arguments], - control: current_control(stack) + control: current_control(stack), + 
safe_navigation: target[:safe_navigation] || false, + block: target[:block] || call_has_block?(source_node) ) end @@ -1407,9 +1474,12 @@ def record_branch_arm(document, node, stack, out) case node.kind when "while", "until", "while_statement", "for", "for_statement" record_loop_arm(document, node, stack, out) - when "case", "body_statement", "switch_statement", "expression_switch_statement", "switch_expression", + when "case", "body_statement", "block", "expression_statement", "statements", "switch_statement", "expression_switch_statement", "switch_expression", "match_statement", "match_expression", "when_expression" return if node.kind == "body_statement" && !hidden_case?(node) + return if node.kind == "block" && !hidden_case?(node) + return if node.kind == "statements" && !hidden_case?(node) + return if node.kind == "expression_statement" && !hidden_match?(node) record_case_arms(document, node, stack, out) end @@ -1597,7 +1667,7 @@ def decision_predicate(node) return normalize_text(modifier_condition(node).text) if hidden_modifier_if?(node) && modifier_condition(node) target = decision_subject(node) - normalize_text(target ? target.text : node.text) + strip_enclosing_parentheses(normalize_text(target ? 
target.text : node.text)) end def decision_subject(node) @@ -1637,7 +1707,7 @@ def flatten_boolean_and(node) def boolean_container?(node) return false unless ts_node?(node) - return true if %w[binary binary_expression boolean_operator].include?(node.kind) + return true if %w[binary binary_expression boolean_operator conjunction_expression disjunction_expression].include?(node.kind) return boolean_container?(node.named_children.first) if parenthesized_wrapper?(node) return false unless %w[body_statement block_body statement pattern argument_list].include?(node.kind) return false unless %w[&& and].include?(direct_operator(node)) @@ -1662,7 +1732,7 @@ def conjunction_span(node) end def parenthesized_wrapper?(node) - ts_node?(node) && %w[parenthesized_statements parenthesized_expression].include?(node.kind) && + ts_node?(node) && %w[condition_clause parenthesized_statements parenthesized_expression].include?(node.kind) && node.named_children.size == 1 end @@ -1709,7 +1779,12 @@ def if_node?(node) end def hidden_if?(node) - false + return false unless ts_node?(node) + return true if node.kind == "expression_statement" && node.text.to_s.lstrip.start_with?("if ") + return false unless %w[block body_statement statements statement_list].include?(node.kind) + + first_token = node.children.first + first_token && !first_token.named? && %w[if unless].include?(first_token.kind.to_s) end def hidden_modifier_if?(node) @@ -1721,11 +1796,17 @@ def modifier_condition(node) end def hidden_case?(node) - false + return false unless ts_node?(node) + return false unless %w[body_statement block statements statement_list].include?(node.kind) + + first_token = node.children.first + first_token && !first_token.named? 
&& %w[case match switch when].include?(first_token.kind.to_s) end def hidden_match?(node) - false + ts_node?(node) && + node.kind == "expression_statement" && + node.text.to_s.lstrip.start_with?("match ") end def first_token_kind(node) @@ -2060,16 +2141,35 @@ def anonymous_owner_name(document, node) end def generic_call_target(document, node) + if %w[method_invocation invocation_expression].include?(node.kind) + adjacent = generic_adjacent_method_invocation_target(node) + return adjacent if adjacent + end + callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first return nil unless callee return nil if callee.kind == "builtin_function" || callee.text.to_s.start_with?("@") - target = target_from_callee(callee).merge(arguments: []) + target = target_from_callee(callee).merge( + arguments: call_argument_nodes(node).map { |argument| normalize_text(argument.text) } + ) first_argument_receiver_call_target(document, node, target) || target rescue NoMethodError nil end + def generic_adjacent_method_invocation_target(node) + names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } + return nil unless names.size >= 2 + + args = node.named_children.find { |child| %w[argument_list arguments call_suffix].include?(child.kind) } + { + receiver: normalize_text(names.first.text), + message: names[1].text, + arguments: Array(args&.named_children).map { |child| normalize_text(child.text) } + } + end + def first_argument_receiver_call_target(_document, node, target) return nil unless first_argument_receiver? 
return nil unless target[:receiver] == "self" @@ -2087,14 +2187,19 @@ def first_argument_receiver_call_target(_document, node, target) def call_argument_nodes(node) args = named_field(node, "arguments") || - node.named_children.find { |child| child.kind == "argument_list" } - Array(args&.named_children) + node.named_children.find { |child| %w[argument_list arguments].include?(child.kind) } + return Array(args&.named_children) if args + return [] unless node.kind == "call_expression" + + callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first + node.named_children.reject { |child| child == callee } end def adjacent_argument_call_target(node) - return nil unless next_sibling(node)&.kind == "argument_list" + args = next_sibling(node) + return nil unless %w[argument_list arguments call_suffix].include?(args&.kind) - target_from_callee(node).merge(arguments: []) + target_from_callee(node).merge(arguments: args.named_children.map { |child| normalize_text(child.text) }) rescue NoMethodError nil end @@ -2271,7 +2376,8 @@ def generic_state_read_target(node) { receiver: normalize_text(receiver.text), field: method.text } when "field", "field_access", "selector_expression", "member_expression", "member_access_expression", "attribute", - "field_expression", "navigation_expression", "directly_assignable_expression", "expression_list" + "field_expression", "navigation_expression", "directly_assignable_expression", "expression_list", + "dot_index_expression", "variable_list" return nil if node.kind == "expression_list" && !(named_field(node, "operand") && named_field(node, "field")) object = named_field(node, "object") || named_field(node, "receiver") || @@ -2308,7 +2414,8 @@ def generic_state_target(lhs) { receiver: normalize_text(receiver.text), field: method.text.sub(/=\z/, "") } when "field", "field_access", "selector_expression", "member_expression", "member_access_expression", "attribute", - "field_expression", 
"navigation_expression", "directly_assignable_expression", "expression_list" + "field_expression", "navigation_expression", "directly_assignable_expression", "expression_list", + "dot_index_expression", "variable_list" if lhs.kind == "expression_list" && !(named_field(lhs, "operand") && named_field(lhs, "field")) return generic_state_target(lhs.named_children.first) end @@ -2344,6 +2451,11 @@ def direct_state_ref(_node) nil end + def call_has_block?(node) + ts_node?(node) && + node.named_children.any? { |child| %w[block do_block lambda].include?(child.kind) } + end + def next_sibling(node) node.next_sibling rescue StandardError @@ -2377,8 +2489,11 @@ def parent_node(node) end def field_like_node?(node) - %w[field field_access selector_expression member_expression member_access_expression attribute field_expression - navigation_expression directly_assignable_expression expression_list scoped_identifier].include?(node.kind) + %w[ + attribute directly_assignable_expression dot_index_expression expression_list field field_access + field_expression member_access_expression member_expression navigation_expression scoped_identifier + selector_expression variable_list + ].include?(node.kind) end def member_field_text(field) @@ -2449,9 +2564,9 @@ def parameter_name(param) return param.text if %w[identifier simple_identifier shorthand_property_identifier_pattern].include?(param.kind) name = named_field(param, "name") || - param.named_children.find do |child| + param.named_children.select do |child| %w[identifier simple_identifier field_identifier property_identifier].include?(child.kind) - end + end.last text = name&.text.to_s return nil if text.empty? 
|| text == "_" @@ -2460,6 +2575,8 @@ def parameter_name(param) def normalize_target_receiver(target, stack) receiver = target[:receiver].to_s + return target.merge(receiver: "self") if %w[self this].include?(receiver) + current_receiver = current_receiver_name(stack) return target unless current_receiver return target.merge(receiver: "self") if receiver == current_receiver @@ -2735,6 +2852,22 @@ def branch_arms @branch_arms ||= adapter.branch_arms(self) end + def predicate_defs + @predicate_defs ||= adapter.predicate_defs(self) + end + + def comparison_sites + @comparison_sites ||= adapter.comparison_sites(self) + end + + def local_methods + @local_methods ||= adapter.local_methods(self) + end + + def path_condition_sites + @path_condition_sites ||= adapter.path_condition_sites(self) + end + def immutable_struct_readers adapter.immutable_struct_readers(lines) end @@ -3129,11 +3262,11 @@ def structural_facts(document) profile.after_structural_facts(document, out) out[:function_defs].uniq! { |fn| [fn.file, fn.owner, fn.name, fn.line] } out[:owner_defs].uniq! { |owner| [owner.file, owner.name, owner.kind] } - out[:call_sites].uniq! { |call| [call.file, call.owner, call.function, call.line, call.receiver, call.message] } + out[:call_sites].uniq! { |call| [call.file, call.owner, call.function, call.span, call.receiver, call.message] } out[:state_declarations].uniq! { |decl| [decl.file, decl.owner, decl.field] } out[:state_param_origins].uniq! { |origin| [origin.file, origin.owner, origin.function, origin.field, origin.param] } - out[:state_reads].uniq! { |read| [read.file, read.owner, read.function, read.line, read.receiver, read.field] } - out[:state_writes].uniq! { |write| [write.file, write.owner, write.function, write.line, write.receiver, write.field] } + out[:state_reads].uniq! { |read| [read.file, read.owner, read.function, read.span, read.receiver, read.field] } + out[:state_writes].uniq! 
{ |write| [write.file, write.owner, write.function, write.span, write.receiver, write.field] } out end end @@ -3147,6 +3280,28 @@ def branch_arms(document) out end + def predicate_defs(document) + profile = syntax_profile(document.language) + document.function_defs.filter_map { |function_def| profile.predicate_def(document, function_def) } + end + + def comparison_sites(document) + profile = syntax_profile(document.language) + out = [] + walk(document, profile) do |node, stack| + out.concat(profile.comparison_site_facts(document, node, stack)) + end + out + end + + def local_methods(document) + syntax_profile(document.language).local_methods(document) + end + + def path_condition_sites(document) + syntax_profile(document.language).path_condition_sites(document) + end + def immutable_struct_readers(lines) SourceTextHelpers.immutable_struct_readers(lines) end @@ -3264,3 +3419,7 @@ def walk(document, profile, &block) end end + +require_relative "syntax/ruby" +require_relative "syntax/effects" +require_relative "syntax/protocols" diff --git a/gems/decomplex/lib/decomplex/syntax/effects.rb b/gems/decomplex/lib/decomplex/syntax/effects.rb new file mode 100644 index 000000000..80e36df57 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/effects.rb @@ -0,0 +1,192 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + SemanticEffectSite = Struct.new(:kind, :detail, :file, :function, :owner, :line, :span, + keyword_init: true) + EffectLexicon = Struct.new( + :dispatch_mids, :meta_mids, :method_obj_mids, :io_consts, + :io_bare, :dir_context, :context_pairs, :context_bare, + :callback_set, :core_consts, + keyword_init: true + ) + + class Document + def semantic_effect_sites + @semantic_effect_sites ||= adapter.semantic_effect_sites(self) + end + end + + class TreeSitterLanguageAdapter + def semantic_effect_sites(document) + semantic_effect_sites_from_calls(document) + end + + private + + def effect_lexicon + nil + end + + def 
semantic_effect_sites_from_calls(document) + return [] unless effect_lexicon + + document.call_sites.filter_map { |call| semantic_effect_site_for_call(call) } + end + + def semantic_effect_site_for_call(call) + lexicon = effect_lexicon + message = call.message.to_s + + if effect_callback_call?(call, message) + return semantic_effect_site_from_call(call, :callback_inversion, message) + end + return semantic_effect_site_from_call(call, :metaprogramming, message) if lexicon.meta_mids.include?(message) + return semantic_effect_site_from_call(call, :dynamic_dispatch, message) if lexicon.dispatch_mids.include?(message) + + if message == "call" && !call.receiver.to_s.empty? + return semantic_effect_site_from_call(call, :dynamic_dispatch, "method(...).call") if method_object_receiver?(call.receiver) + return semantic_effect_site_from_call(call, :dynamic_dispatch, "#{call.receiver}.call") if variable_receiver?(call.receiver) + end + + const_effect_site_for_call(call, message) || + bare_effect_site_for_call(call, message) || + mutation_effect_site_for_call(call, message) + end + + def const_effect_site_for_call(call, message) + receiver = call.receiver.to_s + return nil if receiver.empty? 
|| receiver == "self" + + lexicon = effect_lexicon + base = receiver.sub(/\A::/, "").split("::").first + return semantic_effect_site_from_call(call, :context_dependency, "Dir.#{message}") \ + if base == "Dir" && lexicon.dir_context.include?(message) + + if lexicon.io_consts.include?(base) || ruby_net_receiver?(receiver) + return semantic_effect_site_from_call(call, :hidden_io, "#{receiver.sub(/\A::/, "")}.#{message}") + end + return semantic_effect_site_from_call(call, :context_dependency, "ENV") if receiver == "ENV" + + if lexicon.context_pairs[base]&.include?(message) + return semantic_effect_site_from_call(call, :context_dependency, "#{base}.#{message}") + end + + nil + end + + def bare_effect_site_for_call(call, message) + return nil unless call.receiver.to_s == "self" + + lexicon = effect_lexicon + return semantic_effect_site_from_call(call, :hidden_io, message) if lexicon.io_bare.include?(message) + return semantic_effect_site_from_call(call, :context_dependency, message) if lexicon.context_bare.include?(message) + + nil + end + + def mutation_effect_site_for_call(call, message) + return semantic_effect_site_from_call(call, :hidden_mutation, message) \ + if message.length > 1 && message.end_with?("!") && !%w[!= !~].include?(message) + + nil + end + + def effect_callback_call?(call, message) + (call.block || call.arguments.to_a.any? { |arg| arg.to_s.start_with?("&") }) && + effect_callback_name?(message) && + !effect_lexicon.meta_mids.include?(message) + end + + def effect_callback_name?(message) + effect_lexicon.callback_set.include?(message) || + message.match?(/\A(with_|around_|on_|before_|after_)/) || + message.match?(/_hook\z/) + end + + def method_object_receiver?(receiver) + names = effect_lexicon.method_obj_mids.map(&:to_s).map { |name| Regexp.escape(name) } + return false if names.empty? 
+ + receiver.to_s.match?(/(?:\A|\.)(?:#{names.join("|")})\s*\(/) + end + + def variable_receiver?(receiver) + receiver.to_s.match?(/\A(?:[a-z_]\w*|[@$][A-Za-z_]\w*)\z/) + end + + def ruby_net_receiver?(_receiver) + false + end + + def semantic_effect_site_from_call(call, kind, detail) + SemanticEffectSite.new( + kind: kind, + detail: detail, + file: call.file, + function: call.function, + owner: call.owner, + line: call.line, + span: call.span + ) + end + + def semantic_effect_site(document, node, stack, kind, detail) + SemanticEffectSite.new( + kind: kind, + detail: detail, + file: document.file, + function: current_function(stack), + owner: current_owner(document, stack), + line: line(node), + span: span(node) + ) + end + end + + class TreeSitterAdapter + def semantic_effect_sites(document) + syntax_profile(document.language).semantic_effect_sites(document) + end + end + + GENERIC_SYSTEM_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: [].freeze, + meta_mids: [].freeze, + method_obj_mids: [].freeze, + io_consts: [].freeze, + io_bare: %w[print println eprintln printf puts panic].freeze, + dir_context: [].freeze, + context_pairs: {}.freeze, + context_bare: [].freeze, + callback_set: [].freeze, + core_consts: [].freeze + ).freeze + + class TreeSitterLanguageAdapter + private + + def effect_lexicon + GENERIC_SYSTEM_EFFECT_LEXICON + end + end + + class RustSyntaxAdapter + private + + def effect_lexicon + GENERIC_SYSTEM_EFFECT_LEXICON + end + end + + class ZigSyntaxAdapter + private + + def effect_lexicon + GENERIC_SYSTEM_EFFECT_LEXICON + end + end + end +end + +require_relative "ruby_effects" diff --git a/gems/decomplex/lib/decomplex/syntax/protocols.rb b/gems/decomplex/lib/decomplex/syntax/protocols.rb new file mode 100644 index 000000000..355138940 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/protocols.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + ProtocolMethodEffect = Struct.new(:file, :owner, :name, :line, 
class TreeSitterLanguageAdapter
  # Builds one ProtocolMethodEffect per function definition, listing the
  # distinct, sorted state fields that function reads and writes.
  #
  # Reads and writes are attributed to a function when both their +owner+ and
  # +function+ match the definition's +owner+ and +name+. The accesses are
  # grouped once up front so the cost is linear in the number of facts instead
  # of re-scanning every read and write for every function definition.
  #
  # document - object exposing +function_defs+, +state_reads+ and +state_writes+.
  #
  # Returns an Array of ProtocolMethodEffect, in +function_defs+ order.
  def protocol_method_effects(document)
    reads_by_function = document.state_reads.group_by { |read| [read.owner, read.function] }
    writes_by_function = document.state_writes.group_by { |write| [write.owner, write.function] }

    document.function_defs.map do |function_def|
      key = [function_def.owner, function_def.name]

      ProtocolMethodEffect.new(
        file: function_def.file,
        owner: function_def.owner,
        # Keep only the last segment of a "Owner.name" / "Owner::name" style name.
        name: function_def.name.to_s.split(/[.:]/).last,
        line: function_def.line,
        reads: protocol_sorted_fields(reads_by_function[key]),
        writes: protocol_sorted_fields(writes_by_function[key])
      )
    end
  end

  # Builds one ProtocolMethodPath per function definition, listing the calls
  # made on +self+ from inside that function (in call-site order).
  #
  # document - object exposing +function_defs+ and +call_sites+.
  #
  # Returns an Array of ProtocolMethodPath, in +function_defs+ order.
  def protocol_call_paths(document)
    self_calls_by_function = document.call_sites
                                     .select { |call| call.receiver.to_s == "self" }
                                     .group_by { |call| [call.owner, call.function] }

    document.function_defs.map do |function_def|
      key = [function_def.owner, function_def.name]
      calls = Array(self_calls_by_function[key]).map do |call|
        ProtocolCall.new(
          mid: call.message.to_s.split(/[.:]/).last,
          file: call.file,
          owner: call.owner,
          defn: call.function,
          line: call.line,
          span: call.span
        )
      end

      ProtocolMethodPath.new(
        file: function_def.file,
        owner: function_def.owner,
        name: function_def.name.to_s.split(/[.:]/).last,
        line: function_def.line,
        calls: calls
      )
    end
  end

  private

  # Distinct, sorted field names for a (possibly missing) group of accesses.
  # Always returns a fresh Array so results are never shared between effects.
  def protocol_sorted_fields(accesses)
    Array(accesses).map(&:field).uniq.sort
  end
end
# Resolves the name reported for a definition-like node.
#
# "body_statement" and "argument_list" nodes are delegated to the helpers that
# handle hidden and inline `def` forms. "singleton_method" nodes are named
# "<receiver>.<name>", where a missing or literal `self` receiver is written
# as "self". Every other node kind falls back to the parent adapter.
def function_name(node)
  kind = node.kind
  return hidden_ruby_method_name(node) if kind == "body_statement"
  return inline_def_name(node) if kind == "argument_list"
  return super unless kind == "singleton_method"

  receiver_node = named_field(node, "receiver") ||
                  node.named_children.find { |child| %w[self constant identifier].include?(child.kind) }
  method_name = named_field(node, "name")&.text
  # No "name" field: fall back to the last identifier-like named child.
  method_name ||= node.named_children.reverse.find do |child|
    %w[identifier field_identifier property_identifier].include?(child.kind)
  end&.text

  receiver_text = receiver_node&.text.to_s
  qualifier = receiver_text.empty? || receiver_text == "self" ? "self" : receiver_text
  method_name && "#{qualifier}.#{method_name}"
end
# Collects path-condition sites for every function in +document+ by walking
# each body statement with an initially empty guard list. Sites are appended in
# function order, then statement order.
def path_condition_sites(document)
  document.function_defs.each_with_object([]) do |function_def, sites|
    ruby_function_body_statements(function_def.body).each do |statement|
      ruby_path_walk(document, statement, function_def.name, [], sites)
    end
  end
end
# Name of a `def` that appears inline as a call argument (an "argument_list"
# node whose first token is `def`). Returns nil for any other node, or when no
# name identifier can be found.
def inline_def_name(node)
  return nil unless inline_def_argument_list?(node)

  ruby_def_name_from_children(node)
end

# True when +node+ is a "body_statement" whose first token is `def`, i.e. a
# method definition that the grammar did not wrap in a dedicated method node.
def hidden_ruby_method_definition?(node)
  ts_node?(node) && node.kind == "body_statement" && node.children.first&.kind.to_s == "def"
end

# Name of a hidden method definition (see hidden_ruby_method_definition?).
# Returns nil for any other node, or when no name identifier can be found.
def hidden_ruby_method_name(node)
  return nil unless hidden_ruby_method_definition?(node)

  ruby_def_name_from_children(node)
end

# Shared name extraction for inline and hidden `def` nodes.
#
# If a `self` or constant child is present it is treated as the receiver: the
# name is then searched only among the children after it and is reported as
# "self.<name>". Otherwise the first identifier-like child is the name.
#
# Returns nil when no name identifier exists, so a receiver without a name
# yields nil rather than the bare string "self.".
def ruby_def_name_from_children(node)
  children = node.named_children
  receiver_index = children.index { |child| %w[self constant].include?(child.kind) }
  candidates = receiver_index ? children[(receiver_index + 1)..] : children
  name = candidates&.find do |child|
    %w[identifier field_identifier property_identifier].include?(child.kind)
  end&.text
  return nil unless name

  receiver_index ? "self.#{name}" : name
end
# True when the named children consist of a leading "call" node followed only
# by "heredoc_body" nodes (zero or more). The first argument is unused.
def ruby_heredoc_body?(_body, named_children)
  leader = named_children.first
  return false unless leader&.kind == "call"

  named_children.drop(1).none? { |child| child.kind != "heredoc_body" }
end
# Lists [assigned_name, read_name] pairs describing which locals each
# assignment inside +node+ depends on.
#
# A flat assignment statement (see ruby_flat_assignment_statement?) contributes
# the pairs for its first two named children only. Otherwise every nested
# "assignment" node whose left-hand side is an identifier contributes pairs.
# A name never depends on itself, and duplicate pairs are removed.
#
# node        - statement node to inspect.
# local_names - collection of names considered local variables.
#
# Returns an Array of two-element String Arrays.
def ruby_assignment_dependencies(node, local_names)
  if ruby_flat_assignment_statement?(node)
    lhs, rhs = node.named_children.first(2)
    return ruby_dependency_pairs(lhs, rhs, local_names)
  end

  deps = []
  ruby_walk_local(node) do |child|
    next unless child.kind == "assignment"

    lhs, rhs = child.named_children.first(2)
    next unless lhs&.kind == "identifier" && rhs

    deps.concat(ruby_dependency_pairs(lhs, rhs, local_names))
  end
  deps.uniq
end

# Pairs the assigned name with each distinct local read on the right-hand
# side, skipping reads of the assigned name itself.
def ruby_dependency_pairs(lhs, rhs, local_names)
  target = lhs.text.to_s
  ruby_local_reads(rhs, local_names).uniq.reject { |read| read == target }.map { |read| [target, read] }
end
# Looks for a visual boundary in the 1-based, inclusive line range
# first_line..last_line of +document.lines+.
#
# The first line starting with "#" (after stripping) wins and is reported as a
# :comment boundary. If there is no such line, the first blank line is reported
# as a :blank boundary. Returns nil when neither exists or the range is empty.
def ruby_source_boundary(document, first_line, last_line)
  first_blank = nil
  first_line.upto(last_line) do |number|
    content = document.lines[number - 1].to_s.strip
    if content.start_with?("#")
      return { line: number, kind: :comment, text: content }
    elsif content.empty? && first_blank.nil?
      first_blank = { line: number, kind: :blank, text: content }
    end
  end
  first_blank
end
# Walks both branches of a conditional node, extending the active guards with
# the condition's atoms on one side and their negation on the other. For
# `unless` nodes the two sides are swapped. The then-branch is walked first,
# then the else-branch, and finally the condition itself under the
# unmodified guards.
def ruby_path_walk_if(document, node, function, guards, out)
  condition = ruby_path_condition(node)
  positive = ruby_path_condition_atoms(condition)
  negative = ruby_negate_guards(positive)
  then_guards, else_guards = ruby_unless_node?(node) ? [negative, positive] : [positive, negative]

  branches = [
    [ruby_path_then_body(node), then_guards],
    [ruby_path_else_body(node), else_guards]
  ]
  branches.each do |body, extra_guards|
    ruby_path_body_nodes(body).each do |child|
      ruby_path_walk(document, child, function, guards + extra_guards, out)
    end
  end

  ruby_path_walk(document, condition, function, guards, out)
end
# Returns the node holding the condition of a conditional.
#
# Modifier forms (`action if cond`) keep the condition as their last named
# child; every other form keeps it as the first named child. The previous
# version had a separate `elsif hidden_if?(node)` branch that returned exactly
# the same value as the `else`, so it has been folded away.
def ruby_path_condition(node)
  modifier_form = hidden_modifier_if?(node) || %w[if_modifier unless_modifier].include?(node.kind)
  modifier_form ? node.named_children.last : node.named_children.first
end
# Classifies a hidden owner declaration from its first token: :module when the
# token kind is "module", :class for anything else (including no tokens).
def hidden_ruby_owner_kind(node)
  keyword = node.children.first&.kind.to_s
  return :module if keyword == "module"

  :class
end
# True when +text+, ignoring surrounding whitespace, is a single lowercase or
# underscore-led identifier with an optional trailing "!", "?" or "=".
def ruby_simple_call_text?(text)
  candidate = text.to_s.strip
  /\A[a-z_]\w*[!?=]?\z/.match?(candidate)
end
# True when +parent+ is a declaration (method, singleton method, class or
# module, including their hidden "body_statement" forms), meaning an identifier
# directly under it names the declaration rather than referring to a value.
#
# node   - the identifier being classified (not inspected here).
# parent - the identifier's parent node, or nil when it has none.
#
# Returns false for a nil parent. Callers such as ruby_local_read_identifier?
# pass parent_node(node) straight through, and the other identifier helpers
# already treat that value as possibly nil (`parent&.kind`), so this guard
# keeps the helpers consistent instead of raising NoMethodError on `nil.kind`.
def ruby_declaration_name?(node, parent)
  return false unless parent
  return true if hidden_ruby_method_definition?(parent)
  return true if hidden_ruby_owner_declaration?(parent)

  %w[method singleton_method class module].include?(parent.kind)
end
# True when +node+ is a generic wrapper node ("expression_statement", "block"
# or "body_statement") whose first token is `if` or `unless`.
def hidden_if?(node)
  wrapper = ts_node?(node) && %w[expression_statement block body_statement].include?(node.kind)
  wrapper ? %w[if unless].include?(first_token_kind(node)) : false
end
# True when +node+ denotes an instance or global variable: either its kind says
# so directly, or it is a leaf (no named children) whose text looks like an
# "@" / "$" prefixed identifier.
def ruby_state_variable_node?(node)
  if !ts_node?(node)
    false
  elsif %w[instance_variable global_variable].include?(node.kind)
    true
  else
    leaf = node.named_children.empty?
    leaf && /\A[@$][A-Za-z_]\w*[!?=]?\z/.match?(node.text.to_s)
  end
end
# Extracts a { "param_name" => "TypeName" } map from the `params(...)` clause
# of a signature's source text.
#
# The clause is read up to the parenthesis that balances the opening one, so
# nested type calls such as `T.nilable(String)` no longer cut the clause short
# and hide the parameters that follow them. Each type is recorded as its
# leading constant path only (e.g. "T" for `T.nilable(String)`, "T::Array" for
# `T::Array[Integer]`).
#
# sig_source - String holding the signature source.
#
# Returns a Hash; empty when there is no `params(` clause or it is never closed.
def sig_param_types(sig_source)
  opening = sig_source.match(/params\s*\(/)
  return {} unless opening

  clause = sig_params_clause(opening.post_match)
  return {} unless clause

  clause.scan(/([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)/).to_h
end

# Returns the text preceding the ")" that closes an already-opened "(".
# If the parentheses never balance, falls back to the text before the first
# ")" (the historical behaviour), or nil when there is no ")" at all.
def sig_params_clause(text)
  depth = 1
  first_close = nil
  text.each_char.with_index do |char, index|
    case char
    when "("
      depth += 1
    when ")"
      first_close ||= index
      depth -= 1
      return text[0, index] if depth.zero?
    end
  end
  first_close && text[0, first_close]
end
# Normalises the source text of a visibility-call argument (`:foo`, `"foo"`,
# `'foo'`, `:"foo"`) to the bare method name: surrounding whitespace is
# trimmed, then one leading ":" and one layer of double and single quotes are
# removed, in that order.
def ruby_visibility_arg_name(arg)
  name = arg.to_s.strip.delete_prefix(":")
  %w[" '].each do |quote|
    name = name.delete_prefix(quote).delete_suffix(quote)
  end
  name
end
# True when the receiver text names something under the `Net::` namespace,
# with or without a single leading "::" root qualifier.
def ruby_net_receiver?(receiver)
  qualified = receiver.to_s
  unrooted = qualified.sub(/\A::/, "")
  unrooted.start_with?("Net::")
end
def ruby_global_assignment_read?(document, read) + line_text = document.lines[read.line - 1].to_s + line_text[read.span[3]..].to_s.lstrip.start_with?("=") + end + + def ruby_state_mutation_sites(document) + document.state_writes.filter_map do |write| + next if write.receiver.to_s == "self" + next if write.field.to_s.start_with?("@", "$") + + SemanticEffectSite.new( + kind: :hidden_mutation, + detail: "#{write.field}=", + file: write.file, + function: write.function, + owner: write.owner, + line: write.line, + span: write.span + ) + end + end + + def ruby_method_hook_sites(document) + document.function_defs.filter_map do |function_def| + name = function_def.name.to_s.split(".").last + next unless %w[method_missing respond_to_missing?].include?(name) + + SemanticEffectSite.new( + kind: :metaprogramming, + detail: "def #{name}", + file: function_def.file, + function: function_def.name, + owner: function_def.owner, + line: function_def.line, + span: function_def.span + ) + end + end + + def ruby_semantic_effect_sites_for_node(document, node, stack) + case node.kind + when "yield" + [semantic_effect_site(document, node, stack, :dynamic_dispatch, "yield")] + when "subshell" + [semantic_effect_site(document, node, stack, :hidden_io, "backtick")] + when "singleton_class" + ruby_singleton_class_effect(document, node, stack) + when "element_reference" + ruby_element_reference_effect(document, node, stack) + when "assignment" + ruby_global_assignment_effect(document, node, stack) + + ruby_assignment_effect(document, node, stack) + when "operator_assignment" + ruby_operator_assignment_effect(document, node, stack) + when "binary" + ruby_binary_effect(document, node, stack) + else + [] + end + end + + def ruby_singleton_class_effect(document, node, stack) + receiver = node.named_children.first + return [] unless receiver + return [] if receiver.text == "self" + + [semantic_effect_site(document, node, stack, :metaprogramming, "class << #{normalize_text(receiver.text)}")] + end + 
+ def ruby_element_reference_effect(document, node, stack) + receiver = node.named_children.first + return [] unless receiver&.text == "ENV" + + [semantic_effect_site(document, node, stack, :context_dependency, "ENV")] + end + + def ruby_assignment_effect(document, node, stack) + lhs = named_field(node, "left") || node.named_children.first + return [] unless lhs&.kind == "element_reference" + return [] if lhs.named_children.first&.text == "ENV" + + [semantic_effect_site(document, node, stack, :hidden_mutation, "[]=")] + end + + def ruby_global_assignment_effect(document, node, stack) + lhs = named_field(node, "left") || node.named_children.first + return [] unless lhs&.kind == "global_variable" + + [semantic_effect_site(document, node, stack, :context_dependency, lhs.text)] + end + + def ruby_operator_assignment_effect(document, node, stack) + lhs = named_field(node, "left") || node.named_children.first + return [] if ruby_local_operator_assignment_lhs?(lhs) + + [semantic_effect_site(document, node, stack, :hidden_mutation, "op-assign")] + end + + def ruby_local_operator_assignment_lhs?(lhs) + return true unless lhs + + %w[identifier instance_variable global_variable].include?(lhs.kind) + end + + def ruby_binary_effect(document, node, stack) + return [] unless direct_operator(node) == "<<" + + [semantic_effect_site(document, node, stack, :hidden_mutation, "<<")] + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/ruby_protocols.rb b/gems/decomplex/lib/decomplex/syntax/ruby_protocols.rb new file mode 100644 index 000000000..39bf7d5c4 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/ruby_protocols.rb @@ -0,0 +1,360 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + RUBY_PROTOCOL_PATH_LIMIT = 64 + RUBY_PROTOCOL_DECLARATIVE_MIDS = %w[ + abstract! alias_method any attr_accessor attr_reader attr_writer bind + cast checked enum extend final include interface! 
let must must_because + nilable override overridable params prepend private private_class_method + protected public require require_relative requires_ancestor sealed! sig + type_member type_template untyped unsafe void + ].freeze + RUBY_PROTOCOL_TEST_DSL_MIDS = %w[ + a_kind_of after around before be be_a be_an be_empty be_falsey be_nil + be_truthy change contain_exactly context describe eq eql equal expect + have_attributes have_key have_received it match not_to raise_error + receive subject to + ].freeze + RUBY_PROTOCOL_IGNORED_MIDS = (RUBY_PROTOCOL_DECLARATIVE_MIDS + RUBY_PROTOCOL_TEST_DSL_MIDS).freeze + RUBY_PROTOCOL_OPTIONAL_DIAGNOSTIC_MIDS = %w[ + error! fixable! read_interpolated_string warn! + ].freeze + RUBY_PROTOCOL_MUTATING_MIDS = %w[ + << []= add append clear collect! compact! concat declare delete delete_if + each_key= fill filter! keep_if mark merge! move push reject! replace + resolve shift stamp store unshift update write + ].freeze + RUBY_PROTOCOL_NON_MUTATING_OPERATOR_MIDS = %w[! 
!= !~].freeze + RUBY_PROTOCOL_MUTATING_SUFFIXES = %w[!].freeze + + class RubySyntaxAdapter + def protocol_method_effects(document) + document.function_defs.map do |function_def| + reads = Set.new + writes = Set.new + statements = ruby_function_body_statements(function_def.body) + local_names = ruby_local_names(function_def, statements) + ruby_protocol_collect_state_access(function_def.body, reads, writes, + local_names: local_names, + root: true) + ProtocolMethodEffect.new( + file: function_def.file, + owner: function_def.owner, + name: ruby_protocol_method_name(function_def.name), + line: function_def.line, + reads: reads.to_a.sort, + writes: writes.to_a.sort + ) + end + end + + def protocol_call_paths(document) + document.function_defs.flat_map do |function_def| + statements = ruby_function_body_statements(function_def.body) + local_names = ruby_local_names(function_def, statements) + ruby_protocol_paths_for_statements(statements, local_names: local_names).map do |path| + ProtocolMethodPath.new( + file: function_def.file, + owner: function_def.owner, + name: ruby_protocol_method_name(function_def.name), + line: function_def.line, + calls: path.calls + ) + end + end + end + + private + + def ruby_protocol_method_name(name) + name.to_s.split(".").last + end + + def ruby_protocol_collect_state_access(node, reads, writes, local_names:, root: false) + return unless ts_node?(node) + return if !root && ruby_protocol_nested_boundary?(node) + + case node.kind + when "assignment" + lhs = named_field(node, "left") || node.named_children.first + rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] + ruby_protocol_record_write(lhs, writes, local_names) + ruby_protocol_collect_state_access(rhs, reads, writes, local_names: local_names) + return + when "operator_assignment" + lhs = named_field(node, "left") || node.named_children.first + if (state = ruby_protocol_state_target(lhs, local_names)) + reads << state + writes << state + end + rhs = 
named_field(node, "right") || named_field(node, "value") || node.named_children[1] + ruby_protocol_collect_state_access(rhs, reads, writes, local_names: local_names) + return + when "instance_variable" + reads << ruby_protocol_normalize_state(node.text) + when "call" + ruby_protocol_collect_call_state(node, reads, writes, local_names) + when "identifier" + reads << ruby_protocol_normalize_state(node.text) if ruby_protocol_bare_reader?(node, local_names) + end + + node.named_children.each do |child| + ruby_protocol_collect_state_access(child, reads, writes, local_names: local_names) + end + end + + def ruby_protocol_collect_call_state(node, reads, writes, local_names) + target = ruby_proc_call_target(node) || ruby_call_target(node) + return unless target + + mid = target[:message].to_s + receiver = target[:receiver].to_s + if receiver == "self" && target[:arguments].to_a.empty? && + !ruby_protocol_mutating_mid?(mid) && !RUBY_PROTOCOL_IGNORED_MIDS.include?(mid) + reads << ruby_protocol_normalize_state(mid) + end + + return unless ruby_protocol_mutating_mid?(mid) + + token = ruby_protocol_receiver_state_token(receiver, local_names) + writes << token if token + end + + def ruby_protocol_record_write(lhs, writes, local_names) + state = ruby_protocol_state_target(lhs, local_names) + writes << state if state + end + + def ruby_protocol_state_target(node, local_names) + return nil unless ts_node?(node) + + case node.kind + when "instance_variable" + ruby_protocol_normalize_state(node.text) + when "element_reference" + ruby_protocol_receiver_state_token(node.named_children.first&.text, local_names) + when "call" + target = ruby_proc_call_target(node) || ruby_call_target(node) + return nil unless target + + receiver = ruby_protocol_receiver_state_token(target[:receiver], local_names) + field = target[:message].to_s.sub(/=\z/, "") + return ruby_protocol_normalize_state(field) if receiver == "self" + return "#{receiver}.#{field}" if receiver + + nil + else + nil + end + end + 
+ def ruby_protocol_receiver_state_token(receiver, local_names) + text = receiver.to_s + return nil if text.empty? + return "self" if text == "self" + return ruby_protocol_normalize_state(text) if text.start_with?("@") + return ruby_protocol_normalize_state(text) if text.match?(/\A[a-z_]\w*[!?]?\z/) + return nil if local_names.include?(text) + + nil + end + + def ruby_protocol_bare_reader?(node, local_names) + name = node.text.to_s + return false unless name.match?(/\A[a-z_]\w*[!?]?\z/) + return false if local_names.include?(name) + return false if RUBY_PROTOCOL_IGNORED_MIDS.include?(name) + + parent = parent_node(node) + return false unless parent + return false if ruby_declaration_name?(node, parent) + return false if %w[call method_parameters block_parameters argument_list assignment + operator_assignment pair hash_key_symbol].include?(parent.kind) + return false if next_sibling(node)&.text == "=" || prev_sibling(node)&.text == "=" + return false if next_sibling(node)&.text == "." || prev_sibling(node)&.text == "." 
+ return false if next_sibling(node)&.text == ":" || prev_sibling(node)&.text == ":" + + true + end + + def ruby_protocol_paths_for_statements(statements, local_names:) + statements.compact.each_with_object([ruby_protocol_empty_path]) do |statement, paths| + statement_paths = ruby_protocol_paths_for(statement, local_names: local_names) + paths.replace(ruby_protocol_combine_path_lists(paths, statement_paths)) + end + end + + def ruby_protocol_paths_for(node, local_names:) + return [ruby_protocol_empty_path] unless ts_node?(node) + return [ruby_protocol_empty_path] if ruby_protocol_nested_boundary?(node) + + if ruby_path_if_node?(node) + return ruby_protocol_branch_paths(node, local_names: local_names) + end + return ruby_protocol_case_paths(node, local_names: local_names) if ruby_protocol_case_node?(node) + + paths = ruby_protocol_generic_paths(node, local_names: local_names) + return paths unless %w[return break next redo retry].include?(node.kind) + + paths.map { |path| ProtocolPath.new(calls: path.calls, terminal: true) } + end + + def ruby_protocol_branch_paths(node, local_names:) + condition_paths = ruby_protocol_paths_for(ruby_path_condition(node), local_names: local_names) + then_paths = ruby_protocol_body_paths(ruby_path_then_body(node), local_names: local_names) + else_node = ruby_path_else_body(node) + else_paths = else_node ? ruby_protocol_body_paths(else_node, local_names: local_names) : [ruby_protocol_empty_path] + alternatives = then_paths + else_paths + ruby_protocol_combine_path_lists(condition_paths, alternatives) + end + + def ruby_protocol_case_paths(node, local_names:) + subject = ruby_protocol_case_subject(node) + subject_paths = subject ? ruby_protocol_paths_for(subject, local_names: local_names) : [ruby_protocol_empty_path] + branches = ruby_protocol_case_branch_paths(node, local_names: local_names) + ruby_protocol_combine_path_lists(subject_paths, branches.empty? ? 
[ruby_protocol_empty_path] : branches) + end + + def ruby_protocol_case_subject(node) + first = node.named_children.first + return nil unless first + return nil if %w[when else].include?(first.kind) + + first + end + + def ruby_protocol_case_branch_paths(node, local_names:) + node.named_children.flat_map do |child| + case child.kind + when "when" + pattern_paths = child.named_children.take_while { |part| part.kind != "then" } + .each_with_object([ruby_protocol_empty_path]) do |pattern, paths| + paths.replace(ruby_protocol_combine_path_lists( + paths, + ruby_protocol_paths_for(pattern, local_names: local_names) + )) + end + body = child.named_children.find { |part| part.kind == "then" } + ruby_protocol_combine_path_lists(pattern_paths, ruby_protocol_body_paths(body, local_names: local_names)) + when "else" + ruby_protocol_body_paths(child, local_names: local_names) + else + [] + end + end.first(RUBY_PROTOCOL_PATH_LIMIT) + end + + def ruby_protocol_body_paths(node, local_names:) + return [ruby_protocol_empty_path] unless ts_node?(node) + + if %w[then else body_statement block block_body].include?(node.kind) + return ruby_protocol_paths_for_statements( + node.named_children.reject { |child| child.kind == "comment" }, + local_names: local_names + ) + end + + ruby_protocol_paths_for(node, local_names: local_names) + end + + def ruby_protocol_generic_paths(node, local_names:) + children = ruby_protocol_child_nodes(node) + child_paths = children.each_with_object([ruby_protocol_empty_path]) do |child, paths| + paths.replace(ruby_protocol_combine_path_lists( + paths, + ruby_protocol_paths_for(child, local_names: local_names) + )) + end + + mid = ruby_protocol_internal_call(node, local_names) + return child_paths unless mid + + call_path = ProtocolPath.new(calls: [ruby_protocol_raw_call(mid, node)], terminal: false) + ruby_protocol_combine_path_lists([call_path], child_paths) + end + + def ruby_protocol_child_nodes(node) + return [] if ruby_protocol_nested_boundary?(node) + 
+ case node.kind + when "call" + node.named_children.select { |child| %w[argument_list block do_block].include?(child.kind) } + when "assignment", "operator_assignment" + rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] + rhs ? [rhs] : [] + else + node.named_children.reject { |child| child.kind == "comment" } + end + end + + def ruby_protocol_internal_call(node, local_names) + target = + case node.kind + when "call" + ruby_proc_call_target(node) || ruby_call_target(node) + when "identifier" + ruby_bare_call_target(node) + end + return nil unless target + return nil unless target[:receiver].to_s == "self" + + mid = target[:message].to_s + return nil if local_names.include?(mid) + return nil if RUBY_PROTOCOL_IGNORED_MIDS.include?(mid) + + mid + end + + def ruby_protocol_raw_call(mid, node) + ProtocolCall.new( + mid: mid, + file: nil, + owner: nil, + defn: nil, + line: line(node), + span: span(node) + ) + end + + def ruby_protocol_combine_path_lists(left_paths, right_paths) + left_paths.flat_map do |path| + if path.terminal + [path] + else + right_paths.map do |right_path| + ProtocolPath.new(calls: path.calls + right_path.calls, terminal: right_path.terminal) + end + end + end.first(RUBY_PROTOCOL_PATH_LIMIT) + end + + def ruby_protocol_empty_path + ProtocolPath.new(calls: [], terminal: false) + end + + def ruby_protocol_case_node?(node) + ts_node?(node) && (node.kind == "case" || hidden_case?(node)) + end + + def ruby_protocol_nested_boundary?(node) + return false unless ts_node?(node) + return true if %w[class module method singleton_method lambda].include?(node.kind) + return true if hidden_ruby_method_definition?(node) || hidden_ruby_owner_declaration?(node) + + false + end + + def ruby_protocol_mutating_mid?(mid) + return false if RUBY_PROTOCOL_NON_MUTATING_OPERATOR_MIDS.include?(mid) + + RUBY_PROTOCOL_MUTATING_MIDS.include?(mid) || + RUBY_PROTOCOL_MUTATING_SUFFIXES.any? 
{ |suffix| mid.end_with?(suffix) } + end + + def ruby_protocol_normalize_state(name) + name.to_s.sub(/\A@/, "").sub(/=\z/, "") + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/temporal_ordering_pressure.rb b/gems/decomplex/lib/decomplex/temporal_ordering_pressure.rb index fc88a5903..1ea2087f2 100644 --- a/gems/decomplex/lib/decomplex/temporal_ordering_pressure.rb +++ b/gems/decomplex/lib/decomplex/temporal_ordering_pressure.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex # TemporalOrderingPressure -- classes/modules whose public method @@ -12,98 +12,54 @@ class TemporalOrderingPressure keyword_init: true) def self.scan(files) - rows = [] - files.each do |file| - root, lines = Ast.parse(file) - rows.concat(new(file, lines).scan(root)) + rows = files.flat_map do |file| + document = Syntax.parse(file, parser: "tree_sitter") + new(file, document).scan end rows.sort_by { |h| [-h[:score], -h[:state_methods], h[:file], h[:owner]] } end - def initialize(file, lines) + def initialize(file, document) @file = file - @lines = lines + @document = document end - def scan(root) - out = [] - walk_owners(root, [], out) - out - end - - def walk_owners(node, owners, out) - return unless Ast.node?(node) - - if %i[CLASS MODULE].include?(node.type) - owner = owner_name(node) - methods = owner_methods(node) - row = pressure_row(owner, methods) - out << row if row - node.children.each { |child| walk_owners(child, owners + [owner], out) } - else - node.children.each { |child| walk_owners(child, owners, out) } + def scan + temporal_owners.filter_map do |owner| + row = pressure_row(owner, owner_methods(owner)) + row if row end end - def owner_name(node) - Ast.slice(node.children[0], @lines).to_s.empty? ? "(anonymous)" : Ast.slice(node.children[0], @lines) - end + private - def owner_methods(owner_node) - body = owner_body(owner_node) - return [] unless Ast.node?(body) - - stmts = body.type == :BLOCK ? 
body.children.compact : [body] - visibility = :public - methods = [] - stmts.each do |stmt| - next unless Ast.node?(stmt) - - if visibility_marker?(stmt) - visibility = stmt.children[0].to_sym - elsif %i[DEFN DEFS].include?(stmt.type) - methods << method_state(stmt, visibility) - end - end - methods + def temporal_owners + (@document.owner_defs.map(&:name) + @document.function_defs.map(&:owner)).compact.uniq end - def owner_body(owner_node) - scope = owner_node.children[2] - return nil unless Ast.node?(scope) && scope.type == :SCOPE - - scope.children[2] - end - - def visibility_marker?(node) - node.type == :VCALL && %i[public protected private].include?(node.children[0]) + def owner_methods(owner) + @document.function_defs.select { |function| function.owner == owner }.map do |function| + MethodState.new( + name: function.name, + line: function.line, + span: function.span, + visibility: function.visibility || :public, + reads: state_reads_for(function).uniq.sort, + writes: state_writes_for(function).uniq.sort + ) + end end - def method_state(defn_node, visibility) - reads = [] - writes = [] - collect_state_access(defn_node, reads, writes) - MethodState.new( - name: defn_node.children[defn_node.type == :DEFS ? 
1 : 0].to_s, - line: defn_node.first_lineno, - span: [defn_node.first_lineno, defn_node.first_column, - defn_node.last_lineno, defn_node.last_column], - visibility: visibility, - reads: reads.uniq.sort, - writes: writes.uniq.sort - ) + def state_reads_for(function) + @document.state_reads.select do |read| + read.owner == function.owner && read.function == function.name + end.map(&:field) end - def collect_state_access(node, reads, writes) - return unless Ast.node?(node) - - case node.type - when :IASGN - writes << node.children[0].to_s - when :IVAR - reads << node.children[0].to_s - end - node.children.each { |child| collect_state_access(child, reads, writes) } + def state_writes_for(function) + @document.state_writes.select do |write| + write.owner == function.owner && write.function == function.name + end.map(&:field) end def pressure_row(owner, methods) diff --git a/gems/decomplex/lib/decomplex/weighted_inlined_cognitive_complexity.rb b/gems/decomplex/lib/decomplex/weighted_inlined_cognitive_complexity.rb index fba969214..cc1d0c3a7 100644 --- a/gems/decomplex/lib/decomplex/weighted_inlined_cognitive_complexity.rb +++ b/gems/decomplex/lib/decomplex/weighted_inlined_cognitive_complexity.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true require "set" -require_relative "ast" +require_relative "local_flow" require_relative "structural_topology" module Decomplex @@ -18,16 +18,6 @@ class WeightedInlinedCognitiveComplexity DEFAULT_MAX_DEPTH = 2 DEPTH_WEIGHTS = [1.0, 1.0, 0.6, 0.35].freeze EDGE_WEIGHTS = { always: 1.0, conditional: 0.75, iterates: 1.15 }.freeze - OWNER_TYPES = %i[CLASS MODULE].freeze - METHOD_TYPES = %i[DEFN DEFS].freeze - SKIP_NESTED_TYPES = %i[CLASS MODULE DEFN DEFS LAMBDA].freeze - BRANCH_TYPES = %i[IF UNLESS].freeze - LOOP_TYPES = %i[WHILE UNTIL FOR ITER].freeze - CASE_TYPES = %i[CASE CASE2].freeze - RESCUE_TYPES = %i[RESCUE RESBODY].freeze - EARLY_EXIT_TYPES = %i[RETURN BREAK NEXT REDO RETRY].freeze - BOOLEAN_TYPES = %i[AND OR].freeze - def 
self.scan(files, min_score: DEFAULT_MIN_SCORE, min_hidden: DEFAULT_MIN_HIDDEN, max_depth: DEFAULT_MAX_DEPTH) new(files, min_score: min_score, min_hidden: min_hidden, max_depth: max_depth).scan end @@ -40,11 +30,8 @@ def initialize(files, min_score:, min_hidden:, max_depth:) end def scan - parsed = parse_files topology = StructuralTopology.scan(@files) - bodies = parsed.flat_map do |file, (root, lines)| - MethodBodyCollector.new(file, lines).scan(root) - end + bodies = LocalFlow.scan(@files).map { |summary| method_body(summary) } scores = bodies.to_h do |body| score = LocalScorer.new.score(body.node) [body.id, LocalScore.new( @@ -64,239 +51,183 @@ def scan private - def parse_files - @files.each_with_object({}) do |file, out| - out[file] = Ast.parse(file) - end + def method_body(summary) + owner = summary.owner == "(top-level)" ? "(top-level:#{summary.file})" : summary.owner + MethodBody.new( + id: "#{owner}##{summary.name}", + owner: owner, + name: summary.name, + file: summary.file, + line: summary.line, + span: summary.span, + node: summary.node + ) end - class MethodBodyCollector - def initialize(file, lines) - @file = file - @lines = lines - end - - def scan(root) - out = [] - top_level_methods(root).each do |method_node| - out << method_body(method_node, top_level_owner) - end - walk(root, [], out) - out + class LocalScorer + def score(method_node) + signals = Hash.new(0) + { + score: round(score_node(method_node, nesting: 0, signals: signals)), + signals: signals.to_h + } end private - def top_level_methods(root) - top_level_statements(root).select { |stmt| Ast.node?(stmt) && METHOD_TYPES.include?(stmt.type) } - end + def score_node(node, nesting:, signals:) + return 0.0 unless tree_sitter_node?(node) - def walk(node, owners, out) - return unless Ast.node?(node) + score_tree_sitter_node(node, nesting: nesting, signals: signals) + end - if OWNER_TYPES.include?(node.type) - owner = (owners + [owner_segment(node)]).join("::") - owner_methods(node).each do 
|method_node| - out << method_body(method_node, owner) - end - node.children.each { |child| walk(child, owners + [owner_segment(node)], out) } - else - node.children.each { |child| walk(child, owners, out) } - end + def boolean_count(node) + tree_sitter_boolean_count(node) end - def owner_methods(owner_node) - body = owner_body(owner_node) - return [] unless Ast.node?(body) + def score_tree_sitter_node(node, nesting:, signals:) + return 0.0 if skip_tree_sitter_nested?(node) - owner_statements(body).flat_map do |stmt| - next [] unless Ast.node?(stmt) + if tree_sitter_branch?(node) + signals[:branches] += 1 + signals[:nested] += 1 if nesting.positive? + return branch_cost(nesting) + + tree_sitter_predicate_cost(node, signals) + + score_tree_sitter_children(node, nesting: nesting + 1, signals: signals) + end - if METHOD_TYPES.include?(stmt.type) - [stmt] - elsif visibility_call?(stmt) - inline_methods(stmt) - else - [] - end + if tree_sitter_loop?(node) + signals[:loops] += 1 + signals[:nested] += 1 if nesting.positive? 
+ return branch_cost(nesting) + + score_tree_sitter_children(node, nesting: nesting + 1, signals: signals) end - end - def method_body(node, owner) - name = method_name(node) - MethodBody.new( - id: "#{owner}##{name}", - owner: owner, - name: name, - file: @file, - line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], - node: node - ) - end + if tree_sitter_case?(node) + signals[:cases] += 1 + return 0.5 + score_tree_sitter_children(node, nesting: nesting + 1, signals: signals) + end - def inline_methods(stmt) - args = stmt.children[1] - return [] unless Ast.node?(args) + if tree_sitter_rescue?(node) + signals[:rescues] += 1 + return branch_cost(nesting) + + score_tree_sitter_children(node, nesting: nesting + 1, signals: signals) + end - args.children.compact.select { |arg| Ast.node?(arg) && METHOD_TYPES.include?(arg.type) } - end + if tree_sitter_early_exit?(node) + signals[:early_exits] += 1 + exit_cost = nesting.positive? ? 0.5 + (nesting * 0.25) : 0.0 + return exit_cost + score_tree_sitter_children(node, nesting: nesting, signals: signals) + end - def owner_body(owner_node) - scope = owner_node.children[owner_node.type == :CLASS ? 2 : 1] - return nil unless Ast.node?(scope) && scope.type == :SCOPE + if tree_sitter_boolean_node?(node) + signals[:boolean_ops] += 1 + return 0.25 + score_tree_sitter_children(node, nesting: nesting, signals: signals) + end - scope.children[2] + score_tree_sitter_children(node, nesting: nesting, signals: signals) end - def owner_statements(body) - body.type == :BLOCK ? body.children.compact : [body] + def score_tree_sitter_children(node, nesting:, signals:) + node.children.sum { |child| score_node(child, nesting: nesting, signals: signals) } end - def top_level_statements(root) - return [] unless Ast.node?(root) - - root.children.compact.flat_map do |child| - Ast.node?(child) && child.type == :BLOCK ? 
child.children.compact : [child] - end + def tree_sitter_predicate_cost(node, signals) + predicate = tree_sitter_condition_node(node) + bools = tree_sitter_boolean_count(predicate) + signals[:boolean_ops] += bools + bools * 0.5 end - def visibility_call?(node) - node.type == :FCALL && StructuralTopology::VISIBILITY_MIDS.include?(node.children[0]) - end + def tree_sitter_condition_node(node) + return node.named_children.last if tree_sitter_modifier_if?(node) + return node.named_children.first if node.kind == "body_statement" - def method_name(node) - if node.type == :DEFS - receiver = node.children[0] - prefix = Ast.node?(receiver) && receiver.type == :SELF ? "self" : Ast.slice(receiver, @lines) - "#{prefix}.#{node.children[1]}" - else - node.children[0].to_s - end + node.named_children.first end - def owner_segment(node) - text = Ast.slice(node.children[0], @lines) - text.empty? ? "(anonymous)" : text - end + def tree_sitter_boolean_count(node) + return 0 unless tree_sitter_node?(node) - def top_level_owner - "(top-level:#{@file})" + own = tree_sitter_boolean_node?(node) ? 1 : 0 + own + node.children.sum { |child| tree_sitter_boolean_count(child) } end - end - class LocalScorer - def score(method_node) - signals = Hash.new(0) - { - score: round(score_node(method_node, nesting: 0, signals: signals)), - signals: signals.to_h - } + def tree_sitter_boolean_node?(node) + tree_sitter_node?(node) && + %w[binary binary_expression boolean_operator conjunction_expression disjunction_expression].include?(node.kind) && + node.children.any? { |child| !child.named? 
&& %w[&& || and or].include?(child.text.to_s) } end - private - - def score_node(node, nesting:, signals:) - return 0.0 unless Ast.node?(node) - return 0.0 if skip_nested?(node) - - case node.type - when *BRANCH_TYPES - score_branch(node, nesting, signals) - when *LOOP_TYPES - score_loop(node, nesting, signals) - when *CASE_TYPES - score_case(node, nesting, signals) - when *RESCUE_TYPES - score_rescue(node, nesting, signals) - when *EARLY_EXIT_TYPES - score_early_exit(node, nesting, signals) - when *BOOLEAN_TYPES - score_boolean_node(node, nesting, signals) - else - score_children(node, nesting: nesting, signals: signals) - end - end + def tree_sitter_branch?(node) + return false unless tree_sitter_node?(node) + return true if %w[if unless if_statement if_expression if_modifier unless_modifier].include?(node.kind) && + node.named_children.any? - def skip_nested?(node) - SKIP_NESTED_TYPES.include?(node.type) && !METHOD_TYPES.include?(node.type) + tree_sitter_hidden_if?(node) || tree_sitter_modifier_if?(node) end - def score_branch(node, nesting, signals) - signals[:branches] += 1 - signals[:nested] += 1 if nesting.positive? - condition = node.children[0] - positive = node.children[1] - negative = node.children[2] - branch_cost(nesting) + - predicate_cost(condition, signals) + - score_node(positive, nesting: nesting + 1, signals: signals) + - score_node(negative, nesting: nesting + 1, signals: signals) - end + def tree_sitter_hidden_if?(node) + return true if node.kind == "expression_statement" && node.text.to_s.lstrip.start_with?("if ") - def score_loop(node, nesting, signals) - signals[:loops] += 1 - signals[:nested] += 1 if nesting.positive? - branch_cost(nesting) + score_children(node, nesting: nesting + 1, signals: signals) + %w[body_statement block statements statement_list].include?(node.kind) && + node.children.first && + !node.children.first.named? 
&& + %w[if unless].include?(node.children.first.kind.to_s) end - def score_case(node, nesting, signals) - signals[:cases] += 1 - 0.5 + score_case_children(node, nesting, signals) - end + def tree_sitter_modifier_if?(node) + return true if %w[if_modifier unless_modifier].include?(node.kind) + return false unless node.kind == "body_statement" - def score_case_children(node, nesting, signals) - node.children.sum do |child| - if Ast.node?(child) && child.type == :WHEN - score_when(child, nesting, signals) - else - score_node(child, nesting: nesting, signals: signals) - end + seen_named = false + node.children.any? do |child| + seen_named ||= child.named? + seen_named && !child.named? && %w[if unless].include?(child.kind.to_s) end end - def score_when(node, nesting, signals) - body = node.children[1] - next_when = node.children[2] - score_node(body, nesting: nesting + 1, signals: signals) + - score_node(next_when, nesting: nesting, signals: signals) - end + def tree_sitter_loop?(node) + return false unless tree_sitter_node?(node) + return true if %w[while until while_statement for for_statement for_in_statement do_block].include?(node.kind) + return true if tree_sitter_hidden_loop?(node) - def score_rescue(node, nesting, signals) - signals[:rescues] += 1 - branch_cost(nesting) + score_children(node, nesting: nesting + 1, signals: signals) + (node.kind == "expression_statement" && node.text.to_s.lstrip.match?(/\A(?:for|while|loop)\b/)) || + (node.kind == "labeled_statement" && node.text.to_s.lstrip.start_with?("for ")) end - def score_early_exit(node, nesting, signals) - signals[:early_exits] += 1 - exit_cost = nesting.positive? ? 0.5 + (nesting * 0.25) : 0.0 - exit_cost + score_children(node, nesting: nesting, signals: signals) + def tree_sitter_hidden_loop?(node) + %w[body_statement block statements statement_list].include?(node.kind) && + node.children.first && + !node.children.first.named? 
&& + %w[for while loop].include?(node.children.first.kind.to_s) end - def score_boolean_node(node, nesting, signals) - signals[:boolean_ops] += 1 - 0.25 + score_children(node, nesting: nesting, signals: signals) + def tree_sitter_case?(node) + tree_sitter_node?(node) && + (%w[case switch_statement switch_expression match_statement match_expression].include?(node.kind) || + (node.kind == "expression_statement" && node.text.to_s.lstrip.start_with?("match "))) end - def score_children(node, nesting:, signals:) - node.children.sum { |child| score_node(child, nesting: nesting, signals: signals) } + def tree_sitter_rescue?(node) + tree_sitter_node?(node) && %w[rescue rescue_modifier rescue_clause rescue_body].include?(node.kind) end - def predicate_cost(node, signals) - bools = boolean_count(node) - signals[:boolean_ops] += bools - bools * 0.5 + def tree_sitter_early_exit?(node) + tree_sitter_node?(node) && + %w[return break next redo retry return_statement break_statement continue_statement].include?(node.kind) end - def boolean_count(node) - return 0 unless Ast.node?(node) + def skip_tree_sitter_nested?(node) + %w[class module lambda].include?(node.kind) + end - own = BOOLEAN_TYPES.include?(node.type) ? 
1 : 0 - own + node.children.sum { |child| boolean_count(child) } + def tree_sitter_node?(node) + node.respond_to?(:kind) && node.respond_to?(:children) end def branch_cost(nesting) - 1.0 + nesting + 1.1 + nesting end def round(value) diff --git a/gems/decomplex/rust/src/decomplex/ast-test.rs b/gems/decomplex/rust/src/decomplex/ast-test.rs new file mode 100644 index 000000000..630e7a6b4 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast-test.rs @@ -0,0 +1,20851 @@ +use super::{parse, parse_with_language, Child, Node}; +use crate::decomplex::syntax::Language; +use serde_json::{json, Value}; +use std::collections::BTreeSet; +use std::io::Write; +use std::path::Path; +use std::process::Command; +use tree_sitter::{Node as TreeSitterNode, Parser as TreeSitterParser}; + +fn parse_source(source: &str) -> Node { + let mut file = tempfile::Builder::new() + .suffix(".rb") + .tempfile() + .expect("create temp ruby file"); + file.write_all(source.as_bytes()) + .expect("write temp ruby file"); + parse(file.path()).expect("parse temp ruby file").0 +} + +fn parse_language_source(source: &str, language: Language, suffix: &str) -> Node { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create temp source file"); + file.write_all(source.as_bytes()) + .expect("write temp source file"); + parse_with_language(file.path(), language) + .expect("parse temp source file") + .0 +} + +fn nodes_of_type<'a>(node: &'a Node, node_type: &str, out: &mut Vec<&'a Node>) { + if node.r#type == node_type { + out.push(node); + } + for child in node.children.iter().filter_map(super::node) { + nodes_of_type(child, node_type, out); + } +} + +fn first_node<'a>(root: &'a Node, node_type: &str, text: &str) -> &'a Node { + let mut nodes = Vec::new(); + nodes_of_type(root, node_type, &mut nodes); + nodes + .into_iter() + .find(|node| node.text == text) + .unwrap_or_else(|| panic!("expected {node_type} with text {text:?} in {root:#?}")) +} + +fn child_node(node: &Node, 
index: usize) -> &Node { + node.children + .get(index) + .and_then(super::node) + .unwrap_or_else(|| panic!("expected child node {index} in {node:#?}")) +} + +fn child_types(node: &Node) -> Vec<&str> { + node.children + .iter() + .filter_map(super::node) + .map(|child| child.r#type.as_str()) + .collect() +} + +fn test_node(node_type: &str, children: Vec) -> Node { + Node { + r#type: node_type.to_string(), + children, + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: node_type.to_string(), + } +} + +fn infix_parts_text( + normalizer: &super::TreeSitterNormalizer<'_>, + node: TreeSitterNode<'_>, + source: &str, +) -> Option<(String, String, String)> { + let (left, operator, right) = normalizer.infix_statement_parts(node)?; + Some(( + super::node_text(left, source).to_string(), + operator, + super::node_text(right, source).to_string(), + )) +} + +fn node_value(node: &Node) -> Value { + json!({ + "type": node.r#type, + "children": node.children.iter().map(child_value).collect::>(), + "first_lineno": node.first_lineno, + "first_column": node.first_column, + "last_lineno": node.last_lineno, + "last_column": node.last_column, + "text": node.text, + }) +} + +fn child_value(child: &Child) -> Value { + match child { + Child::Node(node) => node_value(node), + Child::Symbol(value) | Child::String(value) => Value::String(value.clone()), + Child::Integer(value) => Value::Number((*value).into()), + Child::Bool(value) => Value::Bool(*value), + Child::Nil => Value::Null, + } +} + +fn children_value(children: &[Child]) -> Value { + Value::Array(children.iter().map(child_value).collect()) +} + +fn ruby_language_name(language: Language) -> &'static str { + match language { + Language::Ruby => "ruby", + Language::Python => "python", + Language::JavaScript => "javascript", + Language::Java => "java", + Language::TypeScript => "typescript", + Language::Swift => "swift", + Language::Kotlin => "kotlin", + Language::Go => "go", + Language::Rust => "rust", + 
Language::Zig => "zig", + Language::Lua => "lua", + Language::C => "c", + Language::Cpp => "cpp", + Language::CSharp => "csharp", + } +} + +fn ruby_normalized_value(path: &Path, language: Language) -> Value { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + root, = Decomplex::Ast.parse(ARGV.fetch(0)) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + elsif node.is_a?(Array) + node.map { |child| value(child) } + else + node + end + end + + puts JSON.generate(value(root)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "json", + "-e", + script, + ]) + .arg(path) + .output() + .expect("run ruby normalizer"); + assert!( + output.status.success(), + "ruby normalizer failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby normalizer should emit JSON") +} + +fn assert_ruby_parity(source: &str, language: Language, suffix: &str) { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create parity temp source file"); + file.write_all(source.as_bytes()) + .expect("write parity temp source file"); + + let rust = node_value( + &parse_with_language(file.path(), language) + .expect("parse parity temp source file") + .0, + ); + let ruby = ruby_normalized_value(file.path(), language); + assert_eq!(rust, ruby); +} + +fn raw_tree(source: &str, language: Language) -> 
tree_sitter::Tree { + let mut parser = TreeSitterParser::new(); + parser + .set_language(&super::language_grammar(language)) + .expect("set raw parser language"); + parser.parse(source, None).expect("parse raw source") +} + +fn first_raw_node<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kind: &str, + text: &str, +) -> TreeSitterNode<'tree> { + if node.kind() == kind && super::node_text(node, source) == text { + return node; + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if let Some(found) = first_raw_node_opt(child, source, kind, text) { + return found; + } + } + panic!("expected raw node kind={kind:?} text={text:?}"); +} + +fn first_raw_node_opt<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kind: &str, + text: &str, +) -> Option> { + if node.kind() == kind && super::node_text(node, source) == text { + return Some(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if let Some(found) = first_raw_node_opt(child, source, kind, text) { + return Some(found); + } + } + None +} + +fn nth_raw_node<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kind: &str, + text: &str, + index: usize, +) -> TreeSitterNode<'tree> { + let mut found = Vec::new(); + collect_raw_nodes(node, source, kind, text, &mut found); + *found + .get(index) + .unwrap_or_else(|| panic!("expected raw node kind={kind:?} text={text:?} index={index}")) +} + +fn collect_raw_nodes<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kind: &str, + text: &str, + found: &mut Vec>, +) { + if node.kind() == kind && super::node_text(node, source) == text { + found.push(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + collect_raw_nodes(child, source, kind, text, found); + } +} + +fn ruby_private_predicate( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, +) -> bool { + let mut file = tempfile::Builder::new() + 
.suffix(suffix) + .tempfile() + .expect("create ruby predicate temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby predicate temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(method, target) ? "true" : "false" + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby private predicate"); + assert!( + output.status.success(), + "ruby predicate failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby predicate output should be utf8") + .trim() + == "true" +} + +fn ruby_private_collected_names( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, +) -> BTreeSet { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby collected names temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby collected names temp source file"); + let decomplex_dir = 
Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + locals = Set.new + normalizer.send(method, target, locals) + puts JSON.generate(locals.to_a.sort) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-r", + "set", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby collected names helper"); + assert!( + output.status.success(), + "ruby collected names helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice::>(&output.stdout) + .expect("ruby collected names output should be json") + .into_iter() + .collect() +} + +fn ruby_private_scope_collected_names( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + root: bool, +) -> BTreeSet { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby scope collected names temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby scope collected names temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir 
should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + root = ARGV.fetch(3) == "true" + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + locals = Set.new + normalizer.send(:collect_ruby_scope_locals, target, locals, root: root) + puts JSON.generate(locals.to_a.sort) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(if root { "true" } else { "false" }) + .output() + .expect("run ruby scope collected names helper"); + assert!( + output.status.success(), + "ruby scope collected names helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice::>(&output.stdout) + .expect("ruby scope collected names output should be json") + .into_iter() + .collect() +} + +fn ruby_private_ruby_scope_locals( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> BTreeSet { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby scope locals temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby scope locals temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let 
script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts JSON.generate(normalizer.send(:ruby_scope_locals, target).to_a.sort) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-r", + "set", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby scope locals helper"); + assert!( + output.status.success(), + "ruby scope locals helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice::>(&output.stdout) + .expect("ruby scope locals output should be json") + .into_iter() + .collect() +} + +fn ruby_private_with_ruby_scope_trace( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + reset: bool, + initial_stack: &[Vec<&str>], +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby with_ruby_scope temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby with_ruby_scope temp source file"); + let initial_stack_json = + serde_json::to_string(initial_stack).expect("serialize initial local stack"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = 
Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + reset = ARGV.fetch(3) == "true" + initial = JSON.parse(ARGV.fetch(4)).map { |names| Set.new(names) } + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, initial) + snapshot = lambda do + Array(normalizer.instance_variable_get(:@local_stack)).map { |locals| locals.to_a.sort } + end + before = snapshot.call + inside = nil + result = normalizer.send(:with_ruby_scope, target, reset: reset) do + inside = snapshot.call + "block-result" + end + after = snapshot.call + puts JSON.generate("before" => before, "inside" => inside, "after" => after, "result" => result) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-r", + "set", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(if reset { "true" } else { "false" }) + .arg(initial_stack_json) + .output() + .expect("run ruby with_ruby_scope helper"); + assert!( + output.status.success(), + "ruby with_ruby_scope helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby with_ruby_scope output should be json") +} + +fn local_stack_from(names: &[Vec<&str>]) -> Vec> { + names + .iter() + .map(|scope| scope.iter().map(|name| name.to_string()).collect()) + .collect() +} + +fn 
local_stack_value(stack: &[BTreeSet]) -> Value { + json!(stack + .iter() + .map(|scope| scope.iter().cloned().collect::>()) + .collect::>()) +} + +fn ruby_private_destructured_parameter_targets_value( + source: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(".rb") + .tempfile() + .expect("create ruby destructured parameter temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby destructured parameter temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + targets = [] + normalizer.send(:collect_destructured_parameter_targets, target, targets) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + elsif node.is_a?(Array) + node.map { |child| value(child) } + else + node + end + end + + puts JSON.generate(targets.map { |node| value(node) }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env( + "DECOMPLEX_FORCE_LANGUAGE", + ruby_language_name(Language::Ruby), + ) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + 
"-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby destructured parameter helper"); + assert!( + output.status.success(), + "ruby destructured parameter helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby destructured parameter output should be json") +} + +fn ruby_private_scope_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + mode: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby scope temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby scope temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + mode = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + + body = mode == "body" ? normalizer.send(:wrap, :BODY, children: [], source: target) : nil + args = mode == "args" ? 
normalizer.send(:wrap, :ARGS, children: [], source: target) : nil + result = normalizer.send(:scope, body, args: args, source: target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(mode) + .output() + .expect("run ruby scope helper"); + assert!( + output.status.success(), + "ruby scope helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby scope output should be json") +} + +fn ruby_private_list_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + mode: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby list temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby list temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + mode = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == 
target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + + item = normalizer.send(:wrap, :ITEM, children: [], source: target) + children = + case mode + when "nil" then nil + when "empty" then [] + when "one" then [item] + else abort "unknown list mode: #{mode}" + end + result = normalizer.send(:list, children, source: target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(mode) + .output() + .expect("run ruby list helper"); + assert!( + output.status.success(), + "ruby list helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby list output should be json") +} + +fn ruby_private_string( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, +) -> String { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby string temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby string temp source file"); + let decomplex_dir = 
Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(method, target).to_s + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby private string helper"); + assert!( + output.status.success(), + "ruby string helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby string helper output should be utf8") + .trim_end_matches(['\r', '\n']) + .to_string() +} + +fn ruby_private_text_predicate(language: Language, method: &str, text: &str) -> bool { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + language = ARGV.fetch(0).to_sym + text = ARGV.fetch(1) + method = ARGV.fetch(2) + document = Object.new + document.define_singleton_method(:language) { language } + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, document) + puts normalizer.send(method, text) ? 
"true" : "false" + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(ruby_language_name(language)) + .arg(text) + .arg(method) + .output() + .expect("run ruby private text predicate"); + assert!( + output.status.success(), + "ruby text predicate failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby text predicate output should be utf8") + .trim() + == "true" +} + +fn ruby_private_text_string(language: Language, method: &str, text: &str) -> String { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + language = ARGV.fetch(0).to_sym + text = ARGV.fetch(1) + method = ARGV.fetch(2) + document = Object.new + document.define_singleton_method(:language) { language } + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, document) + puts normalizer.send(method, text).to_s + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(ruby_language_name(language)) + .arg(text) + .arg(method) + .output() + .expect("run ruby private text string helper"); + assert!( + output.status.success(), + "ruby text string helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby text string output should be utf8") + .trim_end_matches(['\r', '\n']) + .to_string() +} + +fn ruby_private_ts_node_value(value: &str) -> bool { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Object.new + document.define_singleton_method(:language) { :ruby } + 
normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, document) + target = + case ARGV.fetch(0) + when "nil" + nil + when "string" + "value" + when "normalized_node" + Decomplex::Ast::Node.new(type: :LIT, children: [], first_lineno: 1, first_column: 0, last_lineno: 1, last_column: 1, text: "1") + else + abort "unknown ts_node? probe" + end + puts normalizer.send(:ts_node?, target) ? "true" : "false" + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(value) + .output() + .expect("run ruby private ts_node? value helper"); + assert!( + output.status.success(), + "ruby ts_node? value helper failed for {value}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby ts_node? value output should be utf8") + .trim() + == "true" +} + +fn ruby_private_regex_literal_value(value: &str) -> bool { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Object.new + document.define_singleton_method(:language) { :ruby } + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, document) + target = + case ARGV.fetch(0) + when "nil" + nil + when "string" + "value" + when "normalized_node" + Decomplex::Ast::Node.new(type: :LIT, children: [], first_lineno: 1, first_column: 0, last_lineno: 1, last_column: 1, text: "1") + else + abort "unknown regex_literal? probe" + end + puts normalizer.send(:regex_literal?, target) ? "true" : "false" + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(value) + .output() + .expect("run ruby private regex_literal? 
value helper"); + assert!( + output.status.success(), + "ruby regex_literal? value helper failed for {value}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby regex_literal? value output should be utf8") + .trim() + == "true" +} + +fn ruby_private_node_signature( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, +) -> Option<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby node signature temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby node signature temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(method, target) + if result + puts JSON.generate([result.kind, result.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby private node signature helper"); + assert!( + output.status.success(), + "ruby node signature helper failed: 
stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = + serde_json::from_slice(&output.stdout).expect("ruby node signature output should be json"); + if value.is_null() { + return None; + } + let pair = value + .as_array() + .expect("ruby node signature should be an array"); + Some(( + pair[0] + .as_str() + .expect("node kind should be string") + .to_string(), + pair[1] + .as_str() + .expect("node text should be string") + .to_string(), + )) +} + +fn ruby_private_inline_def_name_after_receiver( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> String { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby inline def name temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby inline def name temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + receiver = normalizer.send(:inline_def_receiver, target) + puts normalizer.send(:inline_def_name_after_receiver, target, receiver).to_s + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run 
ruby inline def name helper"); + assert!( + output.status.success(), + "ruby inline def name helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby inline def name output should be utf8") + .trim() + .to_string() +} + +fn ruby_private_inline_parameter_begin_marker_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby inline_parameter_begin_marker temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby inline_parameter_begin_marker temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:inline_parameter_begin_marker, target) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", 
ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private inline_parameter_begin_marker helper"); + assert!( + output.status.success(), + "ruby inline_parameter_begin_marker helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby inline_parameter_begin_marker output should be json") +} + +fn ruby_private_prepend_inline_parameter_begin_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + body: &Value, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby prepend_inline_parameter_begin temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby prepend_inline_parameter_begin temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def node(value) + return nil if value.nil? 
+ return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |ts_node| + if ts_node.respond_to?(:kind) + target ||= ts_node if ts_node.kind == target_kind && ts_node.text.to_s == target_text + ts_node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + body = node(JSON.parse(ARGV.fetch(3))) + result = normalizer.send(:prepend_inline_parameter_begin, target, body) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(body.to_string()) + .output() + .expect("run ruby private prepend_inline_parameter_begin helper"); + assert!( + output.status.success(), + "ruby prepend_inline_parameter_begin helper failed: 
stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby prepend_inline_parameter_begin output should be json") +} + +fn ruby_private_local_or_call_for_name_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + name: &str, + local: bool, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby local_or_call_for_name temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby local_or_call_for_name temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + name = ARGV.fetch(3) + local = ARGV.fetch(4) == "true" + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, local ? 
[Set[name]] : []) + result = normalizer.send(:local_or_call_for_name, name, target) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(name) + .arg(if local { "true" } else { "false" }) + .output() + .expect("run ruby private local_or_call_for_name helper"); + assert!( + output.status.success(), + "ruby local_or_call_for_name helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby local_or_call_for_name output should be json") +} + +fn ruby_private_ruby_vcall_identifier_predicate( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + local_names: &[&str], +) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby ruby_vcall_identifier temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby ruby_vcall_identifier temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + local_names = ARGV.fetch(3).split(",").reject(&:empty?) 
+ target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, local_names.empty? ? [] : [Set.new(local_names)]) + puts normalizer.send(:ruby_vcall_identifier?, target) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(local_names.join(",")) + .output() + .expect("run ruby private ruby_vcall_identifier? helper"); + assert!( + output.status.success(), + "ruby ruby_vcall_identifier? helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby ruby_vcall_identifier? 
output should be utf8") + .trim() + == "true" +} + +fn ruby_private_vcall_identifier_predicate( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + local_names: &[&str], +) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby vcall_identifier temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby vcall_identifier temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + local_names = ARGV.fetch(3).split(",").reject(&:empty?) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, local_names.empty? ? [] : [Set.new(local_names)]) + puts normalizer.send(:vcall_identifier?, target) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(local_names.join(",")) + .output() + .expect("run ruby private vcall_identifier? helper"); + assert!( + output.status.success(), + "ruby vcall_identifier? helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby vcall_identifier? 
output should be utf8") + .trim() + == "true" +} + +fn ruby_private_normalize_terminal_statement_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + local_names: &[&str], +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize_terminal_statement temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize_terminal_statement temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + local_names = ARGV.fetch(3).split(",").reject(&:empty?) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, local_names.empty? ? 
[] : [Set.new(local_names)]) + result = normalizer.send(:normalize_terminal_statement, target) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(local_names.join(",")) + .output() + .expect("run ruby private normalize_terminal_statement helper"); + assert!( + output.status.success(), + "ruby normalize_terminal_statement helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby normalize_terminal_statement output should be json") +} + +fn ruby_private_node_list_signature( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, +) -> Vec<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby node list signature temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby node list signature temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = Array(normalizer.send(method, target)) + puts 
JSON.generate(result.map { |node| [node.kind, node.text.to_s] }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby node list signature helper"); + assert!( + output.status.success(), + "ruby node list signature helper failed for {method}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby node list signature output should be json"); + value + .as_array() + .expect("ruby node list signature should be an array") + .iter() + .map(|item| { + let item = item + .as_array() + .expect("ruby node list item should be an array"); + ( + item[0] + .as_str() + .expect("ruby node list kind should be a string") + .to_string(), + item[1] + .as_str() + .expect("ruby node list text should be a string") + .to_string(), + ) + }) + .collect() +} + +fn ruby_private_dotted_call_parts( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Option<(String, String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby dotted_call_parts temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby dotted_call_parts temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text 
+ node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + receiver, method = normalizer.send(:dotted_call_parts, target) + if receiver + puts JSON.generate([receiver.kind, receiver.text.to_s, method.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private dotted_call_parts helper"); + assert!( + output.status.success(), + "ruby dotted_call_parts helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby dotted_call_parts output should be json"); + if value.is_null() { + return None; + } + let parts = value + .as_array() + .expect("ruby dotted_call_parts should be an array"); + Some(( + parts[0] + .as_str() + .expect("receiver kind should be string") + .to_string(), + parts[1] + .as_str() + .expect("receiver text should be string") + .to_string(), + parts[2] + .as_str() + .expect("method should be string") + .to_string(), + )) +} + +fn ruby_private_member_parts( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Option<(String, String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby member_parts temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby member_parts temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = 
Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + receiver, method = normalizer.send(:member_parts, target) + if receiver + puts JSON.generate([receiver.kind, receiver.text.to_s, method.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private member_parts helper"); + assert!( + output.status.success(), + "ruby member_parts helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = + serde_json::from_slice(&output.stdout).expect("ruby member_parts output should be json"); + if value.is_null() { + return None; + } + let parts = value + .as_array() + .expect("ruby member_parts should be an array"); + Some(( + parts[0] + .as_str() + .expect("receiver kind should be string") + .to_string(), + parts[1] + .as_str() + .expect("receiver text should be string") + .to_string(), + parts[2] + .as_str() + .expect("method should be string") + .to_string(), + )) +} + +fn ruby_private_named_field_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + field: &str, +) -> Option<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + 
.expect("create ruby named_field temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby named_field temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + field = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:named_field, target, field) + if result + puts JSON.generate([result.kind, result.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(field) + .output() + .expect("run ruby private named_field helper"); + assert!( + output.status.success(), + "ruby named_field helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = + serde_json::from_slice(&output.stdout).expect("ruby named_field output should be json"); + if value.is_null() { + return None; + } + let pair = value + .as_array() + .expect("ruby named_field output should be an array"); + Some(( + pair[0] + .as_str() + .expect("named_field kind should be string") + .to_string(), + pair[1] + .as_str() + .expect("named_field text should be string") + .to_string(), + )) +} + +fn ruby_private_branch_child_signature( + 
source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + condition_kind: &str, + condition_text: &str, + index: usize, +) -> Option<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby branch_child temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby branch_child temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + condition_kind = ARGV.fetch(3) + condition_text = ARGV.fetch(4) + index = Integer(ARGV.fetch(5)) + target = nil + condition = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + condition ||= node if node.kind == condition_kind && node.text.to_s == condition_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + abort "condition node not found" unless condition + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:branch_child, target, condition, index) + if result + puts JSON.generate([result.kind, result.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(condition_kind) + .arg(condition_text) + .arg(index.to_string()) + .output() + .expect("run ruby private branch_child helper"); + assert!( + output.status.success(), + "ruby branch_child helper failed: stdout={} stderr={}", + 
String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = + serde_json::from_slice(&output.stdout).expect("ruby branch_child output should be json"); + if value.is_null() { + return None; + } + let pair = value + .as_array() + .expect("ruby branch_child output should be an array"); + Some(( + pair[0] + .as_str() + .expect("branch_child kind should be string") + .to_string(), + pair[1] + .as_str() + .expect("branch_child text should be string") + .to_string(), + )) +} + +fn ruby_private_wrap_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + normalized_source: bool, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby wrap temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby wrap temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + normalized_source = ARGV.fetch(3) == "true" + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + source = if normalized_source + normalizer.send(:wrap, :INNER, children: [], source: target) + else + target + end + result = normalizer.send(:wrap, :OUTER, children: [:child], source: source) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + 
"last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(if normalized_source { "true" } else { "false" }) + .output() + .expect("run ruby private wrap helper"); + assert!( + output.status.success(), + "ruby wrap helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby wrap output should be json") +} + +fn ruby_private_normalize_method_value( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize method temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize method temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = 
normalizer.send(method, target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + elsif node.is_a?(Array) + node.map { |child| value(child) } + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby private normalize method helper"); + assert!( + output.status.success(), + "ruby normalize method helper failed for {method}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby normalize method output should be json") +} + +fn ruby_private_normalize_return_node_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + elide_symbol: bool, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize return node temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize return node temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + elide_symbol = ARGV.fetch(3) == "true" + target = nil + walk = lambda do |node| + if 
node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:normalize_return_node, target, elide_symbol: elide_symbol) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + elsif node.is_a?(Array) + node.map { |child| value(child) } + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(if elide_symbol { "true" } else { "false" }) + .output() + .expect("run ruby private normalize_return_node helper"); + assert!( + output.status.success(), + "ruby normalize_return_node helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby normalize_return_node output should be json") +} + +fn ruby_private_normalize_body_nodes_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize body nodes temp source file"); + file.write_all(source.as_bytes()) + 
.expect("write ruby normalize body nodes temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + if target_kind == "__root__" + target = document.root + else + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + end + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:normalize_body_nodes, target.named_children, source: target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private normalize_body_nodes helper"); + assert!( + output.status.success(), + "ruby normalize_body_nodes helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby normalize_body_nodes output should be 
json") +} + +fn ruby_private_inline_def_from_argument_list_nil_value( + source: &str, + language: Language, + suffix: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby inline def argument nil temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby inline def argument nil temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:inline_def_from_argument_list, nil) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .output() + .expect("run ruby private inline def argument nil helper"); + assert!( + output.status.success(), + "ruby inline def argument nil helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby inline def argument nil output should be json") +} + +fn ruby_private_assignment_target_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = 
tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby assignment target temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby assignment target temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + source = normalizer.send(:parent_node, target) || target + right_raw = normalizer.send(:assignment_right, source) + right = right_raw ? 
normalizer.send(:normalize_node, right_raw) : nil + result = normalizer.send(:assignment_target, target, right, source: source) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private assignment target helper"); + assert!( + output.status.success(), + "ruby assignment target helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby assignment target output should be json") +} + +fn ruby_private_normalize_multiple_assignment_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby multiple assignment temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby multiple assignment temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if 
node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + left = normalizer.send(:assignment_left, target) + right_raw = normalizer.send(:assignment_right, target) + right = right_raw ? normalizer.send(:normalize_node, right_raw) : nil + result = normalizer.send(:normalize_multiple_assignment, left, right, target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private multiple assignment helper"); + assert!( + output.status.success(), + "ruby multiple assignment helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby multiple assignment output should be json") +} + +fn ruby_private_augmented_assignment_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + operator: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby augmented assignment value temp source file"); + 
file.write_all(source.as_bytes()) + .expect("write ruby augmented assignment value temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + operator = ARGV.fetch(3).to_sym + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + source = normalizer.send(:parent_node, target) || target + right_raw = normalizer.send(:assignment_right, source) + result = normalizer.send(:augmented_assignment_value, target, operator, right_raw, source) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(operator) + .output() + .expect("run ruby private augmented assignment value helper"); + assert!( + output.status.success(), + "ruby augmented assignment value helper failed: stdout={} stderr={}", + 
String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby augmented assignment value output should be json") +} + +fn ruby_private_logical_operator_assignment_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby logical operator assignment temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby logical operator assignment temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + left = normalizer.send(:assignment_left, target) + right_raw = normalizer.send(:assignment_right, target) + right = normalizer.send(:normalize_node, right_raw) + operator = normalizer.send(:operator_assignment_operator, target) + result = normalizer.send(:normalize_logical_operator_assignment, left, operator, right, source: target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node 
+ end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private logical operator assignment helper"); + assert!( + output.status.success(), + "ruby logical operator assignment helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby logical operator assignment output should be json") +} + +fn ruby_private_call_arguments_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + function_mode: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby call arguments temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby call arguments temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + function_mode = ARGV.fetch(3) + target = nil + fallback_target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + fallback_target ||= node if node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + target ||= fallback_target + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + function = + case function_mode + when 
"auto" + normalizer.send(:named_field, target, "function") || + normalizer.send(:named_field, target, "call") || + target.named_children.first + when "none" + nil + else + abort "unknown function mode: #{function_mode.inspect}" + end + result = normalizer.send(:call_arguments, target, function) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(result.map { |node| value(node) }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(function_mode) + .output() + .expect("run ruby private call arguments helper"); + assert!( + output.status.success(), + "ruby call arguments helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby call arguments output should be json") +} + +fn ruby_private_normalize_call_without_block_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + block_mode: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize_call_without_block temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize_call_without_block temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = 
r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + block_mode = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + block = + case block_mode + when "auto" + normalizer.send(:call_block, target) + when "none" + nil + else + abort "unknown block mode: #{block_mode.inspect}" + end + result = normalizer.send(:normalize_call_without_block, target, block) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(block_mode) + .output() + .expect("run ruby private normalize_call_without_block helper"); + assert!( + output.status.success(), + "ruby normalize_call_without_block helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby normalize_call_without_block output should be json") +} + +fn 
ruby_private_normalize_patterns_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize_patterns temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize_patterns temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:normalize_patterns, target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(result.map { |node| value(node) }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private normalize_patterns helper"); + assert!( + output.status.success(), + "ruby normalize_patterns 
helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby normalize_patterns output should be json") +} + +fn ruby_private_command_arguments_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby command arguments temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby command arguments temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + fallback_target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + fallback_target ||= node if node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + target ||= fallback_target + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:command_arguments, target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(result.map { |node| value(node) }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + 
.env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private command arguments helper"); + assert!( + output.status.success(), + "ruby command arguments helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby command arguments output should be json") +} + +fn ruby_private_const_for_nil_value(source: &str, language: Language, suffix: &str) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby const_for nil temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby const_for nil temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:const_for, nil) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .output() + .expect("run ruby 
private const_for nil helper"); + assert!( + output.status.success(), + "ruby const_for nil helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby const_for nil output should be json") +} + +fn ruby_private_source_before_child_wrap_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + child_kind: &str, + child_text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby source_before_child temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby source_before_child temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + child_kind = ARGV.fetch(3) + child_text = ARGV.fetch(4) + target = nil + child = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + child ||= node if node.kind == child_kind && node.text.to_s == child_text + node.named_children.each { |next_child| walk.call(next_child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + abort "child node not found" unless child + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + source = normalizer.send(:source_before_child, target, child) + result = normalizer.send(:wrap, :OUTER, children: [], source: source) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => 
node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(child_kind) + .arg(child_text) + .output() + .expect("run ruby private source_before_child helper"); + assert!( + output.status.success(), + "ruby source_before_child helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby source_before_child output should be json") +} + +fn ruby_private_source_from_nodes_value( + source: &str, + language: Language, + suffix: &str, + first_kind: &str, + first_text: &str, + last_kind: &str, + last_text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby source_from_nodes temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby source_from_nodes temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + first_kind = ARGV.fetch(1) + first_text = ARGV.fetch(2) + last_kind = ARGV.fetch(3) + last_text = ARGV.fetch(4) + first_node = nil + last_node = nil + walk = lambda do |node| + if node.respond_to?(:kind) + first_node ||= node if node.kind == first_kind && node.text.to_s == first_text + last_node = node if node.kind == last_kind && node.text.to_s == last_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "first node not found: 
#{first_kind} #{first_text.inspect}" unless first_node + abort "last node not found: #{last_kind} #{last_text.inspect}" unless last_node + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:source_from_nodes, first_node, last_node) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(first_kind) + .arg(first_text) + .arg(last_kind) + .arg(last_text) + .output() + .expect("run ruby private source_from_nodes helper"); + assert!( + output.status.success(), + "ruby source_from_nodes helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby source_from_nodes output should be json") +} + +fn ruby_private_source_from_normalized_nodes_value( + source: &str, + language: Language, + suffix: &str, + first_kind: &str, + first_text: &str, + last_kind: &str, + last_text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby source_from_normalized_nodes temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby source_from_normalized_nodes temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem 
parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + first_kind = ARGV.fetch(1) + first_text = ARGV.fetch(2) + last_kind = ARGV.fetch(3) + last_text = ARGV.fetch(4) + first_raw = nil + last_raw = nil + walk = lambda do |node| + if node.respond_to?(:kind) + first_raw ||= node if node.kind == first_kind && node.text.to_s == first_text + last_raw ||= node if node.kind == last_kind && node.text.to_s == last_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "first node not found" unless first_raw + abort "last node not found" unless last_raw + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + first_node = normalizer.send(:wrap, :FIRST, children: [], source: first_raw) + last_node = normalizer.send(:wrap, :LAST, children: [], source: last_raw) + result = normalizer.send(:source_from_normalized_nodes, first_node, last_node) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(first_kind) + .arg(first_text) + .arg(last_kind) + .arg(last_text) + .output() + .expect("run ruby private source_from_normalized_nodes helper"); + assert!( + output.status.success(), + "ruby source_from_normalized_nodes helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + 
String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby source_from_normalized_nodes output should be json") +} + +fn ruby_private_dynamic_string_source_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Option<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby dynamic_string_source temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby dynamic_string_source temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalized = target.named_children.map { |child| [child, normalizer.send(:normalize_node, child)] } + result = normalizer.send(:dynamic_string_source, normalized) + if result + puts JSON.generate([result.kind, result.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private dynamic_string_source helper"); + assert!( + output.status.success(), + "ruby dynamic_string_source helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + 
String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby dynamic_string_source output should be json"); + if value.is_null() { + return None; + } + let pair = value + .as_array() + .expect("ruby dynamic_string_source output should be an array"); + Some(( + pair[0] + .as_str() + .expect("dynamic_string_source kind should be string") + .to_string(), + pair[1] + .as_str() + .expect("dynamic_string_source text should be string") + .to_string(), + )) +} + +fn ruby_private_operator_assignment_statement_parts_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Option<(String, String, String, String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby operator_assignment_statement_parts temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby operator_assignment_statement_parts temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + left, operator, right = normalizer.send(:operator_assignment_statement_parts, target) + if left && operator && right + puts JSON.generate([left.kind, left.text.to_s, operator.to_s, right.kind, right.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", 
ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private operator_assignment_statement_parts helper"); + assert!( + output.status.success(), + "ruby operator_assignment_statement_parts helper failed for {language:?} {kind:?} {text:?}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby operator_assignment_statement_parts output should be json"); + if value.is_null() { + return None; + } + let parts = value + .as_array() + .expect("ruby operator_assignment_statement_parts output should be an array"); + Some(( + parts[0] + .as_str() + .expect("operator_assignment left kind should be string") + .to_string(), + parts[1] + .as_str() + .expect("operator_assignment left text should be string") + .to_string(), + parts[2] + .as_str() + .expect("operator_assignment operator should be string") + .to_string(), + parts[3] + .as_str() + .expect("operator_assignment right kind should be string") + .to_string(), + parts[4] + .as_str() + .expect("operator_assignment right text should be string") + .to_string(), + )) +} + +fn ruby_private_modifier_parts_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Option<((String, String), (String, String))> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby modifier_parts temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby modifier_parts temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + 
target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + action, condition = normalizer.send(:modifier_parts, target) + if action && condition + puts JSON.generate([[action.kind, action.text.to_s], [condition.kind, condition.text.to_s]]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private modifier_parts helper"); + assert!( + output.status.success(), + "ruby modifier_parts helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = + serde_json::from_slice(&output.stdout).expect("ruby modifier_parts output should be json"); + if value.is_null() { + return None; + } + let pairs = value + .as_array() + .expect("ruby modifier_parts output should be an array"); + let action = pairs[0] + .as_array() + .expect("modifier_parts action should be an array"); + let condition = pairs[1] + .as_array() + .expect("modifier_parts condition should be an array"); + Some(( + ( + action[0] + .as_str() + .expect("modifier_parts action kind should be string") + .to_string(), + action[1] + .as_str() + .expect("modifier_parts action text should be string") + .to_string(), + ), + ( + condition[0] + .as_str() + .expect("modifier_parts condition kind should be string") + .to_string(), + condition[1] + .as_str() + .expect("modifier_parts condition text should be 
string") + .to_string(), + ), + )) +} + +fn ruby_private_visibility_inline_def_statement_predicate( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby visibility_inline_def_statement temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby visibility_inline_def_statement temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(:visibility_inline_def_statement?, target, target.named_children.first) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private visibility_inline_def_statement helper"); + assert!( + output.status.success(), + "ruby visibility_inline_def_statement helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby visibility_inline_def_statement output should be utf8") + .trim() + == "true" +} + +fn ruby_private_drop_trailing_nil_statement_value(input: &Value) -> Value { + let decomplex_dir = 
Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def node(value) + return nil if value.nil? + return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + result = normalizer.send(:drop_trailing_nil_statement, node(JSON.parse(ARGV.fetch(0)))) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "json", + "-e", + script, + ]) + .arg(input.to_string()) + .output() + .expect("run ruby private drop_trailing_nil_statement helper"); + assert!( + output.status.success(), + "ruby drop_trailing_nil_statement helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby drop_trailing_nil_statement output should be json") +} + +fn ruby_private_elide_tail_returns_value(input: &Value, ruby: bool) -> Value { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def node(value) + return nil if 
value.nil? + return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + adapter = if ARGV.fetch(1) == "ruby" + Decomplex::Ast::RubyTreeSitterNormalizationAdapter.new(nil) + else + Decomplex::Ast::TreeSitterNormalizationAdapter.new(nil) + end + normalizer.instance_variable_set(:@normalization_adapter, adapter) + result = normalizer.send(:elide_tail_returns, node(JSON.parse(ARGV.fetch(0)))) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "json", + "-e", + script, + ]) + .arg(input.to_string()) + .arg(if ruby { "ruby" } else { "other" }) + .output() + .expect("run ruby private elide_tail_returns helper"); + assert!( + output.status.success(), + "ruby elide_tail_returns helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby elide_tail_returns output should be json") +} + +fn ruby_private_elide_implicit_nil_body_value(input: &Value, ruby: bool) -> Value { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust 
dir should have gem parent"); + let script = r#" + def node(value) + return nil if value.nil? + return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + adapter = if ARGV.fetch(1) == "ruby" + Decomplex::Ast::RubyTreeSitterNormalizationAdapter.new(nil) + else + Decomplex::Ast::TreeSitterNormalizationAdapter.new(nil) + end + normalizer.instance_variable_set(:@normalization_adapter, adapter) + result = normalizer.send(:elide_implicit_nil_body, node(JSON.parse(ARGV.fetch(0)))) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "json", + "-e", + script, + ]) + .arg(input.to_string()) + .arg(if ruby { "ruby" } else { "other" }) + .output() + .expect("run ruby private elide_implicit_nil_body helper"); + assert!( + output.status.success(), + "ruby elide_implicit_nil_body helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby elide_implicit_nil_body output should be json") +} + +fn ruby_private_prepend_rescue_exception_assignment_value( + source: &str, + 
body: &Value, + assignment: &Value, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(".rb") + .tempfile() + .expect("create ruby prepend rescue temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby prepend rescue temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def node(value) + return nil if value.nil? + return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + body = node(JSON.parse(ARGV.fetch(1))) + assignment = node(JSON.parse(ARGV.fetch(2))) + result = normalizer.send(:prepend_rescue_exception_assignment, body, assignment) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", "ruby") + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(body.to_string()) + .arg(assignment.to_string()) + .output() + .expect("run ruby private prepend_rescue_exception_assignment 
helper"); + assert!( + output.status.success(), + "ruby prepend_rescue_exception_assignment helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby prepend_rescue_exception_assignment output should be json") +} + +fn ruby_private_symbol_literal_node_predicate( + node_type: Option<&str>, + child_kind: Option<&str>, +) -> bool { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def child(kind) + case kind + when "symbol" + :value + when "string" + "value" + when "node" + Decomplex::Ast::Node.new( + type: :NIL, + children: [], + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: "NIL" + ) + when "nil" + nil + else + nil + end + end + + node_type = ARGV.fetch(0) + child_kind = ARGV.fetch(1) + target = if node_type == "none" + nil + else + children = child_kind == "none" ? [] : [child(child_kind)] + Decomplex::Ast::Node.new( + type: node_type.to_sym, + children: children, + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: node_type + ) + end + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + puts normalizer.send(:symbol_literal_node?, target) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(node_type.unwrap_or("none")) + .arg(child_kind.unwrap_or("none")) + .output() + .expect("run ruby private symbol_literal_node? helper"); + assert!( + output.status.success(), + "ruby symbol_literal_node? helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby symbol_literal_node? 
output should be utf8") + .trim() + == "true" +} + +fn ruby_private_same_ts_node_predicate( + source: &str, + language: Language, + suffix: &str, + left_kind: &str, + left_text: &str, + left_index: usize, + right_kind: &str, + right_text: &str, + right_index: usize, +) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby same_ts_node temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby same_ts_node temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + left_kind = ARGV.fetch(1) + left_text = ARGV.fetch(2) + left_index = ARGV.fetch(3).to_i + right_kind = ARGV.fetch(4) + right_text = ARGV.fetch(5) + right_index = ARGV.fetch(6).to_i + + def matches(root, kind, text) + found = [] + walk = lambda do |node| + if node.respond_to?(:kind) + found << node if node.kind == kind && node.text.to_s == text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(root) + found + end + + left = matches(document.root, left_kind, left_text).fetch(left_index) + right = matches(document.root, right_kind, right_text).fetch(right_index) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(:same_ts_node?, left, right) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(left_kind) + .arg(left_text) + .arg(left_index.to_string()) + .arg(right_kind) + .arg(right_text) + .arg(right_index.to_string()) + .output() + .expect("run ruby private same_ts_node? helper"); + assert!( + output.status.success(), + "ruby same_ts_node? 
helper failed for {language:?}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby same_ts_node? output should be utf8") + .trim() + == "true" +} + +fn ruby_private_parent_named_child_predicate( + source: &str, + language: Language, + suffix: &str, + parent_kind: &str, + parent_text: &str, + parent_index: usize, + child_kind: &str, + child_text: &str, + child_index: usize, +) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby parent_named_child temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby parent_named_child temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + parent_kind = ARGV.fetch(1) + parent_text = ARGV.fetch(2) + parent_index = ARGV.fetch(3).to_i + child_kind = ARGV.fetch(4) + child_text = ARGV.fetch(5) + child_index = ARGV.fetch(6).to_i + + def matches(root, kind, text) + found = [] + walk = lambda do |node| + if node.respond_to?(:kind) + found << node if node.kind == kind && node.text.to_s == text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(root) + found + end + + parent = matches(document.root, parent_kind, parent_text).fetch(parent_index) + child = matches(document.root, child_kind, child_text).fetch(child_index) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(:parent_named_child?, parent, child) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(parent_kind) + .arg(parent_text) + 
.arg(parent_index.to_string()) + .arg(child_kind) + .arg(child_text) + .arg(child_index.to_string()) + .output() + .expect("run ruby private parent_named_child? helper"); + assert!( + output.status.success(), + "ruby parent_named_child? helper failed for {language:?}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby parent_named_child? output should be utf8") + .trim() + == "true" +} + +fn ruby_private_node_key_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + index: usize, +) -> (String, usize, usize) { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby node_key temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby node_key temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target_index = ARGV.fetch(3).to_i + found = [] + walk = lambda do |node| + if node.respond_to?(:kind) + found << node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + target = found.fetch(target_index) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts JSON.generate(normalizer.send(:node_key, target)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(index.to_string()) + .output() + .expect("run ruby private node_key helper"); + 
assert!( + output.status.success(), + "ruby node_key helper failed for {language:?}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = + serde_json::from_slice(&output.stdout).expect("ruby node_key output should be json"); + let key = value + .as_array() + .expect("ruby node_key output should be an array"); + ( + key[0] + .as_str() + .expect("node_key kind should be string") + .to_string(), + key[1] + .as_u64() + .expect("node_key start byte should be integer") as usize, + key[2] + .as_u64() + .expect("node_key end byte should be integer") as usize, + ) +} + +#[test] +fn tree_normalizer_new_initializes_empty_state() { + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + assert_eq!(normalizer.source, ""); + assert_eq!(normalizer.language, Language::Ruby); + assert!(normalizer.local_stack.is_empty()); + assert_eq!(normalizer.root_span, None); +} + +#[test] +fn normalize_root_matches_ruby_across_tree_normalizer_languages() { + for (source, language, suffix) in [ + ( + "class C\n def each(value)\n yield value\n case value\n when 1 then :one\n else :other\n end\n end\nend\n", + Language::Ruby, + ".rb", + ), + ( + "def gen(value):\n yield value\n other()\n", + Language::Python, + ".py", + ), + ( + "function f(value: number) { switch (value) { case 1: one(); break; default: other(); } return value ? 
one() : other(); }\n", + Language::TypeScript, + ".ts", + ), + ( + "function f(value)\n if value then\n one()\n else\n other()\n end\n return value\nend\n", + Language::Lua, + ".lua", + ), + ] { + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn tree_normalizer_yield_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield :item\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield :item", + ), + ( + "def each\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "def gen():\n yield from items\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield from items", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "block", + "yield item\n other()", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.yield_statement(node), + ruby_private_predicate(source, language, suffix, "yield_statement?", kind, text), + "yield_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn yield_argument_list_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield(:item)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "(:item)", + ), + ( + "def each\n yield :item\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":item", + ), + ( + "def call\n foo(:item)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "(:item)", + ), + ( + "yield_value(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ( + "yield(value);\n", + Language::TypeScript, + ".ts", + "parenthesized_expression", + "(value)", + ), + ( + "coroutine.yield(value)\n", + Language::Lua, + ".lua", + "arguments", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.yield_argument_list(node), + ruby_private_predicate(source, language, suffix, "yield_argument_list?", kind, text), + "yield_argument_list? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn yield_argument_nodes_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield(:item)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "(:item)", + ), + ( + "def each\n yield nil\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "nil", + ), + ( + "def each\n yield item, other\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "item, other", + ), + ( + "yield_value(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ( + "yield(value);\n", + Language::TypeScript, + ".ts", + "parenthesized_expression", + "(value)", + ), + ( + "coroutine.yield(value)\n", + Language::Lua, + ".lua", + "arguments", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = Value::Array( + normalizer + .yield_argument_nodes(node) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "yield_argument_nodes", + kind, + text + ), + "yield_argument_nodes mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn yield_inline_arguments_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "function* gen() { yield item; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "yield item;", + ), + ( + "coroutine.yield(item)\n", + Language::Lua, + ".lua", + "function_call", + "coroutine.yield(item)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + 
let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = Value::Array( + normalizer + .yield_inline_arguments(node) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "yield_inline_arguments", + kind, + text + ), + "yield_inline_arguments mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_yield_argument_list_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield(:item)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "(:item)", + ), + ( + "def each\n yield :item\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":item", + ), + ( + "def each\n yield nil\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "nil", + ), + ( + "yield_value(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_yield_argument_list(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_yield_argument_list", + kind, + text + ), + "normalize_yield_argument_list mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_yield_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield", + ), + ( + "def each\n yield item\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield item", + ), + ( + "def each\n yield nil\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield nil", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "function* 
gen() { yield item; }\n", + Language::TypeScript, + ".ts", + "yield_expression", + "yield item", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_yield(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_yield", + kind, + text + ), + "normalize_yield mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_yield_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield", + ), + ( + "def each\n yield item\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield item", + ), + ( + "def each\n yield nil\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield nil", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "def gen():\n yield from items\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield from items", + ), + ( + "function* gen() { yield item; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "yield item;", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_yield_statement(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_yield_statement", + kind, + text + ), + "normalize_yield_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_node_dispatch_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n 
yield item\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield item", + ), + ( + "def check\n !flag\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "!flag", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); default: other(); }", + ), + ( + "if value then one() else other() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value then one() else other() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_node(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_node", + kind, + text + ), + "normalize_node mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn python_yield_statement_in_multi_statement_block_matches_ruby_ast() { + let source = "def gen():\n yield item\n other()\n"; + assert_ruby_parity(source, Language::Python, ".py"); + + let root = parse_language_source(source, Language::Python, ".py"); + let defn = first_node(&root, "DEFN", "def gen():\n yield item\n other()"); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + + assert_eq!(body.r#type, "BLOCK"); + assert_eq!(child_types(body), vec!["YIELD", "EXPRESSION_STATEMENT"]); +} + +#[test] +fn tree_normalizer_super_statement_matches_ruby_private_predicate() { + for (source, kind, text) in [ + ( + "class Child < Parent\n def call\n super\n end\nend\n", + "body_statement", + "super", + ), + ( + "class Child < Parent\n def call\n super :item\n end\nend\n", + "body_statement", + "super :item", + ), + ( + "class Child < 
Parent\n def call\n value\n end\nend\n", + "body_statement", + "value", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + + assert_eq!( + normalizer.super_statement(node), + ruby_private_predicate( + source, + Language::Ruby, + ".rb", + "super_statement?", + kind, + text + ), + "super_statement? mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_super_statement_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "class Child < Parent\n def call\n super\n end\nend\n", + "body_statement", + "super", + ), + ( + "class Child < Parent\n def call\n super :item\n end\nend\n", + "body_statement", + "super :item", + ), + ( + "class Child < Parent\n def call\n super value\n end\nend\n", + "body_statement", + "super value", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = node_value(&normalizer.normalize_super_statement(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_super_statement", + kind, + text + ), + "normalize_super_statement mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn ruby_super_statement_normalization_matches_ruby_ast() { + let source = "class Child < Parent\n def bare\n super\n end\n def with_arg\n super :item\n end\nend\n"; + assert_ruby_parity(source, Language::Ruby, ".rb"); + + let root = parse_language_source(source, Language::Ruby, ".rb"); + let bare = first_node(&root, "SUPER", "super"); + let with_arg = first_node(&root, "SUPER", "super :item"); + + assert_eq!(bare.children, vec![Child::Nil]); + assert_eq!(child_types(with_arg), vec!["LIST"]); + assert_eq!(child_types(child_node(with_arg, 0)), vec!["LIT"]); +} + +#[test] 
+fn tree_normalizer_argument_list_element_reference_matches_ruby_private_predicate() { + for (source, text) in [ + ("def indexed\n return items[0]\nend\n", "items[0]"), + ("def indexed\n return obj.foo[0]\nend\n", "obj.foo[0]"), + ("def indexed\n return [0]\nend\n", "[0]"), + ( + "def indexed\n return items[0], other\nend\n", + "items[0], other", + ), + ("def indexed\n return items[]\nend\n", "items[]"), + ( + "def indexed\n return items[0] { nope }\nend\n", + "items[0] { nope }", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "argument_list", text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + + assert_eq!( + normalizer.argument_list_element_reference(node), + ruby_private_predicate( + source, + Language::Ruby, + ".rb", + "argument_list_element_reference?", + "argument_list", + text + ), + "argument_list_element_reference? mismatch for {text:?}" + ); + } +} + +#[test] +fn normalize_argument_list_element_reference_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def indexed\n return items[0]\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "items[0]", + ), + ( + "def indexed\n return obj.foo[0]\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "obj.foo[0]", + ), + ( + "def indexed\n return [0]\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "[0]", + ), + ( + "def indexed\n return items[0], other\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "items[0], other", + ), + ( + "def indexed\n return items[0] { nope }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "items[0] { nope }", + ), + ( + "def indexed():\n return foo(items[0])\n", + Language::Python, + ".py", + "argument_list", + "(items[0])", + ), + ( + "function indexed(){ return foo(items[0]); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(items[0])", + ), + ( + "function indexed() return foo(items[0]) end\n", + 
Language::Lua, + ".lua", + "arguments", + "(items[0])", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_argument_list_element_reference(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_argument_list_element_reference", + kind, + text + ), + "normalize_argument_list_element_reference mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn dynamic_scope_rewrites_locals_without_crossing_scope_boundaries() { + let inner_assignment = test_node("LASGN", vec![Child::Symbol("inner".to_string())]); + let node = test_node( + "BLOCK", + vec![ + Child::Node(Box::new(test_node( + "LASGN", + vec![Child::Symbol("value".to_string())], + ))), + Child::Node(Box::new(test_node( + "LVAR", + vec![Child::Symbol("value".to_string())], + ))), + Child::Node(Box::new(test_node( + "DEFN", + vec![ + Child::Symbol("nested".to_string()), + Child::Node(Box::new(test_node( + "SCOPE", + vec![ + Child::Nil, + Child::Nil, + Child::Node(Box::new(inner_assignment)), + ], + ))), + ], + ))), + ], + ); + + let result = super::dynamic_scope(node); + + assert_eq!(child_node(&result, 0).r#type, "DASGN"); + assert_eq!(child_node(&result, 1).r#type, "DVAR"); + let nested = child_node(&result, 2); + assert_eq!(nested.r#type, "DEFN"); + let nested_scope = child_node(nested, 1); + assert_eq!(nested_scope.r#type, "SCOPE"); + assert_eq!(child_node(nested_scope, 2).r#type, "LASGN"); +} + +#[test] +fn link_when_chain_sets_next_arm_and_pads_short_when_nodes() { + let fallback = test_node("ELSE", Vec::new()); + let first = test_node( + "WHEN", + vec![ + Child::Symbol("patterns".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let second = test_node( + "WHEN", + vec![ + 
Child::Symbol("patterns".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + let result = normalizer + .link_when_chain(vec![first, second], Some(fallback)) + .expect("expected linked when chain"); + + assert_eq!(result.r#type, "WHEN"); + let next = child_node(&result, 2); + assert_eq!(next.r#type, "WHEN"); + assert_eq!(child_node(next, 2).r#type, "ELSE"); + + let short = test_node("WHEN", vec![Child::Symbol("patterns".to_string())]); + let fallback = test_node("ELSE", Vec::new()); + let result = normalizer + .link_when_chain(vec![short], Some(fallback)) + .expect("expected padded when chain"); + + assert_eq!(result.children.len(), 3); + assert_eq!(result.children[1], Child::Nil); + assert_eq!(child_node(&result, 2).r#type, "ELSE"); +} + +#[test] +fn link_rescue_chain_sets_next_rescue_and_pads_short_resbody_nodes() { + let first = test_node( + "RESBODY", + vec![ + Child::Symbol("exceptions".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let second = test_node( + "RESBODY", + vec![ + Child::Symbol("exceptions".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + let result = normalizer + .link_rescue_chain(vec![first, second]) + .expect("expected linked rescue chain"); + + assert_eq!(result.r#type, "RESBODY"); + let next = child_node(&result, 2); + assert_eq!(next.r#type, "RESBODY"); + assert_eq!(next.children[2], Child::Nil); + + let short = test_node("RESBODY", vec![Child::Symbol("exceptions".to_string())]); + let result = normalizer + .link_rescue_chain(vec![short]) + .expect("expected padded rescue chain"); + + assert_eq!(result.children.len(), 3); + assert_eq!(result.children[1], Child::Nil); + assert_eq!(result.children[2], Child::Nil); +} + +#[test] +fn infix_statement_parts_extracts_allowed_wrapper_parts() { + let source = "def calc\n left + right\nend\n"; + let tree = raw_tree(source, 
Language::Ruby); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let body = first_raw_node(tree.root_node(), source, "body_statement", "left + right"); + let binary = first_raw_node(tree.root_node(), source, "binary", "left + right"); + + assert_eq!( + infix_parts_text(&normalizer, body, source), + Some(("left".to_string(), "+".to_string(), "right".to_string())) + ); + assert_eq!(infix_parts_text(&normalizer, binary, source), None); + + let source = "def calc\n return left + right\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let args = first_raw_node(tree.root_node(), source, "argument_list", "left + right"); + assert_eq!( + infix_parts_text(&normalizer, args, source), + Some(("left".to_string(), "+".to_string(), "right".to_string())) + ); + + let source = "def calc\n left && right\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let boolean = first_raw_node(tree.root_node(), source, "body_statement", "left && right"); + assert_eq!(infix_parts_text(&normalizer, boolean, source), None); +} + +#[test] +fn infix_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "def calc\n return left + right\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "left + right", + ), + ( + "def calc\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + 
] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.infix_statement(node), + ruby_private_predicate(source, language, suffix, "infix_statement?", kind, text), + "infix_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_infix_statement_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def calc\n left + right\nend\n", + "body_statement", + "left + right", + ), + ( + "def calc\n return left + right\nend\n", + "argument_list", + "left + right", + ), + ( + "def match\n value =~ /left/\nend\n", + "body_statement", + "value =~ /left/", + ), + ( + "def match\n value =~ pattern\nend\n", + "body_statement", + "value =~ pattern", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_infix_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_infix_statement", + kind, + text + ), + "normalize_infix_statement mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn regex_literal_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value =~ /left/\n", + Language::Ruby, + ".rb", + "regex", + "/left/", + ), + ( + "value = \"left\"\n", + Language::Ruby, + ".rb", + "string", + "\"left\"", + ), + ( + "const pattern = /left/;\n", + Language::TypeScript, + ".ts", + "regex", + "/left/", + ), + ( + "pattern = r\"left\"\n", + Language::Python, + ".py", + "string", + "r\"left\"", + ), + ( + "local pattern = \"left\"\n", + Language::Lua, + ".lua", + "string_content", + "left", + ), + ] { + let tree = raw_tree(source, 
language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.regex_literal(Some(node)), + ruby_private_predicate(source, language, suffix, "regex_literal?", kind, text), + "regex_literal? mismatch for {language:?} {kind} {text:?}" + ); + } + + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + assert_eq!( + normalizer.regex_literal(None), + ruby_private_regex_literal_value("nil") + ); + assert!(!ruby_private_regex_literal_value("string")); + assert!(!ruby_private_regex_literal_value("normalized_node")); +} + +#[test] +fn argument_list_unary_not_matches_ruby_private_predicate() { + for (line, text) in [ + ("return !flag", "!flag"), + ("return !!flag", "!!flag"), + ("return flag", "flag"), + ("return !flag, other", "!flag, other"), + ("return (!flag)", "(!flag)"), + ("return not flag", "not flag"), + ] { + let source = format!("def check\n {line}\nend\n"); + let tree = raw_tree(&source, Language::Ruby); + let node = first_raw_node(tree.root_node(), &source, "argument_list", text); + let normalizer = super::TreeSitterNormalizer::new(&source, Language::Ruby); + + assert_eq!( + normalizer.argument_list_unary_not(node), + ruby_private_predicate( + &source, + Language::Ruby, + ".rb", + "argument_list_unary_not?", + "argument_list", + text + ), + "argument_list_unary_not? 
mismatch for {line:?}" + ); + } +} + +#[test] +fn normalize_argument_list_unary_not_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n return !flag\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "!flag", + ), + ( + "def check\n return !!flag\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "!!flag", + ), + ( + "def check\n return flag\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "flag", + ), + ( + "def check\n return !flag, other\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "!flag, other", + ), + ( + "def check():\n return foo(not flag)\n", + Language::Python, + ".py", + "argument_list", + "(not flag)", + ), + ( + "function check(){ return foo(!flag); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(!flag)", + ), + ( + "function check() return foo(not flag) end\n", + Language::Lua, + ".lua", + "arguments", + "(not flag)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_argument_list_unary_not(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_argument_list_unary_not", + kind, + text + ), + "normalize_argument_list_unary_not mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn unary_not_statement_matches_ruby_private_predicate() { + for (line, text) in [ + ("!flag", "!flag"), + ("!!flag", "!!flag"), + ("flag", "flag"), + ("!flag; other", "!flag; other"), + ("(!flag)", "(!flag)"), + ("not flag", "not flag"), + ] { + let source = format!("def check\n {line}\nend\n"); + let tree = raw_tree(&source, Language::Ruby); + let node = first_raw_node(tree.root_node(), &source, "body_statement", text); + let normalizer = super::TreeSitterNormalizer::new(&source, 
Language::Ruby); + + assert_eq!( + normalizer.unary_not_statement(node), + ruby_private_predicate( + &source, + Language::Ruby, + ".rb", + "unary_not_statement?", + "body_statement", + text + ), + "unary_not_statement? mismatch for {line:?}" + ); + } +} + +#[test] +fn unary_not_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "!flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "!!flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "-flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "not flag", + ), + ( + "function check(flag: boolean) { return !flag; }\n", + Language::TypeScript, + ".ts", + "unary_expression", + "!flag", + ), + ( + "if not flag:\n pass\n", + Language::Python, + ".py", + "not_operator", + "not flag", + ), + ( + "if not flag then end\n", + Language::Lua, + ".lua", + "unary_expression", + "not flag", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.unary_not_expression(node), + ruby_private_predicate( + source, + language, + suffix, + "unary_not_expression?", + kind, + text + ), + "unary_not_expression? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_unary_not_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "!flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "!!flag", + ), + ( + "function check(flag: boolean) { return !flag; }\n", + Language::TypeScript, + ".ts", + "unary_expression", + "!flag", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_unary_not(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_unary_not", + kind, + text + ), + "normalize_unary_not mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_unary_not_statement_matches_ruby_private_method() { + for (line, text) in [("!flag", "!flag"), ("!!flag", "!!flag")] { + let source = format!("def check\n {line}\nend\n"); + let tree = raw_tree(&source, Language::Ruby); + let node = first_raw_node(tree.root_node(), &source, "body_statement", text); + let mut normalizer = super::TreeSitterNormalizer::new(&source, Language::Ruby); + let rust = normalizer + .normalize_unary_not_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + &source, + Language::Ruby, + ".rb", + "normalize_unary_not_statement", + "body_statement", + text + ), + "normalize_unary_not_statement mismatch for {text:?}" + ); + } +} + +#[test] +fn unary_minus_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n -flag\n !flag\n value\nend\n", + 
Language::Ruby, + ".rb", + "unary", + "-flag", + ), + ( + "def check\n -flag\n !flag\n value\nend\n", + Language::Ruby, + ".rb", + "unary", + "!flag", + ), + ( + "function check(value: number) { return -value; }\n", + Language::TypeScript, + ".ts", + "unary_expression", + "-value", + ), + ( + "x = -value\n", + Language::Python, + ".py", + "unary_operator", + "-value", + ), + ( + "local x = -value\n", + Language::Lua, + ".lua", + "expression_list", + "-value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.unary_minus_expression(node), + ruby_private_predicate( + source, + language, + suffix, + "unary_minus_expression?", + kind, + text + ), + "unary_minus_expression? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_unary_minus_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n -1\n -flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "-1", + ), + ( + "def check\n -1\n -flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "-flag", + ), + ( + "function check(value: number) { return -value; }\n", + Language::TypeScript, + ".ts", + "unary_expression", + "-value", + ), + ( + "x = -value\n", + Language::Python, + ".py", + "unary_operator", + "-value", + ), + ( + "local x = -value\n", + Language::Lua, + ".lua", + "expression_list", + "-value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_unary_minus(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_unary_minus", + kind, + text + ), + "normalize_unary_minus mismatch 
for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn binary_operator_matches_ruby_private_helper() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\n left && right\n value\nend\n", + Language::Ruby, + ".rb", + "binary", + "left + right", + ), + ( + "def calc\n left + right\n left && right\n value\nend\n", + Language::Ruby, + ".rb", + "binary", + "left && right", + ), + ( + "def calc\n left + right\n left && right\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right\n left && right\n value", + ), + ( + "const value = left + right && other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right && other", + ), + ( + "const value = left + right && other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left + right and other\n", + Language::Python, + ".py", + "boolean_operator", + "left + right and other", + ), + ( + "value = left + right and other\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left + right and other\n", + Language::Lua, + ".lua", + "expression_list", + "left + right and other", + ), + ( + "local value = left + right and other\n", + Language::Lua, + ".lua", + "binary_expression", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.binary_operator(node).unwrap_or_default(), + ruby_private_string(source, language, suffix, "binary_operator", kind, text), + "binary_operator mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn boolean_operator_matches_ruby_private_helper() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left && right\n left || right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left && right", + ), + ( + "def 
calc\n left && right\n left || right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left || right", + ), + ( + "def calc\n left && right\n left || right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left + right", + ), + ( + "const value = left && right || other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right", + ), + ( + "const value = left && right || other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right || other", + ), + ( + "value = left and right or other\n", + Language::Python, + ".py", + "boolean_operator", + "left and right", + ), + ( + "value = left and right or other\n", + Language::Python, + ".py", + "boolean_operator", + "left and right or other", + ), + ( + "local value = left and right or other\n", + Language::Lua, + ".lua", + "expression_list", + "left and right or other", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.boolean_operator(node).unwrap_or_default(), + ruby_private_string(source, language, suffix, "boolean_operator", kind, text), + "boolean_operator mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn comparison_operator_matches_ruby_private_helper() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left == right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left == right", + ), + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "const value = left === right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left === right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", 
+ ), + ( + "value = left == right\n", + Language::Python, + ".py", + "comparison_operator", + "left == right", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left == right\n", + Language::Lua, + ".lua", + "expression_list", + "left == right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.comparison_operator(node).unwrap_or_default(), + ruby_private_string(source, language, suffix, "comparison_operator", kind, text), + "comparison_operator mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn comparison_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left == right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left == right", + ), + ( + "const value = left === right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left === right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left == right\n", + Language::Python, + ".py", + "comparison_operator", + "left == right", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left == right\n", + Language::Lua, + ".lua", + "expression_list", + "left == right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + 
normalizer.comparison_expression(node), + ruby_private_predicate( + source, + language, + suffix, + "comparison_expression?", + kind, + text + ), + "comparison_expression? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn comparison_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("value = left == right\n", Language::Python, ".py"), + ( + "const value = left === right;\n", + Language::TypeScript, + ".ts", + ), + ("local value = left == right\n", Language::Lua, ".lua"), + ] { + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn normalize_comparison_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left == right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left == right", + ), + ( + "value = left == right\n", + Language::Python, + ".py", + "comparison_operator", + "left == right", + ), + ( + "const value = left === right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left === right", + ), + ( + "local value = left == right\n", + Language::Lua, + ".lua", + "expression_list", + "left == right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_comparison(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_comparison", + kind, + text + ), + "normalize_comparison mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn boolean_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "def calc\n left or right\nend\n", + Language::Ruby, + ".rb", + 
"body_statement", + "left or right", + ), + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "foo(left && right)\n", + Language::Ruby, + ".rb", + "argument_list", + "(left && right)", + ), + ( + "value = left and right\n", + Language::Python, + ".py", + "boolean_operator", + "left and right", + ), + ( + "local value = left and right\n", + Language::Lua, + ".lua", + "expression_list", + "left and right", + ), + ( + "const value = left && right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.boolean_statement(node), + ruby_private_predicate(source, language, suffix, "boolean_statement?", kind, text), + "boolean_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn boolean_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "def calc\n left && right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left && right", + ), + ( + "def calc\n left && right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left + right", + ), + ( + "const value = left && right;\nconst other = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right", + ), + ( + "const value = left && right;\nconst other = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left and right\nother = left + right\n", + Language::Python, + ".py", + "boolean_operator", + "left and right", + ), + ( + "value = left and right\nother = left + right\n", + Language::Python, + ".py", + 
"binary_operator", + "left + right", + ), + ( + "local value = left and right\nlocal other = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left and right", + ), + ( + "local value = left and right\nlocal other = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.boolean_expression(node), + ruby_private_predicate(source, language, suffix, "boolean_expression?", kind, text), + "boolean_expression? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_boolean_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "def calc\n left || right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left || right", + ), + ( + "def calc\n left && middle && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && middle && right", + ), + ( + "value = left and right\n", + Language::Python, + ".py", + "boolean_operator", + "left and right", + ), + ( + "value = left or right\n", + Language::Python, + ".py", + "boolean_operator", + "left or right", + ), + ( + "local value = left and right\n", + Language::Lua, + ".lua", + "expression_list", + "left and right", + ), + ( + "local value = left or right\n", + Language::Lua, + ".lua", + "expression_list", + "left or right", + ), + ( + "const value = left && right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right", + ), + ( + "const value = left || right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left || right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let 
mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_boolean(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_boolean", + kind, + text + ), + "normalize_boolean mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn boolean_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def calc\n left && right\nend\n", Language::Ruby, ".rb"), + ("value = left and right\n", Language::Python, ".py"), + ("local value = left and right\n", Language::Lua, ".lua"), + ( + "const value = left && right;\n", + Language::TypeScript, + ".ts", + ), + ] { + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn operator_call_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\n left && right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left + right", + ), + ( + "def calc\n left + right\n left && right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left && right", + ), + ( + "const value = left + right && other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "const value = left + right && other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right && other", + ), + ( + "value = left + right and other\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "value = left + right and other\n", + Language::Python, + ".py", + "boolean_operator", + "left + right and other", + ), + ( + "local value = left + right\nlocal other = left and right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ( + "local value = left + right\nlocal other = left and right\n", + Language::Lua, + ".lua", + "expression_list", + "left and right", + ), + ] { + let tree = raw_tree(source, 
language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.operator_call_expression(node), + ruby_private_predicate( + source, + language, + suffix, + "operator_call_expression?", + kind, + text + ), + "operator_call_expression? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_operator_call_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "def calc\n left =~ /right/\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left =~ /right/", + ), + ( + "def calc\n left =~ pattern\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left =~ pattern", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_operator_call(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_operator_call", + kind, + text + ), + "normalize_operator_call mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn operator_call_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("value = left + right\n", Language::Python, ".py"), + ("local value = left + right\n", Language::Lua, ".lua"), + ("const value = left + right;\n", 
Language::TypeScript, ".ts"), + ] { + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn spaced_text_matches_ruby_private_helper() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.spaced_text(node), + ruby_private_string(source, language, suffix, "spaced_text", kind, text), + "spaced_text mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn class_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "class Thing; end\n", + Language::Ruby, + ".rb", + "class", + "class Thing; end", + ), + ( + "class Thing:\n pass\n", + Language::Python, + ".py", + "class_definition", + "class Thing:\n pass", + ), + ( + "class Thing {}\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Thing {}", + ), + ( + "local Thing = {}\n", + Language::Lua, + ".lua", + "variable_declaration", + "local Thing = {}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.class_node(node), + ruby_private_predicate(source, language, suffix, "class_node?", kind, text), + "class_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn module_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "module Thing\n value\nend\n", + Language::Ruby, + ".rb", + "module", + "module Thing\n value\nend", + ), + ( + "class Thing; end\n", + Language::Ruby, + ".rb", + "class", + "class Thing; end", + ), + ( + "value = 1\n", + Language::Python, + ".py", + "module", + "value = 1\n", + ), + ( + "namespace Thing { const value = 1; }\n", + Language::TypeScript, + ".ts", + "program", + "namespace Thing { const value = 1; }\n", + ), + ( + "local Thing = {}\n", + Language::Lua, + ".lua", + "chunk", + "local Thing = {}\n", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.module_node(node), + ruby_private_predicate(source, language, suffix, "module_node?", kind, text), + "module_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_module_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "module Thing\n value\nend\n", + Language::Ruby, + ".rb", + "module", + "module Thing\n value\nend", + ), + ( + "module Empty\nend\n", + Language::Ruby, + ".rb", + "module", + "module Empty\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_module(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_module", + kind, + text + ), + "normalize_module mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_singleton_class_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class << self\n value\nend\n", + Language::Ruby, + ".rb", + "singleton_class", + "class << self\n value\nend", + ), + ( + "class << object\nend\n", + Language::Ruby, + ".rb", + "singleton_class", + "class << object\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_singleton_class(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_singleton_class", + kind, + text + ), + "normalize_singleton_class mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ruby_definition_identifier_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def helper(arg)\n arg\nend\n", + Language::Ruby, + ".rb", + "identifier", + 
"helper", + ), + ( + "def helper(arg)\n arg\nend\n", + Language::Ruby, + ".rb", + "identifier", + "arg", + ), + ( + "items.each { |item| item }\n", + Language::Ruby, + ".rb", + "identifier", + "item", + ), + ( + "def helper\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value", + ), + ( + "def helper(arg):\n return arg\n", + Language::Python, + ".py", + "identifier", + "arg", + ), + ( + "function helper(arg) { return arg; }\n", + Language::TypeScript, + ".ts", + "identifier", + "arg", + ), + ( + "function helper(arg)\n return arg\nend\n", + Language::Lua, + ".lua", + "identifier", + "arg", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_definition_identifier(node), + ruby_private_predicate( + source, + language, + suffix, + "ruby_definition_identifier?", + kind, + text + ), + "ruby_definition_identifier? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn literal_fragment_assignment_context_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value = \"left = right\"\n", + Language::Ruby, + ".rb", + "string_content", + "left = right", + ), + ("value = 1\n", Language::Ruby, ".rb", "identifier", "value"), + ( + "value = \"left = right\"\n", + Language::Python, + ".py", + "string_content", + "left = right", + ), + ( + "const value = \"left = right\";\n", + Language::TypeScript, + ".ts", + "string_fragment", + "left = right", + ), + ( + "local value = \"left = right\"\n", + Language::Lua, + ".lua", + "string_content", + "left = right", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.literal_fragment_assignment_context(node), + ruby_private_predicate( + source, + language, + suffix, + "literal_fragment_assignment_context?", + kind, + text + ), + "literal_fragment_assignment_context? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn assignment_lhs_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "other", + ), + ( + "{ key: value }\n", + Language::Ruby, + ".rb", + "hash_key_symbol", + "key", + ), + ( + "{ key: value }\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.assignment_lhs(node), + ruby_private_predicate(source, language, suffix, "assignment_lhs?", kind, text), + "assignment_lhs? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn assignment_rhs_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "other", + ), + ( + "{ key: value }\n", + Language::Ruby, + ".rb", + "hash_key_symbol", + "key", + ), + ( + "{ key: value }\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.assignment_rhs(node), + ruby_private_predicate(source, language, suffix, "assignment_rhs?", kind, text), + "assignment_rhs? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ruby_assignment_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value = 1\n", + Language::Ruby, + ".rb", + "assignment", + "value = 1", + ), + ( + "value += 1\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value += 1", + ), + ( + "def helper\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value", + ), + ( + "[1].each { |item| local = item }\n", + Language::Ruby, + ".rb", + "block_body", + "local = item", + ), + ( + "value = 1\n", + Language::Python, + ".py", + "expression_statement", + "value = 1", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "assignment_expression", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_assignment_node(node), + ruby_private_predicate( + source, + language, + suffix, + "ruby_assignment_node?", + kind, + text + ), + "ruby_assignment_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn collect_assignment_target_names_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "left, *rest = values\n", + Language::Ruby, + ".rb", + "left_assignment_list", + "left, *rest", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "const value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let mut names = BTreeSet::new(); + normalizer.collect_assignment_target_names(node, &mut names); + + assert_eq!( + names, + ruby_private_collected_names( + source, + language, + suffix, + "collect_assignment_target_names", + kind, + text + ), + "collect_assignment_target_names mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn collect_identifier_names_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "left, *rest = values\n", + Language::Ruby, + ".rb", + "left_assignment_list", + "left, *rest", + ), + ( + "receiver.call(argument)\n", + Language::Ruby, + ".rb", + "call", + "receiver.call(argument)", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "const value = { shorthand };\n", + Language::TypeScript, + ".ts", + "object", + "{ shorthand }", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_declaration", + "local value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = 
super::TreeSitterNormalizer::new(source, language); + let mut names = BTreeSet::new(); + normalizer.collect_identifier_names(node, &mut names); + + assert_eq!( + names, + ruby_private_collected_names( + source, + language, + suffix, + "collect_identifier_names", + kind, + text + ), + "collect_identifier_names mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn member_name_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "identifier", "name"), + ("user&.name\n", Language::Ruby, ".rb", "identifier", "name"), + ( + "user.name()\n", + Language::Python, + ".py", + "identifier", + "name", + ), + ( + "user?.name;\n", + Language::TypeScript, + ".ts", + "property_identifier", + "name", + ), + ("user.name()\n", Language::Lua, ".lua", "identifier", "name"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.member_name(node), + ruby_private_string(source, language, suffix, "member_name", kind, text), + "member_name mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn member_parts_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ("user&.name\n", Language::Ruby, ".rb", "call", "user&.name"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name(thing)\n", + Language::Python, + ".py", + "expression_statement", + "user.name(thing)", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name(thing);\n", + Language::TypeScript, + ".ts", + "call_expression", + "user.name(thing)", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ( + "local value = other\n", 
+ Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.member_parts(node).map(|(receiver, method)| { + ( + receiver.kind().to_string(), + super::node_text(receiver, source).to_string(), + method, + ) + }); + + assert_eq!( + rust, + ruby_private_member_parts(source, language, suffix, kind, text), + "member_parts mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn member_read_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ("foo()\n", Language::Ruby, ".rb", "call", "foo()"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name(thing)\n", + Language::Python, + ".py", + "expression_statement", + "user.name(thing)", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name(thing);\n", + Language::TypeScript, + ".ts", + "call_expression", + "user.name(thing)", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.member_read_node(node), + ruby_private_predicate(source, language, suffix, "member_read_node?", kind, text), + "member_read_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_member_read_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ("value\n", Language::Ruby, ".rb", "identifier", "value"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_member_read(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_member_read", + kind, + text + ), + "normalize_member_read mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn assignment_left_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "assignment", + "value = other", + ), + ( + "left, right = values\n", + Language::Ruby, + ".rb", + "assignment", + "left, right = values", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "assignment_expression", + "value = other", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.assignment_left(node).map(|left| { + ( + 
left.kind().to_string(), + super::node_text(left, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature(source, language, suffix, "assignment_left", kind, text), + "assignment_left mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn assignment_right_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "assignment", + "value = other", + ), + ( + "left, right = values\n", + Language::Ruby, + ".rb", + "assignment", + "left, right = values", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "assignment_expression", + "value = other", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.assignment_right(node).map(|right| { + ( + right.kind().to_string(), + super::node_text(right, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature(source, language, suffix, "assignment_right", kind, text), + "assignment_right mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn singleton_receiver_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def self.foo\nend\n", + "singleton_method", + "def self.foo\nend", + ), + ( + "def User.foo\nend\n", + "singleton_method", + "def User.foo\nend", + ), + ( + "def object.foo\nend\n", + "singleton_method", + "def object.foo\nend", + ), + ( + "def self.foo(value)\n value\nend\n", + "singleton_method", + "def self.foo(value)\n value\nend", + ), + ( + "def object.foo\n value\nend\n", + "singleton_method", + "def object.foo\n value\nend", + ), + ] { + let tree = 
raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer.singleton_receiver(node).map(|receiver| { + ( + receiver.kind().to_string(), + super::node_text(receiver, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature( + source, + Language::Ruby, + ".rb", + "singleton_receiver", + kind, + text + ), + "singleton_receiver mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn singleton_name_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def self.foo\nend\n", + "singleton_method", + "def self.foo\nend", + ), + ( + "def User.foo\nend\n", + "singleton_method", + "def User.foo\nend", + ), + ( + "def object.foo\nend\n", + "singleton_method", + "def object.foo\nend", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + + assert_eq!( + normalizer.singleton_name(node), + ruby_private_string(source, Language::Ruby, ".rb", "singleton_name", kind, text), + "singleton_name mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_singleton_function_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def self.hidden(value)\n return value\nend\n", + "singleton_method", + "def self.hidden(value)\n return value\nend", + ), + ( + "def User.hidden\nend\n", + "singleton_method", + "def User.hidden\nend", + ), + ( + "def object.hidden\n value\nend\n", + "singleton_method", + "def object.hidden\n value\nend", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_singleton_function(node) + .map(|node| node_value(&node)) + 
.unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_singleton_function", + kind, + text + ), + "normalize_singleton_function mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_function_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check(value)\n return value\nend\n", + Language::Ruby, + ".rb", + "method", + "def check(value)\n return value\nend", + ), + ( + "def empty\nend\n", + Language::Ruby, + ".rb", + "method", + "def empty\nend", + ), + ( + "def object.hidden\n value\nend\n", + Language::Ruby, + ".rb", + "singleton_method", + "def object.hidden\n value\nend", + ), + ( + "def check(value):\n return value\n", + Language::Python, + ".py", + "function_definition", + "def check(value):\n return value", + ), + ( + "function check(value) { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function check(value) { return value; }", + ), + ( + "class Box { check(value) { return value; } }\n", + Language::TypeScript, + ".ts", + "method_definition", + "check(value) { return value; }", + ), + ( + "function check(value)\n return value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function check(value)\n return value\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_function(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_function", + kind, + text + ), + "normalize_function mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn lambda_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "fn = ->(x) { x + 1 
}\n", + Language::Ruby, + ".rb", + "lambda", + "->(x) { x + 1 }", + ), + ( + "fn = lambda x: x + 1\n", + Language::Python, + ".py", + "lambda", + "lambda x: x + 1", + ), + ( + "const fn = (x) => x + 1;\n", + Language::TypeScript, + ".ts", + "arrow_function", + "(x) => x + 1", + ), + ( + "const fn = function(x) { return x + 1; };\n", + Language::TypeScript, + ".ts", + "function_expression", + "function(x) { return x + 1; }", + ), + ( + "local fn = function(x) return x + 1 end\n", + Language::Lua, + ".lua", + "expression_list", + "function(x) return x + 1 end", + ), + ( + "function f(x) return x + 1 end\n", + Language::Lua, + ".lua", + "function_declaration", + "function f(x) return x + 1 end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.lambda_expression(node), + ruby_private_predicate(source, language, suffix, "lambda_expression?", kind, text), + "lambda_expression? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_lambda_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "fn = ->(x) { x + 1 }\n", + Language::Ruby, + ".rb", + "lambda", + "->(x) { x + 1 }", + ), + ( + "fn = lambda x: x + 1\n", + Language::Python, + ".py", + "lambda", + "lambda x: x + 1", + ), + ( + "const fn = (x) => x + 1;\n", + Language::TypeScript, + ".ts", + "arrow_function", + "(x) => x + 1", + ), + ( + "const fn = function(x) { return x + 1; };\n", + Language::TypeScript, + ".ts", + "function_expression", + "function(x) { return x + 1; }", + ), + ( + "local fn = function(x) return x + 1 end\n", + Language::Lua, + ".lua", + "expression_list", + "function(x) return x + 1 end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_lambda(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_lambda", + kind, + text + ), + "normalize_lambda mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn lambda_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("fn = ->(x) { x + 1 }\n", Language::Ruby, ".rb"), + ("fn = lambda x: x + 1\n", Language::Python, ".py"), + ("const fn = (x) => x + 1;\n", Language::TypeScript, ".ts"), + ( + "const fn = function(x) { return x + 1; };\n", + Language::TypeScript, + ".ts", + ), + ( + "local fn = function(x) return x + 1 end\n", + Language::Lua, + ".lua", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut lambdas = Vec::new(); + nodes_of_type(&root, "LAMBDA", &mut lambdas); + assert!( + !lambdas.is_empty(), + "expected LAMBDA for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, 
suffix); + } +} + +#[test] +fn function_name_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def run\nend\n", + Language::Ruby, + ".rb", + "method", + "def run\nend", + ), + ( + "def self.run\nend\n", + Language::Ruby, + ".rb", + "singleton_method", + "def self.run\nend", + ), + ( + "def run():\n pass\n", + Language::Python, + ".py", + "function_definition", + "def run():\n pass", + ), + ( + "function run() {}\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function run() {}", + ), + ( + "class Box { run() {} }\n", + Language::TypeScript, + ".ts", + "method_definition", + "run() {}", + ), + ( + "function run()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function run()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.function_name(node).unwrap_or_default(), + ruby_private_string(source, language, suffix, "function_name", kind, text), + "function_name mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn collect_destructured_parameter_targets_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "items.each { |(left, right)| left }\n", + "destructured_parameter", + "(left, right)", + ), + ( + "items.each do |(left, (middle, right))| left end\n", + "destructured_parameter", + "(left, (middle, right))", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let mut targets = Vec::new(); + normalizer.collect_destructured_parameter_targets(node, &mut targets); + let rust = Value::Array(targets.iter().map(node_value).collect()); + + assert_eq!( + rust, + ruby_private_destructured_parameter_targets_value(source, kind, text), + 
"collect_destructured_parameter_targets mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_block_parameters_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "items.each { |(left, right)| left }\n", + Language::Ruby, + ".rb", + "block", + "{ |(left, right)| left }", + ), + ( + "items.each { |item, (left, right)| item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item, (left, right)| item }", + ), + ( + "items.each { |item| item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| item }", + ), + ( + "def f(x):\n pass\n", + Language::Python, + ".py", + "function_definition", + "def f(x):\n pass", + ), + ( + "items.forEach((item) => item);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "items.forEach((item) => item);", + ), + ( + "function f(x)\n return x\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f(x)\n return x\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_block_parameters(Some(node)) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_block_parameters", + kind, + text + ), + "normalize_block_parameters mismatch for {language:?} {kind} {text:?}" + ); + } + + let mut normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + assert!(normalizer.normalize_block_parameters(None).is_none()); +} + +#[test] +fn normalize_parameters_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(value = 1)\nend\n", + Language::Ruby, + ".rb", + "method_parameters", + "(value = 1)", + ), + ( + "def f(value)\nend\n", + Language::Ruby, + ".rb", + "method_parameters", + "(value)", + ), + ( + "def f(value=1):\n pass\n", + 
Language::Python, + ".py", + "parameters", + "(value=1)", + ), + ( + "function f(value = 1) {}\n", + Language::TypeScript, + ".ts", + "formal_parameters", + "(value = 1)", + ), + ( + "function f(value)\nend\n", + Language::Lua, + ".lua", + "parameters", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_parameters(Some(node)) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_parameters", + kind, + text + ), + "normalize_parameters mismatch for {language:?} {kind} {text:?}" + ); + } + + let mut normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + assert!(normalizer.normalize_parameters(None).is_none()); +} + +#[test] +fn normalize_destructured_block_parameter_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "items.each { |(left, right)| left }\n", + "destructured_parameter", + "(left, right)", + ), + ( + "items.each do |(left, (middle, right))| left end\n", + "destructured_parameter", + "(left, (middle, right))", + ), + ("items.each { |item| item }\n", "identifier", "item"), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_destructured_block_parameter(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_destructured_block_parameter", + kind, + text + ), + "normalize_destructured_block_parameter mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn scope_matches_ruby_private_method() { + for (source, language, suffix, 
kind, text, mode) in [ + ("1\n", Language::Ruby, ".rb", "integer", "1", "body"), + ( + "1\n", + Language::Python, + ".py", + "expression_statement", + "1", + "body", + ), + ( + "value;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + "args", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "expression_list", + "value", + "empty", + ), + ] { + let tree = raw_tree(source, language); + let root = tree.root_node(); + let node = first_raw_node(root, source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + normalizer.root_span = Some(super::span(root)); + let body = if mode == "body" { + Some(normalizer.wrap("BODY", Vec::new(), node)) + } else { + None + }; + let args = if mode == "args" { + Some(normalizer.wrap("ARGS", Vec::new(), node)) + } else { + None + }; + let rust = node_value(&normalizer.scope(body, args, node)); + + assert_eq!( + rust, + ruby_private_scope_value(source, language, suffix, kind, text, mode), + "scope mismatch for {language:?} {kind} {text:?} mode {mode}" + ); + } +} + +#[test] +fn list_matches_ruby_private_method() { + for (source, language, suffix, kind, text, mode) in [ + ( + "value\n", + Language::Ruby, + ".rb", + "identifier", + "value", + "one", + ), + ( + "value\n", + Language::Python, + ".py", + "expression_statement", + "value", + "empty", + ), + ( + "value;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + "nil", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "expression_list", + "value", + "one", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let item = normalizer.wrap("ITEM", Vec::new(), node); + let children = match mode { + "nil" => None, + "empty" => Some(Vec::new()), + "one" => Some(vec![item]), + _ => panic!("unknown list mode: {mode}"), + }; + let rust = normalizer + .list(children, node) + 
.as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_list_value(source, language, suffix, kind, text, mode), + "list mismatch for {language:?} {kind} {text:?} mode {mode}" + ); + } +} + +#[test] +fn unwrap_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n (value)\n value\nend\n", + Language::Ruby, + ".rb", + "parenthesized_statements", + "(value)", + ), + ( + "value\n(value)\n", + Language::Python, + ".py", + "expression_statement", + "value", + ), + ( + "value\n(value)\n", + Language::Python, + ".py", + "expression_statement", + "(value)", + ), + ( + "const value = (other);\n", + Language::TypeScript, + ".ts", + "parenthesized_expression", + "(other)", + ), + ( + "local first = (other)\nlocal second = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "(other)", + ), + ( + "local first = (other)\nlocal second = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.unwrap_node(node), + ruby_private_predicate(source, language, suffix, "unwrap_node?", kind, text), + "unwrap_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn statement_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n return value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "return value", + ), + ( + "def check\n return value\nend\n", + Language::Ruby, + ".rb", + "identifier", + "check", + ), + ( + "value\n(value)\n", + Language::Python, + ".py", + "expression_statement", + "(value)", + ), + ( + "value\n(value)\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "function check() { return value + other; }\n", + Language::TypeScript, + ".ts", + "return_statement", + "return value + other;", + ), + ( + "function check() { return value + other; }\n", + Language::TypeScript, + ".ts", + "binary_expression", + "value + other", + ), + ( + "function check() { return value + other; }\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "return_statement", + "return value", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "expression_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.statement_node(node.kind()), + ruby_private_predicate(source, language, suffix, "statement_node?", kind, text), + "statement_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn local_identifier_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\nend\nclass Thing; end\n", + Language::Ruby, + ".rb", + "identifier", + "check", + ), + ( + "def check\nend\nclass Thing; end\n", + Language::Ruby, + ".rb", + "constant", + "Thing", + ), + ( + "def check(value):\n pass\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "def check(value):\n pass\n", + Language::Python, + ".py", + "parameters", + "(value)", + ), + ( + "const value = object.field;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "const value = object.field;\n", + Language::TypeScript, + ".ts", + "property_identifier", + "field", + ), + ( + "const value = object.field;\n", + Language::TypeScript, + ".ts", + "lexical_declaration", + "const value = object.field;", + ), + ( + "local value = other\nprint(value)\n", + Language::Lua, + ".lua", + "identifier", + "value", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.identifier_kind(node.kind()), + ruby_private_predicate(source, language, suffix, "local_identifier?", kind, text), + "local_identifier? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ruby_local_name_matches_scope_stack_lookup() { + let mut normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + normalizer.local_stack = vec![ + BTreeSet::from(["outer".to_string(), "shared".to_string()]), + BTreeSet::from(["inner".to_string()]), + ]; + + assert!(normalizer.ruby_local_name("outer")); + assert!(normalizer.ruby_local_name("inner")); + assert!(normalizer.ruby_local_name("shared")); + assert!(!normalizer.ruby_local_name("missing")); +} + +#[test] +fn ruby_vcall_identifier_matches_ruby_private_predicate() { + let cases = vec![ + ( + "ruby_vcall", + "foo\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "ruby_local", + "foo\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + vec!["foo"], + ), + ( + "assignment_lhs", + "foo = 1\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "method_name", + "def foo\nend\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "parameter", + "def f(foo)\nend\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "non_identifier", + "Thing\n", + Language::Ruby, + ".rb", + "constant", + "Thing", + Vec::<&str>::new(), + ), + ( + "non_ruby", + "foo\n", + Language::Python, + ".py", + "expression_statement", + "foo", + Vec::<&str>::new(), + ), + ]; + + for (label, source, language, suffix, kind, text, locals) in cases { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + if !locals.is_empty() { + normalizer + .local_stack + .push(locals.iter().map(|name| name.to_string()).collect()); + } + + assert_eq!( + normalizer.ruby_vcall_identifier(node, super::node_text(node, source)), + ruby_private_ruby_vcall_identifier_predicate( + source, language, suffix, kind, text, 
&locals, + ), + "ruby_vcall_identifier? mismatch for {label}" + ); + } +} + +#[test] +fn vcall_identifier_matches_ruby_private_predicate() { + let cases = vec![ + ( + "ruby_modifier_action", + "foo if cond\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "ruby_local", + "foo if cond\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + vec!["foo"], + ), + ( + "method_name", + "def foo\nend\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "argument", + "call(foo)\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "member_read", + "def f\n user.name\nend\n", + Language::Ruby, + ".rb", + "identifier", + "name", + Vec::<&str>::new(), + ), + ( + "assignment_lhs", + "foo = bar\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "python_identifier", + "foo\n", + Language::Python, + ".py", + "expression_statement", + "foo", + Vec::<&str>::new(), + ), + ( + "typescript_identifier", + "foo;\n", + Language::TypeScript, + ".ts", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "lua_identifier", + "foo()\n", + Language::Lua, + ".lua", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ]; + + for (label, source, language, suffix, kind, text, locals) in cases { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + if !locals.is_empty() { + normalizer + .local_stack + .push(locals.iter().map(|name| name.to_string()).collect()); + } + + assert_eq!( + normalizer.vcall_identifier(node, super::node_text(node, source)), + ruby_private_vcall_identifier_predicate(source, language, suffix, kind, text, &locals,), + "vcall_identifier? 
mismatch for {label}" + ); + } + + let source = "def f\n Thing\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "constant", "Thing"); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + assert!( + !normalizer.vcall_identifier(node, super::node_text(node, source)), + "vcall_identifier? must reject non-local identifiers in statement wrappers" + ); + + let source = "foo\n"; + let tree = raw_tree(source, Language::Python); + let node = first_raw_node(tree.root_node(), source, "identifier", "foo"); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Python); + assert!( + !normalizer.vcall_identifier(node, super::node_text(node, source)), + "vcall_identifier? must reject Python bare identifiers" + ); +} + +#[test] +fn collect_ruby_parameter_locals_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def f(a, b = 1, *rest, key:, **opts, &block)\nend\n", + "method_parameters", + "(a, b = 1, *rest, key:, **opts, &block)", + ), + ( + "[1].each { |item, (left, right)| item }\n", + "block_parameters", + "|item, (left, right)|", + ), + ("fn = ->(x, y:) { x }\n", "lambda_parameters", "(x, y:)"), + ("value = other\n", "assignment", "value = other"), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let mut locals = BTreeSet::new(); + normalizer.collect_ruby_parameter_locals(node, &mut locals); + + assert_eq!( + locals, + ruby_private_collected_names( + source, + Language::Ruby, + ".rb", + "collect_ruby_parameter_locals", + kind, + text + ), + "collect_ruby_parameter_locals mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn collect_ruby_assignment_locals_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "assignment", + 
"value = other", + ), + ( + "left, *rest = values\n", + Language::Ruby, + ".rb", + "assignment", + "left, *rest = values", + ), + ( + "value += 1\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value += 1", + ), + ( + "begin\n work\nrescue => error\n error\nend\n", + Language::Ruby, + ".rb", + "exception_variable", + "=> error", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let mut locals = BTreeSet::new(); + normalizer.collect_ruby_assignment_locals(node, &mut locals); + + assert_eq!( + locals, + ruby_private_collected_names( + source, + language, + suffix, + "collect_ruby_assignment_locals", + kind, + text + ), + "collect_ruby_assignment_locals mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn collect_ruby_scope_locals_matches_ruby_private_method() { + for (source, language, suffix, kind, text, root) in [ + ( + "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend\n", + Language::Ruby, + ".rb", + "method", + "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend", + true, + ), + ( + "def outer(a)\n local = 1\nend\n", + Language::Ruby, + ".rb", + "method", + "def outer(a)\n local = 1\nend", + false, + ), + ( + "[1].each { |item| local = item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| local = item }", + true, + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + true, + ), + ( + "let value 
= other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + true, + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + true, + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let mut locals = BTreeSet::new(); + normalizer.collect_ruby_scope_locals(node, &mut locals, root); + + assert_eq!( + locals, + ruby_private_scope_collected_names(source, language, suffix, kind, text, root), + "collect_ruby_scope_locals mismatch for {language:?} {kind} {text:?} root={root}" + ); + } +} + +#[test] +fn ruby_scope_locals_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend\n", + Language::Ruby, + ".rb", + "method", + "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend", + ), + ( + "[1].each { |item| local = item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| local = item }", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_scope_locals(node), + ruby_private_ruby_scope_locals(source, language, suffix, kind, text), + "ruby_scope_locals mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn 
with_ruby_scope_matches_ruby_private_method() { + for (source, language, suffix, kind, text, reset, initial_stack) in [ + ( + "def f(a)\n local = 1\nend\n", + Language::Ruby, + ".rb", + "method", + "def f(a)\n local = 1\nend", + false, + vec![vec!["outer"]], + ), + ( + "def f(a)\n local = 1\nend\n", + Language::Ruby, + ".rb", + "method", + "def f(a)\n local = 1\nend", + true, + vec![vec!["outer"]], + ), + ( + "[1].each { |item| local = item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| local = item }", + false, + vec![], + ), + ( + "def f(value):\n local = value\n", + Language::Python, + ".py", + "function_definition", + "def f(value):\n local = value", + true, + vec![vec!["outer"]], + ), + ( + "function f(value) { let local = value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f(value) { let local = value; }", + true, + vec![vec!["outer"]], + ), + ( + "function f(value)\n local local_value = value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f(value)\n local local_value = value\nend", + true, + vec![vec!["outer"]], + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + normalizer.local_stack = local_stack_from(&initial_stack); + let before = local_stack_value(&normalizer.local_stack); + let inside = normalizer.with_ruby_scope(node, reset, |normalizer| { + local_stack_value(&normalizer.local_stack) + }); + let after = local_stack_value(&normalizer.local_stack); + let rust = json!({ + "before": before, + "inside": inside, + "after": after, + "result": "block-result", + }); + + assert_eq!( + rust, + ruby_private_with_ruby_scope_trace( + source, + language, + suffix, + kind, + text, + reset, + &initial_stack, + ), + "with_ruby_scope mismatch for {language:?} {kind} {text:?} reset={reset}" + ); + } +} + +#[test] +fn 
ruby_scope_boundary_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def f\n value\nend", + ), + ( + "class Box\nend\n", + Language::Ruby, + ".rb", + "class", + "class Box\nend", + ), + ( + "module Admin\nend\n", + Language::Ruby, + ".rb", + "module", + "module Admin\nend", + ), + ( + "items.each { |item| item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| item }", + ), + ( + "handler = -> { value }\n", + Language::Ruby, + ".rb", + "block", + "{ value }", + ), + ( + "def f():\n return value\n break\n continue\n", + Language::Python, + ".py", + "function_definition", + "def f():\n return value\n break\n continue", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "block", + "return value", + ), + ( + "class Box:\n pass\n", + Language::Python, + ".py", + "class_definition", + "class Box:\n pass", + ), + ( + "function f() { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { return value; }", + ), + ( + "class Box {}\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Box {}", + ), + ( + "function f()\n return value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n return value\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_scope_boundary(node), + ruby_private_predicate(source, language, suffix, "ruby_scope_boundary?", kind, text), + "ruby_scope_boundary? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ruby_scope_child_boundary_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def f\n value\nend", + ), + ( + "class Box\nend\n", + Language::Ruby, + ".rb", + "class", + "class Box\nend", + ), + ( + "module Admin\nend\n", + Language::Ruby, + ".rb", + "module", + "module Admin\nend", + ), + ( + "items.each { |item| item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| item }", + ), + ( + "handler = -> { value }\n", + Language::Ruby, + ".rb", + "block", + "{ value }", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "function_definition", + "def f():\n return value", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "block", + "return value", + ), + ( + "class Box:\n pass\n", + Language::Python, + ".py", + "class_definition", + "class Box:\n pass", + ), + ( + "function f() { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { return value; }", + ), + ( + "class Box {}\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Box {}", + ), + ( + "function f()\n return value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n return value\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_scope_child_boundary(node), + ruby_private_predicate( + source, + language, + suffix, + "ruby_scope_child_boundary?", + kind, + text + ), + "ruby_scope_child_boundary? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ruby_predicate_uses_normalization_adapter() { + for (language, expected) in [ + (Language::Ruby, true), + (Language::Python, false), + (Language::Lua, false), + (Language::TypeScript, false), + ] { + let normalizer = super::TreeSitterNormalizer::new("", language); + + assert_eq!( + normalizer.ruby(), + expected, + "ruby? mismatch for {language:?}" + ); + } +} + +#[test] +fn interpolated_string_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "name = \"hi #{user}\"\nplain = \"hi\"\n", + Language::Ruby, + ".rb", + "string", + "\"hi #{user}\"", + ), + ( + "name = \"hi #{user}\"\nplain = \"hi\"\n", + Language::Ruby, + ".rb", + "string", + "\"hi\"", + ), + ( + "name = f\"hi {user}\"\nplain = \"hi\"\n", + Language::Python, + ".py", + "string", + "f\"hi {user}\"", + ), + ( + "name = f\"hi {user}\"\nplain = \"hi\"\n", + Language::Python, + ".py", + "string", + "\"hi\"", + ), + ( + "const name = `hi ${user}`;\nconst plain = `hi`;\n", + Language::TypeScript, + ".ts", + "template_string", + "`hi ${user}`", + ), + ( + "const name = `hi ${user}`;\nconst plain = `hi`;\n", + Language::TypeScript, + ".ts", + "template_string", + "`hi`", + ), + ( + "local name = \"hi\"\n", + Language::Lua, + ".lua", + "expression_list", + "\"hi\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.interpolated_string(node), + ruby_private_predicate(source, language, suffix, "interpolated_string?", kind, text), + "interpolated_string? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_interpolated_string_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "name = \"hi #{user}\"\n", + Language::Ruby, + ".rb", + "string", + "\"hi #{user}\"", + ), + ( + "name = f\"hi {user}\"\n", + Language::Python, + ".py", + "string", + "f\"hi {user}\"", + ), + ( + "const name = `hi ${user}`;\n", + Language::TypeScript, + ".ts", + "template_string", + "`hi ${user}`", + ), + ( + "local name = \"hi\"\n", + Language::Lua, + ".lua", + "expression_list", + "\"hi\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_interpolated_string(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_interpolated_string", + kind, + text + ), + "normalize_interpolated_string mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_subshell_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = `echo hi`\n", + Language::Ruby, + ".rb", + "subshell", + "`echo hi`", + ), + ( + "value = `echo #{name}`\n", + Language::Ruby, + ".rb", + "subshell", + "`echo #{name}`", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_subshell(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_subshell", + kind, + text + ), + "normalize_subshell mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn const_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "class Thing; 
end\ndef check; end\n", + Language::Ruby, + ".rb", + "constant", + "Thing", + ), + ( + "class Thing; end\ndef check; end\n", + Language::Ruby, + ".rb", + "identifier", + "check", + ), + ( + "class Thing:\n pass\n", + Language::Python, + ".py", + "identifier", + "Thing", + ), + ( + "type Thing = Other;\nconst value = Thing;\n", + Language::TypeScript, + ".ts", + "type_identifier", + "Thing", + ), + ( + "type Thing = Other;\nconst value = Thing;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "local Thing = {}\n", + Language::Lua, + ".lua", + "variable_list", + "Thing", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.const_kind(node.kind()), + ruby_private_predicate(source, language, suffix, "const_node?", kind, text), + "const_node? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn self_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("self\nother\n", Language::Ruby, ".rb", "self", "self"), + ( + "self\nother\n", + Language::Ruby, + ".rb", + "identifier", + "other", + ), + ( + "self.value\nother.value\n", + Language::Python, + ".py", + "identifier", + "self", + ), + ( + "self.value\nother.value\n", + Language::Python, + ".py", + "identifier", + "other", + ), + ( + "this.value;\nother;\n", + Language::TypeScript, + ".ts", + "this", + "this", + ), + ( + "this.value;\nother;\n", + Language::TypeScript, + ".ts", + "identifier", + "other", + ), + ( + "print(self.value)\nprint(other.value)\n", + Language::Lua, + ".lua", + "identifier", + "self", + ), + ( + "print(self.value)\nprint(other.value)\n", + Language::Lua, + ".lua", + "identifier", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = 
super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.self_node(node), + ruby_private_predicate(source, language, suffix, "self_node?", kind, text), + "self_node? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn instance_variable_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "@value\nname\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + ), + ( + "@value\nname\n", + Language::Ruby, + ".rb", + "identifier", + "name", + ), + ( + "@decorator\ndef call():\n pass\n", + Language::Python, + ".py", + "decorator", + "@decorator", + ), + ( + "@sealed\nclass Thing {}\n", + Language::TypeScript, + ".ts", + "decorator", + "@sealed", + ), + ( + "print(value)\n", + Language::Lua, + ".lua", + "identifier", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.instance_variable(node), + ruby_private_predicate(source, language, suffix, "instance_variable?", kind, text), + "instance_variable? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn global_variable_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "$value\nname\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "$value\nname\n", + Language::Ruby, + ".rb", + "identifier", + "name", + ), + ( + "value = \"$name\"\n", + Language::Python, + ".py", + "string_content", + "$name", + ), + ( + "const $value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "$value", + ), + ( + "print(\"$name\")\n", + Language::Lua, + ".lua", + "string_content", + "$name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.global_variable(node), + ruby_private_predicate(source, language, suffix, "global_variable?", kind, text), + "global_variable? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_global_variable_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "$value\n$1\n$12\n$0\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "$value\n$1\n$12\n$0\n", + Language::Ruby, + ".rb", + "global_variable", + "$1", + ), + ( + "$value\n$1\n$12\n$0\n", + Language::Ruby, + ".rb", + "global_variable", + "$12", + ), + ( + "$value\n$1\n$12\n$0\n", + Language::Ruby, + ".rb", + "global_variable", + "$0", + ), + ( + "value = \"$name\"\n", + Language::Python, + ".py", + "string_content", + "$name", + ), + ( + "const $value = 1;\n", + Language::TypeScript, + ".ts", + "identifier", + "$value", + ), + ( + "print(\"$name\")\n", + Language::Lua, + ".lua", + "string_content", + "$name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = 
normalizer.normalize_global_variable(node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_global_variable", + kind, + text + ), + "normalize_global_variable mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn assignment_operator_matches_ruby_private_predicate() { + for (language, text) in [ + (Language::Ruby, "="), + (Language::Ruby, "**="), + (Language::Ruby, "??="), + (Language::Python, ":="), + (Language::Python, "//="), + (Language::Python, "&&="), + (Language::TypeScript, "??="), + (Language::TypeScript, ">>>="), + (Language::TypeScript, ":="), + (Language::Lua, "="), + (Language::Lua, "+="), + ] { + let normalizer = super::TreeSitterNormalizer::new("", language); + + assert_eq!( + normalizer.assignment_operator(text), + ruby_private_text_predicate(language, "assignment_operator?", text), + "assignment_operator? mismatch for {language:?} {text:?}" + ); + } +} + +#[test] +fn operator_assignment_operator_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value **= other\nflag ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value **= other", + ), + ( + "value **= other\nflag ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "flag ||= fallback", + ), + ( + "value //= other\n", + Language::Python, + ".py", + "expression_statement", + "value //= other", + ), + ( + "value ??= other;\ncount >>>= 1;\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "value ??= other", + ), + ( + "value ??= other;\ncount >>>= 1;\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "count >>>= 1", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.operator_assignment_operator(node), + 
ruby_private_string( + source, + language, + suffix, + "operator_assignment_operator", + kind, + text + ), + "operator_assignment_operator mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_logical_operator_assignment_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value ||= fallback", + ), + ( + "value &&= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value &&= fallback", + ), + ( + "value += fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value += fallback", + ), + ( + "@value ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "@value ||= fallback", + ), + ( + "value //= fallback\n", + Language::Python, + ".py", + "expression_statement", + "value //= fallback", + ), + ( + "value ||= fallback;\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "value ||= fallback", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let left = normalizer + .assignment_left(node) + .expect("operator assignment should have left side"); + let right = normalizer + .assignment_right(node) + .and_then(|right| normalizer.normalize_node(right)); + let operator = normalizer.operator_assignment_operator(node); + let rust = normalizer + .normalize_logical_operator_assignment(left, &operator, right, node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_logical_operator_assignment_value(source, language, suffix, kind, text), + "normalize_logical_operator_assignment mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_operator_assignment_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value += other\n", + 
Language::Ruby, + ".rb", + "operator_assignment", + "value += other", + ), + ( + "$value += 1\n", + Language::Ruby, + ".rb", + "operator_assignment", + "$value += 1", + ), + ( + "items[index] += value\n", + Language::Ruby, + ".rb", + "operator_assignment", + "items[index] += value", + ), + ( + "object.value += 1\n", + Language::Ruby, + ".rb", + "operator_assignment", + "object.value += 1", + ), + ( + "flag ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "flag ||= fallback", + ), + ( + "flag &&= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "flag &&= fallback", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_operator_assignment(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_operator_assignment", + kind, + text + ), + "normalize_operator_assignment mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn first_named_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class Thing; end\nname\n", + Language::Ruby, + ".rb", + "class", + "class Thing; end", + ), + ( + "class Thing; end\nname\n", + Language::Ruby, + ".rb", + "identifier", + "name", + ), + ( + "def check(value):\n return value\n", + Language::Python, + ".py", + "function_definition", + "def check(value):\n return value", + ), + ( + "function check(value) { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function check(value) { return value; }", + ), + ( + "print(value)\n", + Language::Lua, + ".lua", + "function_call", + "print(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = 
super::TreeSitterNormalizer::new(source, language); + let found = normalizer.first_named(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "first_named", kind, text), + "first_named mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn block_child_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n call\nend\n", + Language::Ruby, + ".rb", + "method", + "def check\n call\nend", + ), + ( + "items.each do\n call\nend\n", + Language::Ruby, + ".rb", + "call", + "items.each do\n call\nend", + ), + ( + "def check():\n call()\n", + Language::Python, + ".py", + "function_definition", + "def check():\n call()", + ), + ( + "function check() { call(); }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function check() { call(); }", + ), + ( + "function check()\n call()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function check()\n call()\nend", + ), + ("name\n", Language::Ruby, ".rb", "identifier", "name"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.block_child(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "block_child", kind, text), + "block_child mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn branch_child_matches_ruby_private_method() { + for (source, language, suffix, kind, text, condition_kind, condition_text, index) in [ + ( + "if ready\n call\nelse\n stop\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nelse\n stop\nend", + "identifier", + "ready", + 0, + ), + ( + "if ready\n call\nelse\n 
stop\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nelse\n stop\nend", + "identifier", + "ready", + 1, + ), + ( + "if ready\n # note\n call\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n # note\n call\nend", + "identifier", + "ready", + 0, + ), + ( + "if ready:\n call()\nelse:\n stop()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()\nelse:\n stop()", + "identifier", + "ready", + 1, + ), + ( + "if (ready) { call(); } else { stop(); }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (ready) { call(); } else { stop(); }", + "parenthesized_expression", + "(ready)", + 0, + ), + ( + "if ready then\n call()\nelse\n stop()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nelse\n stop()\nend", + "identifier", + "ready", + 1, + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let condition = first_raw_node(tree.root_node(), source, condition_kind, condition_text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.branch_child(node, condition, index).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_branch_child_signature( + source, + language, + suffix, + kind, + text, + condition_kind, + condition_text, + index + ), + "branch_child mismatch for {language:?} {kind} {text:?} index {index}" + ); + } +} + +#[test] +fn explicit_alternative_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "if ready\n call\nelsif other\n stop\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nelsif other\n stop\nend", + ), + ( + "if ready\n call\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nend", + ), + ( + "if ready:\n call()\nelif other:\n stop()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()\nelif 
other:\n stop()", + ), + ( + "if (ready) { call(); } else { stop(); }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (ready) { call(); } else { stop(); }", + ), + ( + "if ready then\n call()\nelseif other then\n stop()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nelseif other then\n stop()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.explicit_alternative(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature( + source, + language, + suffix, + "explicit_alternative", + kind, + text + ), + "explicit_alternative mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn wrap_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "first\nsecond\n", + Language::Ruby, + ".rb", + "identifier", + "second", + ), + ( + "first\nsecond\n", + Language::Python, + ".py", + "expression_statement", + "second", + ), + ( + "first;\nsecond;\n", + Language::TypeScript, + ".ts", + "identifier", + "second", + ), + ( + "print(first)\nprint(second)\n", + Language::Lua, + ".lua", + "identifier", + "second", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + let raw_wrapped = normalizer.wrap("OUTER", vec![Child::Symbol("child".to_string())], node); + assert_eq!( + node_value(&raw_wrapped), + ruby_private_wrap_value(source, language, suffix, kind, text, false), + "wrap raw-source mismatch for {language:?} {kind} {text:?}" + ); + + let inner = normalizer.wrap("INNER", Vec::new(), node); + let node_wrapped = normalizer.wrap_from_source_node( + "OUTER", + 
vec![Child::Symbol("child".to_string())], + &inner, + ); + assert_eq!( + node_value(&node_wrapped), + ruby_private_wrap_value(source, language, suffix, kind, text, true), + "wrap normalized-source mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn source_before_child_matches_ruby_private_method() { + for (source, language, suffix, kind, text, child_kind, child_text) in [ + ( + "if ready\n call\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nend", + "then", + "\n call", + ), + ( + "if ready:\n call()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()", + "block", + "call()", + ), + ( + "if (ready) { call(); }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (ready) { call(); }", + "statement_block", + "{ call(); }", + ), + ( + "if ready then\n call()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nend", + "block", + "call()", + ), + ( + "puts value\n", + Language::Ruby, + ".rb", + "call", + "puts value", + "identifier", + "puts", + ), + ( + "call()\n", + Language::Python, + ".py", + "expression_statement", + "call()", + "identifier", + "call", + ), + ( + "call();\n", + Language::TypeScript, + ".ts", + "expression_statement", + "call();", + "identifier", + "call", + ), + ( + "call()\n", + Language::Lua, + ".lua", + "function_call", + "call()", + "identifier", + "call", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let child = first_raw_node(tree.root_node(), source, child_kind, child_text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let source_node = normalizer.source_before_child(node, child); + let wrapped = normalizer.wrap_from_source_node("OUTER", Vec::new(), &source_node); + + assert_eq!( + node_value(&wrapped), + ruby_private_source_before_child_wrap_value( + source, language, suffix, kind, text, child_kind, child_text + ), + "source_before_child mismatch 
for {language:?} {kind} {text:?} before {child_kind} {child_text:?}" + ); + } +} + +#[test] +fn source_from_nodes_matches_ruby_private_method() { + for (source, language, suffix, first_kind, first_text, last_kind, last_text) in [ + ( + "left + right\n", + Language::Ruby, + ".rb", + "identifier", + "left", + "identifier", + "right", + ), + ( + "left = one\nright = two\n", + Language::Python, + ".py", + "identifier", + "one", + "identifier", + "two", + ), + ( + "const left = one;\nconst right = two;\n", + Language::TypeScript, + ".ts", + "identifier", + "one", + "identifier", + "two", + ), + ( + "local left = one\nlocal right = two\n", + Language::Lua, + ".lua", + "expression_list", + "one", + "expression_list", + "two", + ), + ] { + let tree = raw_tree(source, language); + let first_raw = first_raw_node(tree.root_node(), source, first_kind, first_text); + let last_raw = first_raw_node(tree.root_node(), source, last_kind, last_text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let source_node = normalizer.source_from_nodes(first_raw, last_raw); + + assert_eq!( + node_value(&source_node), + ruby_private_source_from_nodes_value( + source, language, suffix, first_kind, first_text, last_kind, last_text + ), + "source_from_nodes mismatch for {language:?} {first_kind} {first_text:?} through {last_kind} {last_text:?}" + ); + } +} + +#[test] +fn source_from_normalized_nodes_matches_ruby_private_method() { + for (source, language, suffix, first_kind, first_text, last_kind, last_text) in [ + ( + "first\nsecond\n", + Language::Ruby, + ".rb", + "identifier", + "first", + "identifier", + "second", + ), + ( + "first\nsecond\n", + Language::Python, + ".py", + "expression_statement", + "first", + "expression_statement", + "second", + ), + ( + "first;\nsecond;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "first;", + "expression_statement", + "second;", + ), + ( + "print(first)\nprint(second)\n", + Language::Lua, + ".lua", + 
"function_call", + "print(first)", + "function_call", + "print(second)", + ), + ( + "first + second\n", + Language::Ruby, + ".rb", + "identifier", + "first", + "identifier", + "second", + ), + ] { + let tree = raw_tree(source, language); + let first_raw = first_raw_node(tree.root_node(), source, first_kind, first_text); + let last_raw = first_raw_node(tree.root_node(), source, last_kind, last_text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let first_node = normalizer.wrap("FIRST", Vec::new(), first_raw); + let last_node = normalizer.wrap("LAST", Vec::new(), last_raw); + let source_node = normalizer.source_from_normalized_nodes(&first_node, &last_node); + + assert_eq!( + node_value(&source_node), + ruby_private_source_from_normalized_nodes_value( + source, language, suffix, first_kind, first_text, last_kind, last_text + ), + "source_from_normalized_nodes mismatch for {language:?} {first_kind} {first_text:?} through {last_kind} {last_text:?}" + ); + } +} + +#[test] +fn named_field_matches_ruby_private_method() { + for (source, language, suffix, kind, text, field) in [ + ( + "def check(value)\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def check(value)\n value\nend", + "name", + ), + ( + "def check(value)\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def check(value)\n value\nend", + "missing", + ), + ( + "if ready:\n call()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()", + "body", + ), + ( + "if ready:\n call()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()", + "condition", + ), + ( + "function check(value) { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function check(value) { return value; }", + "body", + ), + ( + "function check(value)\n return value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function check(value)\n return value\nend", + "body", + ), + ] { + let tree = raw_tree(source, language); 
+ let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.named_field(node, field).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_named_field_signature(source, language, suffix, kind, text, field), + "named_field mismatch for {language:?} {kind} {text:?} field {field}" + ); + } +} + +#[test] +fn parent_node_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\nend\n", + Language::Ruby, + ".rb", + "identifier", + "check", + ), + ("value\n", Language::Ruby, ".rb", "program", "value\n"), + ( + "if ready:\n call()\n", + Language::Python, + ".py", + "identifier", + "ready", + ), + ( + "call(value);\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "call(value)\n", + Language::Lua, + ".lua", + "identifier", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.parent_node(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "parent_node", kind, text), + "parent_node mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn next_sibling_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("a + b\n", Language::Ruby, ".rb", "identifier", "a"), + ("a + b\n", Language::Python, ".py", "identifier", "a"), + ("a + b;\n", Language::TypeScript, ".ts", "identifier", "a"), + ("print(a, b)\n", Language::Lua, ".lua", "identifier", "a"), + ("a\n", Language::Ruby, ".rb", "identifier", "a"), + ] { + let tree = raw_tree(source, language); + let node = 
first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.next_sibling(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "next_sibling", kind, text), + "next_sibling mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn prev_sibling_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("a + b\n", Language::Ruby, ".rb", "identifier", "b"), + ("a + b\n", Language::Python, ".py", "identifier", "b"), + ("a + b;\n", Language::TypeScript, ".ts", "identifier", "b"), + ("print(a, b)\n", Language::Lua, ".lua", "identifier", "b"), + ("a\n", Language::Ruby, ".rb", "identifier", "a"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.prev_sibling(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "prev_sibling", kind, text), + "prev_sibling mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn next_named_sibling_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("a + b\n", Language::Ruby, ".rb", "identifier", "a"), + ("a + b\n", Language::Python, ".py", "identifier", "a"), + ("a + b;\n", Language::TypeScript, ".ts", "identifier", "a"), + ("print(a, b)\n", Language::Lua, ".lua", "identifier", "a"), + ("a\n", Language::Ruby, ".rb", "identifier", "a"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = 
normalizer.next_named_sibling(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "next_named_sibling", kind, text), + "next_named_sibling mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ternary_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f(cond, a, b)\n cond ? a : b\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "cond ? a : b", + ), + ( + "value = a if cond else b\n", + Language::Python, + ".py", + "conditional_expression", + "a if cond else b", + ), + ( + "const value = cond ? a : b;\n", + Language::TypeScript, + ".ts", + "ternary_expression", + "cond ? a : b", + ), + ( + "local value = cond and a or b\n", + Language::Lua, + ".lua", + "expression_list", + "cond and a or b", + ), + ( + "def f(cond)\n cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "cond", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ternary_statement(node), + ruby_private_predicate(source, language, suffix, "ternary_statement?", kind, text), + "ternary_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_ternary_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(cond, a, b)\n cond ? a : b\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "cond ? a : b", + ), + ( + "value = a if cond else b\n", + Language::Python, + ".py", + "conditional_expression", + "a if cond else b", + ), + ( + "const value = cond ? a : b;\n", + Language::TypeScript, + ".ts", + "ternary_expression", + "cond ? 
a : b", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_ternary_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_ternary_statement", + kind, + text + ), + "normalize_ternary_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ternary_statement_normalization_matches_ruby() { + for (source, language, suffix, if_text) in [ + ( + "def f(cond, a, b)\n cond ? a : b\nend\n", + Language::Ruby, + ".rb", + "cond ? a : b", + ), + ( + "def f(cond, a, b):\n return a if cond else b\n", + Language::Python, + ".py", + "a if cond else b", + ), + ( + "function f(cond: boolean, a: number, b: number) { return cond ? a : b; }\n", + Language::TypeScript, + ".ts", + "cond ? 
a : b", + ), + ] { + let root = parse_language_source(source, language, suffix); + let if_node = first_node(&root, "IF", if_text); + assert_eq!(child_node(if_node, 0).text, "cond"); + assert_eq!(child_node(if_node, 1).text, "a"); + assert_eq!(child_node(if_node, 2).text, "b"); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn case_argument_list_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n return case x\n when 1 then :one\n else :other\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "case x\n when 1 then :one\n else :other\n end", + ), + ( + "case x\nwhen 1 then :one\nelse :other\nend\n", + Language::Ruby, + ".rb", + "case", + "case x\nwhen 1 then :one\nelse :other\nend", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "case_clause", + "case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); break; }\n", + Language::TypeScript, + ".ts", + "switch_case", + "case 1: one(); break;", + ), + ( + "if value == 1 then one() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.case_argument_list(node), + ruby_private_predicate(source, language, suffix, "case_argument_list?", kind, text), + "case_argument_list? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn leading_function_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def outer\n def inner\n x\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "def inner\n x\n end", + ), + ( + "def outer():\n def inner():\n x\n", + Language::Python, + ".py", + "block", + "def inner():\n x", + ), + ( + "function outer()\n function inner()\n x()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "function inner()\n x()\n end", + ), + ( + "function outer() { function inner() { x; } }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function inner() { x; }", + ), + ( + "def outer\n x\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.leading_function_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_function_statement?", + kind, + text + ), + "leading_function_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_leading_function_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def outer\n def inner\n x\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "def inner\n x\n end", + ), + ( + "def outer():\n def inner():\n x\n", + Language::Python, + ".py", + "block", + "def inner():\n x", + ), + ( + "function outer()\n function inner()\n x()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "function inner()\n x()\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_function_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_function_statement", + kind, + text + ), + "normalize_leading_function_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn leading_function_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def outer\n def inner\n x\n end\nend\n", + Language::Ruby, + ".rb", + ), + ( + "def outer():\n def inner():\n x\n", + Language::Python, + ".py", + ), + ( + "function outer()\n function inner()\n x()\n end\nend\n", + Language::Lua, + ".lua", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut defns = Vec::new(); + nodes_of_type(&root, "DEFN", &mut defns); + assert!( + defns.iter().any( + |node| matches!(node.children.first(), Some(Child::Symbol(name)) if name == "inner") + ), + "expected nested DEFN inner for {language:?} in {root:#?}" + ); + let mut iters = Vec::new(); + nodes_of_type(&root, "ITER", &mut iters); + assert!( + iters.iter().all(|node| !node.text.contains("inner")), + "nested function must 
not normalize as ITER for {language:?}: {iters:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn leading_owner_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def outer\n class Inner\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "class Inner\n value\n end", + ), + ( + "def outer\n module Inner\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "module Inner\n value\n end", + ), + ( + "def outer():\n class Inner:\n pass\n", + Language::Python, + ".py", + "block", + "class Inner:\n pass", + ), + ( + "function outer() { class Inner {} }\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Inner {}", + ), + ( + "function outer()\n Inner = {}\nend\n", + Language::Lua, + ".lua", + "block", + "Inner = {}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.leading_owner_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_owner_statement?", + kind, + text + ), + "leading_owner_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_leading_owner_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def outer\n class Inner\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "class Inner\n value\n end", + ), + ( + "def outer\n module Inner\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "module Inner\n value\n end", + ), + ( + "def outer():\n class Inner:\n pass\n", + Language::Python, + ".py", + "block", + "class Inner:\n pass", + ), + ( + "function outer() { class Inner {} }\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Inner {}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_owner_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_owner_statement", + kind, + text + ), + "normalize_leading_owner_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn modifier_keyword_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value if cond", + ), + ( + "def f\n value unless cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value unless cond", + ), + ( + "def f\n value while cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value while cond", + ), + ( + "def f\n value until cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value until cond", + ), + ( + "def f\n if cond\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "if cond\n value\n end", + ), + ( + "def f():\n if cond:\n 
value()\n", + Language::Python, + ".py", + "block", + "if cond:\n value()", + ), + ( + "function f() { if (cond) { value(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (cond) { value(); }", + ), + ( + "function f()\n if cond then\n value()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if cond then\n value()\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.modifier_keyword(node).unwrap_or_default(); + + assert_eq!( + rust, + ruby_private_string(source, language, suffix, "modifier_keyword", kind, text), + "modifier_keyword mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn modifier_parts_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value if cond", + ), + ( + "def f\n value unless cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value unless cond", + ), + ( + "def f\n if cond\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "if cond\n value\n end", + ), + ( + "def f():\n if cond:\n value()\n", + Language::Python, + ".py", + "block", + "if cond:\n value()", + ), + ( + "function f() { if (cond) { value(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (cond) { value(); }", + ), + ( + "function f()\n if cond then\n value()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if cond then\n value()\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.modifier_parts(node).map(|(action, condition)| { + ( + ( + action.kind().to_string(), + super::node_text(action, source).to_string(), + ), + ( + 
condition.kind().to_string(), + super::node_text(condition, source).to_string(), + ), + ) + }); + + assert_eq!( + rust, + ruby_private_modifier_parts_signature(source, language, suffix, kind, text), + "modifier_parts mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn modifier_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value if cond", + ), + ( + "def f\n return value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "return value if cond", + ), + ( + "def f\n if cond\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "if cond\n value\n end", + ), + ( + "def f():\n if cond:\n value()\n", + Language::Python, + ".py", + "block", + "if cond:\n value()", + ), + ( + "function f() { if (cond) { value(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (cond) { value(); }", + ), + ( + "function f()\n if cond then\n value()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if cond then\n value()\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.modifier_statement(node), + ruby_private_predicate(source, language, suffix, "modifier_statement?", kind, text), + "modifier_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_modifier_action_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "return value if cond\n", + Language::Ruby, + ".rb", + "return", + "return value", + ), + ("break if done\n", Language::Ruby, ".rb", "break", "break"), + ( + "value if cond\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_modifier_action(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_modifier_action", + kind, + text + ), + "normalize_modifier_action mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_modifier_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value if cond", + ), + ( + "def f\n value unless cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value unless cond", + ), + ( + "def f\n value while cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value while cond", + ), + ( + "def f\n value until cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value until cond", + ), + ( + "def f\n return value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "return value if cond", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_modifier_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + 
ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_modifier_statement", + kind, + text + ), + "normalize_modifier_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn modifier_return_action_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "return value if ready\n", + Language::Ruby, + ".rb", + "return", + "return value", + ), + ("break if done\n", Language::Ruby, ".rb", "break", "break"), + ("next if skip\n", Language::Ruby, ".rb", "next", "next"), + ( + "return value if ready\n", + Language::Ruby, + ".rb", + "identifier", + "ready", + ), + ( + "def f():\n return value\n break\n continue\n", + Language::Python, + ".py", + "return_statement", + "return value", + ), + ( + "def f():\n return value\n break\n continue\n", + Language::Python, + ".py", + "break_statement", + "break", + ), + ( + "def f():\n return value\n break\n continue\n", + Language::Python, + ".py", + "continue_statement", + "continue", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "function f() { return value; break; continue; }\n", + Language::TypeScript, + ".ts", + "return_statement", + "return value;", + ), + ( + "function f() { return value; break; continue; }\n", + Language::TypeScript, + ".ts", + "break_statement", + "break;", + ), + ( + "function f() { return value; break; continue; }\n", + Language::TypeScript, + ".ts", + "continue_statement", + "continue;", + ), + ( + "function f() { return value; }\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "return_statement", + "return value", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "expression_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + 
assert_eq!( + normalizer.modifier_return_action(node), + ruby_private_predicate( + source, + language, + suffix, + "modifier_return_action?", + kind, + text + ), + "modifier_return_action? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn call_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "items.each do |item|\n item\nend\n", + Language::Ruby, + ".rb", + "call", + "items.each do |item|\n item\nend", + ), + ( + "items.map { |item| item }\n", + Language::Ruby, + ".rb", + "call", + "items.map { |item| item }", + ), + ( + "def f\n items.map { |item| item }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map { |item| item }", + ), + ("items.each\n", Language::Ruby, ".rb", "call", "items.each"), + ( + "def f():\n value()\n", + Language::Python, + ".py", + "function_definition", + "def f():\n value()", + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n value()\nend", + ), + ( + "function f() { value(); }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { value(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.call_block(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "call_block", kind, text), + "call_block mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn statement_block_call_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items.map { |item| item }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map { |item| item }", + ), + ( + "items.map { |item| item }\n", + Language::Ruby, + ".rb", + "call", + 
"items.map { |item| item }", + ), + ( + "def f\n foo(bar) { baz }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo(bar) { baz }", + ), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "def f():\n value()\n", + Language::Python, + ".py", + "function_definition", + "def f():\n value()", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "function f() { value(); }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { value(); }", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n value()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.statement_block_call(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature( + source, + language, + suffix, + "statement_block_call", + kind, + text + ), + "statement_block_call mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn statement_call_with_block_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items.map { |item| item }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map { |item| item }", + ), + ( + "items.map { |item| item }\n", + Language::Ruby, + ".rb", + "call", + "items.map { |item| item }", + ), + ( + "def f\n foo(bar) { baz }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo(bar) { baz }", + ), + ( + "def f\n items.map\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map", + ), + ( + "def f():\n value(lambda item: item)\n", + 
Language::Python, + ".py", + "function_definition", + "def f():\n value(lambda item: item)", + ), + ( + "items.map(item => item);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "items.map(item => item);", + ), + ( + "items:map(function(item) return item end)\n", + Language::Lua, + ".lua", + "function_call", + "items:map(function(item) return item end)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.statement_call_with_block(node), + ruby_private_predicate( + source, + language, + suffix, + "statement_call_with_block?", + kind, + text + ), + "statement_call_with_block? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_statement_call_with_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [( + "def f\n items.map { |item| item }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map { |item| item }", + )] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_statement_call_with_block(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_statement_call_with_block", + kind, + text + ), + "normalize_statement_call_with_block mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn visibility_inline_def_call_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "private def hidden; value; end\n", + Language::Ruby, + ".rb", + "call", + "private def hidden; value; end", + ), + ( + "public def visible\n value\nend\n", + Language::Ruby, + ".rb", + "call", + "public def visible\n 
value\nend", + ), + ( + "private :hidden\n", + Language::Ruby, + ".rb", + "call", + "private :hidden", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "expression_statement", + "private(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + "private(value)", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "function_call", + "private(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.visibility_inline_def_call(node), + ruby_private_predicate( + source, + language, + suffix, + "visibility_inline_def_call?", + kind, + text + ), + "visibility_inline_def_call? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn visibility_inline_def_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "private def hidden\n value\n end", + ), + ( + "class C\n module_function def helper\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "module_function def helper\n value\n end", + ), + ( + "class C\n private :hidden\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "private :hidden", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "expression_statement", + "private(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "private(value);", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "function_call", + "private(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let function = + 
normalizer.named_children(node).into_iter().next().expect( + "visibility_inline_def_statement test target should have a first named child", + ); + + assert_eq!( + normalizer.visibility_inline_def_statement(node, function), + ruby_private_visibility_inline_def_statement_predicate( + source, language, suffix, kind, text + ), + "visibility_inline_def_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_visibility_inline_def_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "private def hidden\n value\nend\n", + Language::Ruby, + ".rb", + "call", + "private def hidden\n value\nend", + ), + ( + "public def visible\n value\nend\n", + Language::Ruby, + ".rb", + "call", + "public def visible\n value\nend", + ), + ( + "module_function def self.helper\n value\nend\n", + Language::Ruby, + ".rb", + "call", + "module_function def self.helper\n value\nend", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "expression_statement", + "private(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + "private(value)", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "function_call", + "private(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_visibility_inline_def(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_visibility_inline_def", + kind, + text + ), + "normalize_visibility_inline_def mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn inline_def_from_argument_list_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + 
Language::Ruby, + ".rb", + "argument_list", + "def hidden\n value\n end", + ), + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "class C\n private :hidden\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":hidden", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "arguments", + "(value)", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "arguments", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_def_from_argument_list(Some(node)) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "inline_def_from_argument_list", + kind, + text + ), + "inline_def_from_argument_list mismatch for {language:?} {kind} {text:?}" + ); + } + + for (source, language, suffix) in [ + ("private def hidden\n value\nend\n", Language::Ruby, ".rb"), + ("private(value)\n", Language::Python, ".py"), + ("private(value);\n", Language::TypeScript, ".ts"), + ("private(value)\n", Language::Lua, ".lua"), + ] { + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_def_from_argument_list(None) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_inline_def_from_argument_list_nil_value(source, language, suffix), + "inline_def_from_argument_list nil mismatch for {language:?}" + ); + } +} + +#[test] +fn inline_def_from_source_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, + 
".rb", + "argument_list", + "def hidden\n value\n end", + ), + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "def hidden\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def hidden\n value\nend", + ), + ( + "def self.hidden\n value\nend\n", + Language::Ruby, + ".rb", + "singleton_method", + "def self.hidden\n value\nend", + ), + ( + "class C\n private :hidden\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":hidden", + ), + ( + "def hidden():\n value\n", + Language::Python, + ".py", + "function_definition", + "def hidden():\n value", + ), + ( + "function hidden() {\n value;\n}\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function hidden() {\n value;\n}", + ), + ( + "function hidden()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function hidden()\n value()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_def_from_source(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "inline_def_from_source", + kind, + text + ), + "inline_def_from_source mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn inline_def_from_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "private def hidden\n value\n end", + ), + ( + "class C\n module_function def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "module_function def self.hidden\n value\n end", + ), + ( + "private def hidden\n value\nend\n", + Language::Ruby, + 
".rb", + "call", + "private def hidden\n value\nend", + ), + ( + "class C\n private :hidden\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "private :hidden", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "expression_statement", + "private(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "private(value);", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "function_call", + "private(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_def_from_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "inline_def_from_statement", + kind, + text + ), + "inline_def_from_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn inline_def_body_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def hidden\n value\n end", + ), + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "class C\n private def empty\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def empty\n end", + ), + ( + "def hidden():\n value\n", + Language::Python, + ".py", + "function_definition", + "def hidden():\n value", + ), + ( + "function hidden() {\n value;\n}\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function hidden() {\n value;\n}", + ), + ( + "function hidden()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function hidden()\n value()\nend", + ), + ] { + let tree = raw_tree(source, 
language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.inline_def_body(node).map(|body| { + ( + body.kind().to_string(), + super::node_text(body, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature(source, language, suffix, "inline_def_body", kind, text), + "inline_def_body mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn inline_def_receiver_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def hidden\n value\n end", + ), + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "class C\n private def Owner.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def Owner.hidden\n value\n end", + ), + ( + "class C\n private def Owner::Nested.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def Owner::Nested.hidden\n value\n end", + ), + ( + "def hidden():\n value\n", + Language::Python, + ".py", + "function_definition", + "def hidden():\n value", + ), + ( + "function hidden() {\n value;\n}\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function hidden() {\n value;\n}", + ), + ( + "function hidden()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function hidden()\n value()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.inline_def_receiver(node).map(|receiver| { + ( + receiver.kind().to_string(), + super::node_text(receiver, source).to_string(), + ) + }); + + assert_eq!( + rust, + 
ruby_private_node_signature( + source, + language, + suffix, + "inline_def_receiver", + kind, + text + ), + "inline_def_receiver mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn inline_def_name_after_receiver_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "class C\n private def Owner.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def Owner.hidden\n value\n end", + ), + ( + "class C\n private def Owner::Nested.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def Owner::Nested.hidden\n value\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let receiver = normalizer + .inline_def_receiver(node) + .expect("inline def receiver should exist for name-after-receiver case"); + let rust = normalizer + .inline_def_name_after_receiver(node, receiver) + .unwrap_or_default(); + + assert_eq!( + rust, + ruby_private_inline_def_name_after_receiver(source, language, suffix, kind, text), + "inline_def_name_after_receiver mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn inline_parameter_begin_marker_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + ), + ( + "def f a; a; end\n", + Language::Ruby, + ".rb", + "method", + "def f a; a; end", + ), + ( + "def f(a)\n a\nend\n", + Language::Ruby, + ".rb", + "method", + "def f(a)\n a\nend", + ), + ( + "def f(a):\n return a\n", + Language::Python, + ".py", + "function_definition", + "def f(a):\n return a", + ), + ( + "function f(a) { return a; }\n", + Language::TypeScript, + ".ts", + 
"function_declaration", + "function f(a) { return a; }", + ), + ( + "function f(a)\n return a\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f(a)\n return a\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_parameter_begin_marker(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_inline_parameter_begin_marker_value(source, language, suffix, kind, text), + "inline_parameter_begin_marker mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn prepend_inline_parameter_begin_matches_ruby_private_method() { + let scalar = test_node("VCALL", Vec::new()); + let block = test_node( + "BLOCK", + vec![Child::Node(Box::new(scalar.clone())), Child::Nil], + ); + let empty_block = test_node("BLOCK", vec![Child::Nil]); + + let cases = vec![ + ( + "no_marker", + "def f(a)\n a\nend\n", + Language::Ruby, + ".rb", + "method", + "def f(a)\n a\nend", + Some(scalar.clone()), + ), + ( + "marker_nil_body", + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + None, + ), + ( + "marker_scalar_body", + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + Some(scalar.clone()), + ), + ( + "marker_block_body", + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + Some(block), + ), + ( + "marker_empty_block", + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + Some(empty_block), + ), + ( + "non_ruby", + "def f(a):\n return a\n", + Language::Python, + ".py", + "function_definition", + "def f(a):\n return a", + Some(scalar), + ), + ]; + + for (label, source, language, suffix, kind, text, body) in cases { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, 
kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .prepend_inline_parameter_begin(node, body.clone()) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + let body_value = body.as_ref().map(node_value).unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_prepend_inline_parameter_begin_value( + source, + language, + suffix, + kind, + text, + &body_value, + ), + "prepend_inline_parameter_begin mismatch for {label}" + ); + } +} + +#[test] +fn scalar_argument_list_value_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return yield\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "yield", + ), + ( + "def f\n return nil\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "nil", + ), + ( + "def f\n return true\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "true", + ), + ( + "def f\n return false\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "false", + ), + ( + "def f\n return :ok?\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":ok?", + ), + ( + "def f\n return 12\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "12", + ), + ( + "def f\n return -12\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "-12", + ), + ( + "def f\n return name\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "name", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "function f() { return value; }\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "function f()\n return value\nend\n", + Language::Lua, + ".lua", + "expression_list", + "value", + ), + ( + "function f() { return yield; }\n", + Language::TypeScript, + ".ts", + "yield_expression", + "yield", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = 
super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .scalar_argument_list_value(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "scalar_argument_list_value", + kind, + text, + ), + "scalar_argument_list_value mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn local_or_call_for_name_matches_ruby_private_method() { + for (source, language, suffix, kind, text, name, local) in [ + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "hash_key_symbol", + "name", + "name", + false, + ), + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "hash_key_symbol", + "name", + "name", + true, + ), + ( + "def f():\n value\n", + Language::Python, + ".py", + "identifier", + "f", + "f", + false, + ), + ( + "function f() { value; }\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + "value", + false, + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "identifier", + "value", + "value", + false, + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + if local { + normalizer + .local_stack + .push(BTreeSet::from([name.to_string()])); + } + let rust = node_value(&normalizer.local_or_call_for_name(name, node)); + + assert_eq!( + rust, + ruby_private_local_or_call_for_name_value( + source, language, suffix, kind, text, name, local + ), + "local_or_call_for_name mismatch for {language:?} {name:?} local={local}" + ); + } +} + +#[test] +fn literal_arguments_from_text_normalization_matches_ruby() { + let symbol_source = "puts :ok\n"; + let root = parse_language_source(symbol_source, Language::Ruby, ".rb"); + let fcall = first_node(&root, "FCALL", "puts :ok"); + assert_eq!( + fcall.children.first(), + Some(&Child::Symbol("puts".to_string())) + ); + let 
args = child_node(fcall, 1); + assert_eq!(args.r#type, "LIST"); + let lit = child_node(args, 0); + assert_eq!(lit.r#type, "LIT"); + assert_eq!(lit.children.first(), Some(&Child::Symbol("ok".to_string()))); + assert_ruby_parity(symbol_source, Language::Ruby, ".rb"); + + let heredoc_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; + let root = parse_language_source(heredoc_source, Language::Ruby, ".rb"); + let fcall = first_node(&root, "FCALL", "puts <<~TXT"); + let args = child_node(fcall, 1); + assert_eq!(args.r#type, "LIST"); + let dstr = child_node(args, 0); + assert_eq!(dstr.r#type, "DSTR"); + assert_eq!(child_types(dstr), vec!["STR"]); + let body = child_node(dstr, 0); + assert_eq!( + body.children.first(), + Some(&Child::String("\n hi\n ".to_string())) + ); + assert_ruby_parity(heredoc_source, Language::Ruby, ".rb"); +} + +#[test] +fn literal_symbol_arguments_matches_ruby_scan_contract() { + assert_eq!( + super::literal_symbol_arguments(":one, :two?, :three!, :four=, :1, ::Name"), + vec![ + "one".to_string(), + "two?".to_string(), + "three!".to_string(), + "four=".to_string(), + "Name".to_string(), + ] + ); +} + +#[test] +fn elide_tail_returns_matches_ruby_private_method() { + let leaf = |node_type: &str| test_node(node_type, vec![Child::String("value".to_string())]); + let return_leaf = || test_node("RETURN", vec![Child::Node(Box::new(leaf("LVAR")))]); + let protected_def = test_node( + "DEFN", + vec![ + Child::Symbol("kept".to_string()), + Child::Node(Box::new(test_node( + "SCOPE", + vec![Child::Nil, Child::Nil, Child::Node(Box::new(return_leaf()))], + ))), + ], + ); + let cases = vec![ + None, + Some(return_leaf()), + Some(test_node( + "BLOCK", + vec![ + Child::Node(Box::new(leaf("LVAR"))), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "SCOPE", + vec![Child::Nil, Child::Nil, Child::Node(Box::new(return_leaf()))], + )), + Some(test_node( + "IF", + vec![ + Child::Node(Box::new(leaf("COND"))), + Child::Node(Box::new(return_leaf())), + 
Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "UNLESS", + vec![ + Child::Node(Box::new(leaf("COND"))), + Child::Node(Box::new(return_leaf())), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "CASE", + vec![ + Child::Node(Box::new(leaf("LVAR"))), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "CASE2", + vec![Child::Node(Box::new(return_leaf()))], + )), + Some(test_node( + "WHEN", + vec![ + Child::Node(Box::new(leaf("LIST"))), + Child::Node(Box::new(return_leaf())), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "RESCUE", + vec![ + Child::Node(Box::new(return_leaf())), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "RESBODY", + vec![ + Child::Node(Box::new(leaf("LIST"))), + Child::Node(Box::new(return_leaf())), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(protected_def), + ]; + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + for node in cases { + let input = node.as_ref().map(node_value).unwrap_or(Value::Null); + let rust = normalizer + .elide_tail_returns(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_elide_tail_returns_value(&input, true), + "elide_tail_returns mismatch for input {input}" + ); + } + + let non_ruby = Some(return_leaf()); + let input = non_ruby.as_ref().map(node_value).unwrap_or(Value::Null); + let normalizer = super::TreeSitterNormalizer::new("", Language::Python); + let rust = normalizer + .elide_tail_returns(non_ruby) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!(rust, input); + assert_eq!(ruby_private_elide_tail_returns_value(&input, false), input); +} + +#[test] +fn elide_implicit_nil_body_matches_ruby_private_method() { + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + let leaf = || test_node("LVAR", vec![Child::String("value".to_string())]); + let nil_node = || test_node("NIL", 
Vec::new()); + let cases = vec![ + None, + Some(nil_node()), + Some(leaf()), + Some(test_node( + "BLOCK", + vec![ + Child::Node(Box::new(leaf())), + Child::Node(Box::new(nil_node())), + Child::Node(Box::new(nil_node())), + ], + )), + Some(test_node( + "BLOCK", + vec![Child::Nil, Child::Node(Box::new(nil_node()))], + )), + Some(test_node( + "BLOCK", + vec![ + Child::Node(Box::new(leaf())), + Child::Node(Box::new(leaf())), + Child::Node(Box::new(nil_node())), + ], + )), + ]; + + for node in cases { + let input = node.as_ref().map(node_value).unwrap_or(Value::Null); + let rust = normalizer + .elide_implicit_nil_body(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_elide_implicit_nil_body_value(&input, true), + "elide_implicit_nil_body mismatch for input {input}" + ); + } + + let non_ruby = Some(nil_node()); + let input = non_ruby.as_ref().map(node_value).unwrap_or(Value::Null); + let normalizer = super::TreeSitterNormalizer::new("", Language::Python); + let rust = normalizer + .elide_implicit_nil_body(non_ruby) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!(rust, input); + assert_eq!( + ruby_private_elide_implicit_nil_body_value(&input, false), + input + ); +} + +#[test] +fn drop_trailing_nil_statement_matches_ruby_private_method() { + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + let leaf = |node_type: &str| test_node(node_type, vec![Child::Symbol("value".to_string())]); + let nil_node = || test_node("NIL", Vec::new()); + let block = |children| test_node("BLOCK", children); + + for node in [ + None, + Some(nil_node()), + Some(block(vec![ + Child::Node(Box::new(leaf("LASGN"))), + Child::Node(Box::new(nil_node())), + ])), + Some(block(vec![ + Child::Node(Box::new(leaf("LASGN"))), + Child::Node(Box::new(nil_node())), + Child::Node(Box::new(nil_node())), + ])), + Some(block(vec![ + Child::Node(Box::new(leaf("LASGN"))), + Child::Nil, + 
Child::Node(Box::new(nil_node())), + ])), + Some(block(vec![Child::Nil, Child::Node(Box::new(nil_node()))])), + Some(block(vec![ + Child::Node(Box::new(leaf("LASGN"))), + Child::Nil, + Child::Node(Box::new(leaf("VCALL"))), + ])), + Some(block(vec![ + Child::Node(Box::new(leaf("LASGN"))), + Child::Nil, + Child::Node(Box::new(leaf("VCALL"))), + Child::Node(Box::new(nil_node())), + ])), + ] { + let input = node.as_ref().map(node_value).unwrap_or(Value::Null); + let rust = normalizer + .drop_trailing_nil_statement(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_drop_trailing_nil_statement_value(&input), + "drop_trailing_nil_statement mismatch for input {input}" + ); + } +} + +#[test] +fn symbol_literal_node_matches_ruby_private_predicate() { + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + for (node, node_type, child_kind) in [ + (None, None, None), + ( + Some(test_node("LIT", vec![Child::Symbol("value".to_string())])), + Some("LIT"), + Some("symbol"), + ), + ( + Some(test_node("LIT", vec![Child::String("value".to_string())])), + Some("LIT"), + Some("string"), + ), + (Some(test_node("LIT", Vec::new())), Some("LIT"), None), + ( + Some(test_node("STR", vec![Child::Symbol("value".to_string())])), + Some("STR"), + Some("symbol"), + ), + ( + Some(test_node( + "LIT", + vec![Child::Node(Box::new(test_node("NIL", Vec::new())))], + )), + Some("LIT"), + Some("node"), + ), + ( + Some(test_node("LIT", vec![Child::Nil])), + Some("LIT"), + Some("nil"), + ), + ] { + assert_eq!( + normalizer.symbol_literal_node(node.as_ref()), + ruby_private_symbol_literal_node_predicate(node_type, child_kind), + "symbol_literal_node? 
mismatch for node_type={node_type:?} child_kind={child_kind:?}" + ); + } +} + +#[test] +fn same_ts_node_matches_ruby_private_predicate() { + for ( + source, + language, + suffix, + left_kind, + left_text, + left_index, + right_kind, + right_text, + right_index, + ) in [ + ( + "value\nvalue\n", + Language::Ruby, + ".rb", + "identifier", + "value", + 0, + "identifier", + "value", + 0, + ), + ( + "value\nvalue\n", + Language::Ruby, + ".rb", + "identifier", + "value", + 0, + "identifier", + "value", + 1, + ), + ( + "value\nvalue\n", + Language::Python, + ".py", + "expression_statement", + "value", + 0, + "expression_statement", + "value", + 0, + ), + ( + "value\nvalue\n", + Language::Python, + ".py", + "expression_statement", + "value", + 0, + "expression_statement", + "value", + 1, + ), + ( + "value;\nvalue;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "value;", + 0, + "expression_statement", + "value;", + 1, + ), + ( + "value()\nvalue()\n", + Language::Lua, + ".lua", + "function_call", + "value()", + 0, + "function_call", + "value()", + 0, + ), + ( + "value()\nvalue()\n", + Language::Lua, + ".lua", + "function_call", + "value()", + 0, + "function_call", + "value()", + 1, + ), + ] { + let tree = raw_tree(source, language); + let left = nth_raw_node(tree.root_node(), source, left_kind, left_text, left_index); + let right = nth_raw_node( + tree.root_node(), + source, + right_kind, + right_text, + right_index, + ); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.same_ts_node(left, right), + ruby_private_same_ts_node_predicate( + source, + language, + suffix, + left_kind, + left_text, + left_index, + right_kind, + right_text, + right_index + ), + "same_ts_node? 
mismatch for {language:?} {left_kind}:{left_text:?}[{left_index}] vs {right_kind}:{right_text:?}[{right_index}]" + ); + } +} + +#[test] +fn parent_named_child_matches_ruby_private_predicate() { + for ( + source, + language, + suffix, + parent_kind, + parent_text, + parent_index, + child_kind, + child_text, + child_index, + ) in [ + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "pair", + "name:", + 0, + "hash_key_symbol", + "name", + 0, + ), + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "pair", + "name:", + 0, + "identifier", + "f", + 0, + ), + ( + "def f():\n value\n", + Language::Python, + ".py", + "function_definition", + "def f():\n value", + 0, + "identifier", + "f", + 0, + ), + ( + "def f():\n value\n", + Language::Python, + ".py", + "block", + "value", + 0, + "identifier", + "f", + 0, + ), + ( + "function f() { value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { value; }", + 0, + "identifier", + "f", + 0, + ), + ( + "function f() { value; }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ value; }", + 0, + "identifier", + "f", + 0, + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n value()\nend", + 0, + "identifier", + "f", + 0, + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "block", + "value()", + 0, + "identifier", + "f", + 0, + ), + ] { + let tree = raw_tree(source, language); + let parent = nth_raw_node( + tree.root_node(), + source, + parent_kind, + parent_text, + parent_index, + ); + let child = nth_raw_node( + tree.root_node(), + source, + child_kind, + child_text, + child_index, + ); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.parent_named_child(parent, child), + ruby_private_parent_named_child_predicate( + source, + language, + suffix, + parent_kind, + parent_text, + parent_index, + child_kind, + child_text, + child_index + ), + 
"parent_named_child? mismatch for {language:?} {parent_kind}:{parent_text:?}[{parent_index}] -> {child_kind}:{child_text:?}[{child_index}]" + ); + } +} + +#[test] +fn node_key_matches_ruby_private_method() { + for (source, language, suffix, kind, text, index) in [ + ( + "value\nvalue\n", + Language::Ruby, + ".rb", + "identifier", + "value", + 0, + ), + ( + "value\nvalue\n", + Language::Ruby, + ".rb", + "identifier", + "value", + 1, + ), + ( + "value\nvalue\n", + Language::Python, + ".py", + "expression_statement", + "value", + 1, + ), + ( + "value;\nvalue;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "value;", + 0, + ), + ( + "value()\nvalue()\n", + Language::Lua, + ".lua", + "function_call", + "value()", + 1, + ), + ] { + let tree = raw_tree(source, language); + let node = nth_raw_node(tree.root_node(), source, kind, text, index); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.node_key(node), + ruby_private_node_key_signature(source, language, suffix, kind, text, index), + "node_key mismatch for {language:?} {kind}:{text:?}[{index}]" + ); + } +} + +#[test] +fn bare_identifier_text_matches_ruby_private_predicate() { + for text in [ + "value", + "_value", + "value1", + "value?", + "value!", + "value=", + " value? ", + "", + "1value", + "value-name", + "value?name", + "value??", + "value!=", + "value =", + ] { + assert_eq!( + super::bare_identifier_text(text), + ruby_private_text_predicate(Language::Ruby, "bare_identifier_text?", text), + "bare_identifier_text? 
mismatch for {text:?}" + ); + } +} + +#[test] +fn hidden_match_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "match(value)\n", + Language::Ruby, + ".rb", + "call", + "match(value)", + ), + ( + "match value:\n case 1:\n result\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n result", + ), + ( + "match(value)\n", + Language::Python, + ".py", + "expression_statement", + "match(value)", + ), + ( + "match(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "match(value);", + ), + ( + "match(value)\n", + Language::Lua, + ".lua", + "function_call", + "match(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.hidden_match(node), + ruby_private_predicate(source, language, suffix, "hidden_match?", kind, text), + "hidden_match? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn kind_type_matches_ruby_private_method() { + for kind in [ + "", + "body_statement", + "block_body", + "block", + "statements", + "expression_statement", + "alreadyCAPS", + "argument-list??", + "foo__bar", + "123kind", + "é_node", + ] { + assert_eq!( + super::kind_type(kind), + ruby_private_text_string(Language::Ruby, "kind_type", kind), + "kind_type mismatch for {kind:?}" + ); + } +} + +#[test] +fn ts_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("ready?\n", Language::Ruby, ".rb", "call", "ready?"), + ( + "value\n", + Language::Python, + ".py", + "expression_statement", + "value", + ), + ( + "let value = 1;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + + assert_eq!( + super::ts_node(Some(node)), + ruby_private_predicate(source, language, suffix, "ts_node?", kind, text), + "ts_node? 
raw-node mismatch for {language:?} {kind}:{text:?}" + ); + } + + assert_eq!(super::ts_node(None), ruby_private_ts_node_value("nil")); + assert!(!ruby_private_ts_node_value("string")); + assert!(!ruby_private_ts_node_value("normalized_node")); +} + +#[test] +fn command_call_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n puts value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "puts value", + ), + ( + "def f\n foo { value }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { value }", + ), + ( + "def f\n foo\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo", + ), + ( + "def f\n user.name value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name value", + ), + ( + "print(value)\n", + Language::Python, + ".py", + "expression_statement", + "print(value)", + ), + ( + "console.log(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "console.log(value);", + ), + ( + "print(value)\n", + Language::Lua, + ".lua", + "function_call", + "print(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.command_call_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "command_call_statement?", + kind, + text + ), + "command_call_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_command_call_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n puts value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "puts value", + ), + ( + "def f\n foo { value }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { value }", + ), + ( + "print(value)\n", + Language::Python, + ".py", + "expression_statement", + "print(value)", + ), + ( + "console.log(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "console.log(value);", + ), + ( + "print(value)\n", + Language::Lua, + ".lua", + "function_call", + "print(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_command_call_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_command_call_statement", + kind, + text + ), + "normalize_command_call_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn zero_child_identifier_call_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("foo?\n", Language::Ruby, ".rb", "call", "foo?"), + ("foo!\n", Language::Ruby, ".rb", "call", "foo!"), + ("foo()\n", Language::Ruby, ".rb", "call", "foo()"), + ( + "foo()\n", + Language::Python, + ".py", + "expression_statement", + "foo()", + ), + ( + "foo();\n", + Language::TypeScript, + ".ts", + "call_expression", + "foo()", + ), + ("foo()\n", Language::Lua, ".lua", "function_call", "foo()"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( 
+ normalizer.zero_child_identifier_call(node), + ruby_private_predicate( + source, + language, + suffix, + "zero_child_identifier_call?", + kind, + text + ), + "zero_child_identifier_call? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn zero_child_identifier_call_normalization_matches_ruby() { + for source in ["foo?\n", "foo!\n"] { + let root = parse_language_source(source, Language::Ruby, ".rb"); + let text = source.trim(); + let vcall = first_node(&root, "VCALL", text); + assert_eq!( + vcall.children.first(), + Some(&Child::Symbol(text.to_string())) + ); + assert_ruby_parity(source, Language::Ruby, ".rb"); + } +} + +#[test] +fn normalize_zero_child_call_matches_ruby_private_method() { + for source in ["foo?\n", "foo!\n", "foo()\n"] { + let text = source.trim(); + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "call", text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer.normalize_zero_child_call(node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_zero_child_call", + "call", + text + ), + "normalize_zero_child_call mismatch for {text:?}" + ); + } +} + +#[test] +fn normalize_const_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("Foo\n", Language::Ruby, ".rb", "constant", "Foo"), + ( + "Foo::Bar\n", + Language::Ruby, + ".rb", + "scope_resolution", + "Foo::Bar", + ), + ( + "class Foo::Bar::Baz\nend\n", + Language::Ruby, + ".rb", + "scope_resolution", + "Foo::Bar::Baz", + ), + ( + "type Alias = Foo;\n", + Language::TypeScript, + ".ts", + "type_identifier", + "Foo", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.normalize_const(node); + + assert_eq!( + 
node_value(&rust), + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_const", + kind, + text + ), + "normalize_const mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn assignment_receiver_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("value += 1\n", Language::Ruby, ".rb", "identifier", "value"), + ( + "@value += 1\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + ), + ( + "$value += 1\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ("VALUE += 1\n", Language::Ruby, ".rb", "constant", "VALUE"), + ( + "user.value += 1\n", + Language::Ruby, + ".rb", + "call", + "user.value", + ), + ( + "value += 1\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "user.value += 1\n", + Language::Python, + ".py", + "attribute", + "user.value", + ), + ( + "value += 1;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "user.value += 1;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.value", + ), + ( + "value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "user.value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "user.value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .assignment_receiver(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "assignment_receiver", + kind, + text + ), + "assignment_receiver mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn assignment_target_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "@value = 1\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + ), + ( + "$value = 
1\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "items[index] = value\n", + Language::Ruby, + ".rb", + "element_reference", + "items[index]", + ), + ( + "user.value = 1\n", + Language::Ruby, + ".rb", + "call", + "user.value", + ), + ( + "user.value = 1\n", + Language::Python, + ".py", + "attribute", + "user.value", + ), + ( + "user.value = 1;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.value", + ), + ( + "user.value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "user.value", + ), + ( + "value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let source_node = normalizer.parent_node(node).unwrap_or(node); + let right = normalizer + .assignment_right(source_node) + .and_then(|right| normalizer.normalize_node(right)); + let rust = normalizer + .assignment_target(node, right, source_node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_assignment_target_value(source, language, suffix, kind, text), + "assignment_target mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn augmented_assignment_value_matches_ruby_private_method() { + for (source, language, suffix, kind, text, operator) in [ + ( + "value += 1\n", + Language::Ruby, + ".rb", + "identifier", + "value", + "+", + ), + ( + "@value *= 2\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + "*", + ), + ( + "$value += 1\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + "+", + ), + ( + "VALUE -= 1\n", + Language::Ruby, + ".rb", + "constant", + "VALUE", + "-", + ), + ( + "user.value += 1\n", + Language::Ruby, + ".rb", + "call", + "user.value", + "+", + ), + ( + "value += 1\n", + Language::Python, + ".py", + "identifier", + "value", + "+", + ), + ( + 
"user.value += 1\n", + Language::Python, + ".py", + "attribute", + "user.value", + "+", + ), + ( + "value += 1;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + "+", + ), + ( + "user.value += 1;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.value", + "+", + ), + ( + "value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "value", + "+", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let source_node = normalizer.parent_node(node).unwrap_or(node); + let right_raw = normalizer.assignment_right(source_node); + let rust = normalizer.augmented_assignment_value(node, operator, right_raw, source_node); + + assert_eq!( + node_value(&rust), + ruby_private_augmented_assignment_value(source, language, suffix, kind, text, operator), + "augmented_assignment_value mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn target_name_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "$value = other\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "VALUE = other\n", + Language::Ruby, + ".rb", + "constant", + "VALUE", + ), + ( + "a, *rest = values\n", + Language::Ruby, + ".rb", + "rest_assignment", + "*rest", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + Value::String(normalizer.target_name(node)), + 
ruby_private_normalize_method_value( + source, + language, + suffix, + "target_name", + kind, + text + ), + "target_name mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_multiple_assignment_matches_ruby_private_method() { + for (source, kind, text) in [ + ("a, b = values\n", "assignment", "a, b = values"), + ("$a, b = values\n", "assignment", "$a, b = values"), + ("a, *rest = values\n", "assignment", "a, *rest = values"), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let left = normalizer + .assignment_left(node) + .expect("multiple assignment should have left side"); + let right = normalizer + .assignment_right(node) + .and_then(|right| normalizer.normalize_node(right)); + let rust = normalizer.normalize_multiple_assignment(left, right, node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_multiple_assignment_value( + source, + Language::Ruby, + ".rb", + kind, + text + ), + "normalize_multiple_assignment mismatch for {text:?}" + ); + } +} + +#[test] +fn normalize_assignment_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "assignment", + "value = other", + ), + ( + "@value = other\n", + Language::Ruby, + ".rb", + "assignment", + "@value = other", + ), + ( + "$value = other\n", + Language::Ruby, + ".rb", + "assignment", + "$value = other", + ), + ( + "items[index] = value\n", + Language::Ruby, + ".rb", + "assignment", + "items[index] = value", + ), + ( + "user.value = other\n", + Language::Ruby, + ".rb", + "assignment", + "user.value = other", + ), + ( + "a, b = values\n", + Language::Ruby, + ".rb", + "assignment", + "a, b = values", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "user.value = other\n", + 
Language::Python, + ".py", + "expression_statement", + "user.value = other", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "value = other;", + ), + ( + "user.value = other;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "user.value = other;", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ( + "user.value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "user.value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_assignment(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_assignment", + kind, + text + ), + "normalize_assignment mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_assignment_lhs_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "@value = other\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + ), + ( + "$value = other\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "items[index] = value\n", + Language::Ruby, + ".rb", + "element_reference", + "items[index]", + ), + ( + "user.value = other\n", + Language::Ruby, + ".rb", + "call", + "user.value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "user.value = other\n", + Language::Python, + ".py", + "attribute", + "user.value", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "user.value = other;\n", + Language::TypeScript, + ".ts", + "member_expression", + 
"user.value", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "user.value = other\n", + Language::Lua, + ".lua", + "variable_list", + "user.value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_assignment_lhs(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_assignment_lhs", + kind, + text + ), + "normalize_assignment_lhs mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_begin_matches_ruby_private_method() { + for (source, text) in [ + ("begin\n work\n done\nend\n", "begin\n work\n done\nend"), + ( + "begin\n work\nensure\n cleanup\nend\n", + "begin\n work\nensure\n cleanup\nend", + ), + ( + "begin\n work\nrescue Error => e\n handle\nend\n", + "begin\n work\nrescue Error => e\n handle\nend", + ), + ( + "begin\n work\nrescue Error => e\n handle\nensure\n cleanup\nend\n", + "begin\n work\nrescue Error => e\n handle\nensure\n cleanup\nend", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "begin", text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_begin(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_begin", + "begin", + text + ), + "normalize_begin mismatch for {text:?}" + ); + } +} + +#[test] +fn normalize_block_argument_matches_ruby_private_method() { + for (source, text) in [ + ("foo(&block)\n", "&block"), + ("foo(&:to_s)\n", "&:to_s"), + ("foo(&method(:bar))\n", "&method(:bar)"), + ] { + let tree = raw_tree(source, 
Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "block_argument", text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_block_argument(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_block_argument", + "block_argument", + text + ), + "normalize_block_argument mismatch for {text:?}" + ); + } +} + +#[test] +fn normalize_body_nodes_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("\n", Language::Ruby, ".rb", "__root__", ""), + ("value\n", Language::Ruby, ".rb", "__root__", ""), + ("first\nsecond\n", Language::Ruby, ".rb", "__root__", ""), + ( + "first()\nsecond()\n", + Language::Python, + ".py", + "__root__", + "", + ), + ( + "first();\nsecond();\n", + Language::TypeScript, + ".ts", + "__root__", + "", + ), + ("first()\nsecond()\n", Language::Lua, ".lua", "__root__", ""), + ] { + let tree = raw_tree(source, language); + let target = if kind == "__root__" { + tree.root_node() + } else { + first_raw_node(tree.root_node(), source, kind, text) + }; + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let nodes = normalizer.named_children(target); + let rust = normalizer + .normalize_body_nodes(nodes, target) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_body_nodes_value(source, language, suffix, kind, text), + "normalize_body_nodes mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_children_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n one\n two\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "one\n two", + ), + ( + "def f\n value = other\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value = other", + ), + ( + "def f\n x = 
<<~TXT\n hi\n TXT\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x = <<~TXT\n hi\n TXT", + ), + ( + "def f():\n one()\n two()\n", + Language::Python, + ".py", + "block", + "one()\n two()", + ), + ( + "def f():\n value = other\n", + Language::Python, + ".py", + "block", + "value = other", + ), + ( + "function f(){ one(); two(); }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ one(); two(); }", + ), + ( + "function f(){ value = other; }\n", + Language::TypeScript, + ".ts", + "assignment_expression", + "value = other", + ), + ( + "function f()\n one()\n two()\nend\n", + Language::Lua, + ".lua", + "block", + "one()\n two()", + ), + ( + "function f()\n value = other\nend\n", + Language::Lua, + ".lua", + "block", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = children_value(&normalizer.normalize_children(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_children", + kind, + text + ), + "normalize_children mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_class_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class Thing; end\n", + Language::Ruby, + ".rb", + "class", + "class Thing; end", + ), + ( + "class Thing:\n pass\n", + Language::Python, + ".py", + "class_definition", + "class Thing:\n pass", + ), + ( + "class Thing {}\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Thing {}", + ), + ( + "local Thing = {}\n", + Language::Lua, + ".lua", + "variable_declaration", + "local Thing = {}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + 
.normalize_class(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_class", + kind, + text + ), + "normalize_class mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_impl_matches_ruby_private_method() { + for (source, kind, text) in [( + "impl Thing {\n fn call(&self) {\n work();\n }\n}\n", + "impl_item", + "impl Thing {\n fn call(&self) {\n work();\n }\n}", + )] { + let tree = raw_tree(source, Language::Rust); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Rust); + let rust = normalizer + .normalize_impl(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Rust, + ".rs", + "normalize_impl", + kind, + text + ), + "normalize_impl mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn rust_impl_normalization_matches_ruby() { + let source = "impl Thing {\n fn call(&self) {\n work();\n }\n}\n"; + let root = parse_language_source(source, Language::Rust, ".rs"); + let class_node = first_node(&root, "CLASS", source.trim_end()); + + assert_eq!(child_node(class_node, 0).r#type, "CONST"); + assert_ruby_parity(source, Language::Rust, ".rs"); +} + +#[test] +fn normalize_body_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value", + ), + ( + "def f\n return value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "return value", + ), + ( + "def f\n items[index]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items[index]", + ), + ( + "def f\n [first, second]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "[first, second]", + ), + ( + "def f\n value if ready?\nend\n", + Language::Ruby, + ".rb", + 
"body_statement", + "value if ready?", + ), + ( + "def f\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "block", + "return value", + ), + ( + "def f():\n value = other\n", + Language::Python, + ".py", + "block", + "value = other", + ), + ( + "function f() {\n return value;\n}\n", + Language::TypeScript, + ".ts", + "return_statement", + "return value;", + ), + ( + "function f() {\n value = other;\n}\n", + Language::TypeScript, + ".ts", + "expression_statement", + "value = other;", + ), + ( + "function f()\n return value\nend\n", + Language::Lua, + ".lua", + "block", + "return value", + ), + ( + "function f()\n value = other\nend\n", + Language::Lua, + ".lua", + "block", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_body(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_body", + kind, + text + ), + "normalize_body mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_return_value_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return nil\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "nil", + ), + ( + "def f\n return items[index]\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "items[index]", + ), + ( + "def f\n return left && right\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "left && right", + ), + ( + "def f\n return condition ? yes : no\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "condition ? 
yes : no", + ), + ( + "def f\n return foo { value }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo { value }", + ), + ( + "def f\n return user.name\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "user.name", + ), + ( + "def f\n return !value\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "!value", + ), + ( + "def f\n return left + right\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "left + right", + ), + ( + "def f\n return foo(bar)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo(bar)", + ), + ( + "def f():\n return value + other\n", + Language::Python, + ".py", + "binary_operator", + "value + other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_return_value(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_return_value", + kind, + text + ), + "normalize_return_value mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_return_node_matches_ruby_private_method() { + for (source, language, suffix, kind, text, elide_symbol) in [ + ( + "return :ok if cond\n", + Language::Ruby, + ".rb", + "return", + "return :ok", + false, + ), + ( + "return :ok if cond\n", + Language::Ruby, + ".rb", + "return", + "return :ok", + true, + ), + ( + "return value if cond\n", + Language::Ruby, + ".rb", + "return", + "return value", + true, + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_return_node_with_elide_symbol(node, elide_symbol) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + 
ruby_private_normalize_return_node_value( + source, + language, + suffix, + kind, + text, + elide_symbol + ), + "normalize_return_node mismatch for {language:?} {kind} {text:?} elide_symbol={elide_symbol}" + ); + } +} + +#[test] +fn normalize_return_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "return :ok if cond\n", + Language::Ruby, + ".rb", + "return", + "return :ok", + ), + ("break if done\n", Language::Ruby, ".rb", "break", "break"), + ( + "next value if done\n", + Language::Ruby, + ".rb", + "next", + "next value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_return(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_return", + kind, + text + ), + "normalize_return mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn call_arguments_matches_ruby_private_method() { + for (source, language, suffix, kind, text, function_mode) in [ + ( + "foo(value)\n", + Language::Ruby, + ".rb", + "call", + "foo(value)", + "auto", + ), + ( + "foo(left + right)\n", + Language::Ruby, + ".rb", + "call", + "foo(left + right)", + "auto", + ), + ( + "foo(user.name)\n", + Language::Ruby, + ".rb", + "call", + "foo(user.name)", + "auto", + ), + ( + "user.name(value)\n", + Language::Ruby, + ".rb", + "call", + "user.name(value)", + "none", + ), + ( + "foo(value)\n", + Language::Python, + ".py", + "call", + "foo(value)", + "auto", + ), + ( + "foo(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + "foo(value)", + "auto", + ), + ( + "foo(value)\n", + Language::Lua, + ".lua", + "function_call", + "foo(value)", + "auto", + ), + ( + "user.name(value)\n", + Language::Lua, + ".lua", + "function_call", + 
"user.name(value)", + "none", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let function = match function_mode { + "auto" => normalizer + .named_field(node, "function") + .or_else(|| normalizer.named_field(node, "call")) + .or_else(|| normalizer.named_children(node).into_iter().next()), + "none" => None, + other => panic!("unknown function mode {other:?}"), + }; + let rust = Value::Array( + normalizer + .call_arguments(node, function) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + rust, + ruby_private_call_arguments_value(source, language, suffix, kind, text, function_mode), + "call_arguments mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_call_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("ready?\n", Language::Ruby, ".rb", "call", "ready?"), + ("foo(value)\n", Language::Ruby, ".rb", "call", "foo(value)"), + ( + "user.name(value)\n", + Language::Ruby, + ".rb", + "call", + "user.name(value)", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ( + "foo(value)\n", + Language::Python, + ".py", + "expression_statement", + "foo(value)", + ), + ( + "foo(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + "foo(value)", + ), + ( + "foo(value)\n", + Language::Lua, + ".lua", + "function_call", + "foo(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_call(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_call", + kind, + text + ), + "normalize_call mismatch 
for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_call_with_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "items.map { |item| item }\n", + Language::Ruby, + ".rb", + "call", + "items.map { |item| item }", + ), + ( + "items.each do |item|\n item\nend\n", + Language::Ruby, + ".rb", + "call", + "items.each do |item|\n item\nend", + ), + ( + "foo(1) { bar }\n", + Language::Ruby, + ".rb", + "call", + "foo(1) { bar }", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_call_with_block(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_call_with_block", + kind, + text + ), + "normalize_call_with_block mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_call_without_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text, block_mode) in [ + ( + "foo(value)\n", + Language::Ruby, + ".rb", + "call", + "foo(value)", + "none", + ), + ( + "user.name(value)\n", + Language::Ruby, + ".rb", + "call", + "user.name(value)", + "none", + ), + ( + "foo(1) { bar }\n", + Language::Ruby, + ".rb", + "call", + "foo(1) { bar }", + "auto", + ), + ( + "items.map(1) { |item| item }\n", + Language::Ruby, + ".rb", + "call", + "items.map(1) { |item| item }", + "auto", + ), + ( + "Foo { bar }\n", + Language::Ruby, + ".rb", + "call", + "Foo { bar }", + "auto", + ), + ( + "foo(value)\n", + Language::Python, + ".py", + "expression_statement", + "foo(value)", + "none", + ), + ( + "foo(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + "foo(value)", + "none", + ), + ( 
+ "foo(value)\n", + Language::Lua, + ".lua", + "function_call", + "foo(value)", + "none", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let block = match block_mode { + "auto" => normalizer.call_block(node), + "none" => None, + other => panic!("unknown block mode {other:?}"), + }; + let rust = normalizer + .normalize_call_without_block(node, block) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_call_without_block_value( + source, language, suffix, kind, text, block_mode + ), + "normalize_call_without_block mismatch for {language:?} {kind} {text:?} with block mode {block_mode:?}" + ); + } +} + +#[test] +fn command_arguments_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "foo value\n", + Language::Ruby, + ".rb", + "argument_list", + "value", + ), + ( + "foo :name\n", + Language::Ruby, + ".rb", + "argument_list", + ":name", + ), + ( + "foo left + right\n", + Language::Ruby, + ".rb", + "argument_list", + "left + right", + ), + ( + "foo user.name\n", + Language::Ruby, + ".rb", + "argument_list", + "user.name", + ), + ( + "foo(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ( + "foo(left + right)\n", + Language::Python, + ".py", + "argument_list", + "(left + right)", + ), + ( + "foo(value);\n", + Language::TypeScript, + ".ts", + "arguments", + "(value)", + ), + ( + "foo(value)\n", + Language::Lua, + ".lua", + "arguments", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = Value::Array( + normalizer + .command_arguments(node) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + rust, + 
ruby_private_command_arguments_value(source, language, suffix, kind, text), + "command_arguments mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn const_for_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("Foo\n", Language::Ruby, ".rb", "constant", "Foo"), + ("foo\n", Language::Ruby, ".rb", "identifier", "foo"), + ( + "class Foo:\n pass\n", + Language::Python, + ".py", + "identifier", + "Foo", + ), + ( + "type Alias = Foo;\n", + Language::TypeScript, + ".ts", + "type_identifier", + "Foo", + ), + ( + "local Foo = {}\n", + Language::Lua, + ".lua", + "variable_list", + "Foo", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.const_for(Some(node), node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value(source, language, suffix, "const_for", kind, text), + "const_for mismatch for {language:?} {kind} {text:?}" + ); + } + + for (source, language, suffix) in [ + ("class Foo\nend\n", Language::Ruby, ".rb"), + ("class Foo:\n pass\n", Language::Python, ".py"), + ("class Foo {}\n", Language::TypeScript, ".ts"), + ("local Foo = {}\n", Language::Lua, ".lua"), + ] { + let tree = raw_tree(source, language); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.const_for(None, tree.root_node()); + + assert_eq!( + node_value(&rust), + ruby_private_const_for_nil_value(source, language, suffix), + "const_for nil mismatch for {language:?}" + ); + } +} + +#[test] +fn normalize_patterns_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when 1\n one", + ), + ( + "case\nwhen ready\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when ready\n one", + ), + ( + "case value\nwhen 
Foo::Bar\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when Foo::Bar\n one", + ), + ( + "case value\nwhen Foo\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when Foo\n one", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "case_clause", + "case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_case", + "case 1: one();", + ), + ("return 1\n", Language::Lua, ".lua", "expression_list", "1"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = Value::Array( + normalizer + .normalize_patterns(node) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + rust, + ruby_private_normalize_patterns_value(source, language, suffix, kind, text), + "normalize_patterns mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn case_value_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "case", + "case value\nwhen 1\n one\nend", + ), + ( + "case\nwhen ready\n one\nend\n", + Language::Ruby, + ".rb", + "case", + "case\nwhen ready\n one\nend", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); }", + ), + ( + "if value == 1 then one() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.case_value(node).map(|value| { + ( + 
value.kind().to_string(), + super::node_text(value, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature(source, language, suffix, "case_value", kind, text), + "case_value mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn case_arms_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nwhen 2\n two\nelse\n other\nend\n", + Language::Ruby, + ".rb", + "case", + "case value\nwhen 1\n one\nwhen 2\n two\nelse\n other\nend", + ), + ( + "match value:\n case 1:\n one()\n case _:\n other()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()\n case _:\n other()", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); default: other(); }", + ), + ( + "if value == 1 then one() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .case_arms(node) + .into_iter() + .map(|arm| { + ( + arm.kind().to_string(), + super::node_text(arm, source).to_string(), + ) + }) + .collect::>(); + + assert_eq!( + rust, + ruby_private_node_list_signature(source, language, suffix, "case_arms", kind, text), + "case_arms mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn when_body_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when 1\n one", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "case_clause", + "case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_case", + "case 
1: one();", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_default", + "default: other();", + ), + ( + "if value == 1 then one() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.when_body(node).map(|body| { + ( + body.kind().to_string(), + super::node_text(body, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature(source, language, suffix, "when_body", kind, text), + "when_body mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_when_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when 1\n one", + ), + ( + "case value\nwhen Foo::Bar\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when Foo::Bar\n one", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "case_clause", + "case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); break; default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_case", + "case 1: one(); break;", + ), + ( + "if value == 1 then one() else other() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() else other() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_when(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_when", + kind, + text + ), + "normalize_when mismatch for 
{language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn case_else_body_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nelse\n other\nend\n", + Language::Ruby, + ".rb", + "case", + "case value\nwhen 1\n one\nelse\n other\nend", + ), + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "case", + "case value\nwhen 1\n one\nend", + ), + ( + "match value:\n case 1:\n one()\n case _:\n other()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()\n case _:\n other()", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); break; default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); break; default: other(); }", + ), + ( + "switch (value) { case 1: one(); break; }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); break; }", + ), + ( + "if value == 1 then one() else other() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() else other() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .case_else_body(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "case_else_body", + kind, + text + ), + "case_else_body mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_case_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nwhen 2\n two\nelse\n other\nend\n", + Language::Ruby, + ".rb", + "case", + "case value\nwhen 1\n one\nwhen 2\n two\nelse\n 
other\nend", + ), + ( + "case\nwhen ready\n one\nelse\n other\nend\n", + Language::Ruby, + ".rb", + "case", + "case\nwhen ready\n one\nelse\n other\nend", + ), + ( + "match value:\n case 1:\n one()\n case _:\n other()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()\n case _:\n other()", + ), + ( + "switch (value) { case 1: one(); break; default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); break; default: other(); }", + ), + ( + "if value == 1 then one() else other() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() else other() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_case(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_case", + kind, + text + ), + "normalize_case mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn dotted_call_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ("user&.name\n", Language::Ruby, ".rb", "call", "user&.name"), + ("user\n", Language::Ruby, ".rb", "identifier", "user"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user\n", + Language::Python, + ".py", + "expression_statement", + "user", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ("user;\n", Language::TypeScript, ".ts", "identifier", "user"), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ("user()\n", Language::Lua, ".lua", "function_call", "user()"), + ] { + let tree 
= raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.dotted_call(node), + ruby_private_predicate(source, language, suffix, "dotted_call?", kind, text), + "dotted_call? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn dotted_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n user.name\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name", + ), + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ( + "user.name\n", + Language::Python, + ".py", + "expression_statement", + "user.name", + ), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "user.name;", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.dotted_expression(node), + ruby_private_predicate(source, language, suffix, "dotted_expression?", kind, text), + "dotted_expression? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn dotted_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n user.name\nend\n", Language::Ruby, ".rb"), + ("user.name\n", Language::Python, ".py"), + ] { + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn normalize_else_or_branch_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "if ready\n call\nelse\n stop\nend\n", + Language::Ruby, + ".rb", + "else", + "else\n stop", + ), + ( + "if ready\n call\nelse\n user.name\nend\n", + Language::Ruby, + ".rb", + "else", + "else\n user.name", + ), + ( + "if ready:\n call()\nelse:\n stop()\n", + Language::Python, + ".py", + "else_clause", + "else:\n stop()", + ), + ( + "if ready:\n call()\nelse:\n if backup:\n stop()\n", + Language::Python, + ".py", + "else_clause", + "else:\n if backup:\n stop()", + ), + ( + "if (ready) { call(); } else { stop(); }\n", + Language::TypeScript, + ".ts", + "else_clause", + "else { stop(); }", + ), + ( + "if ready then\n call()\nelse\n stop()\nend\n", + Language::Lua, + ".lua", + "else_statement", + "else\n stop()", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_else_or_branch(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_else_or_branch", + kind, + text + ), + "normalize_else_or_branch mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_if_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "if ready\n call\nelse\n stop\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nelse\n stop\nend", + ), + ( + "call if ready\n", + Language::Ruby, + ".rb", + 
"if_modifier", + "call if ready", + ), + ( + "unless ready\n call\nend\n", + Language::Ruby, + ".rb", + "unless", + "unless ready\n call\nend", + ), + ( + "if ready:\n call()\nelse:\n stop()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()\nelse:\n stop()", + ), + ( + "if ready:\n call()\nelif other:\n stop()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()\nelif other:\n stop()", + ), + ( + "if (ready) { call(); } else { stop(); }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (ready) { call(); } else { stop(); }", + ), + ( + "if ready then\n call()\nelseif other then\n stop()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nelseif other then\n stop()\nend", + ), + ( + "if ready then\n call()\nelse\n stop()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nelse\n stop()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_if(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_if", + kind, + text + ), + "normalize_if mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_elsif_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "if ready\n call\nelsif other\n stop\nend\n", + "elsif", + "elsif other\n stop", + ), + ( + "if ready\n call\nelsif other\n stop\nelse\n done\nend\n", + "elsif", + "elsif other\n stop\nelse\n done", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = node_value(&normalizer.normalize_elsif(node)); + + assert_eq!( 
+ rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_elsif", + kind, + text + ), + "normalize_elsif mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_loop_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "while ready\n work\nend\n", + Language::Ruby, + ".rb", + "while", + "while ready\n work\nend", + ), + ( + "work while ready\n", + Language::Ruby, + ".rb", + "while_modifier", + "work while ready", + ), + ( + "work until ready\n", + Language::Ruby, + ".rb", + "until_modifier", + "work until ready", + ), + ( + "for item in items\n work\nend\n", + Language::Ruby, + ".rb", + "for", + "for item in items\n work\nend", + ), + ( + "while ready:\n work()\n", + Language::Python, + ".py", + "while_statement", + "while ready:\n work()", + ), + ( + "for item in items:\n work()\n", + Language::Python, + ".py", + "for_statement", + "for item in items:\n work()", + ), + ( + "while ready do\n work()\nend\n", + Language::Lua, + ".lua", + "while_statement", + "while ready do\n work()\nend", + ), + ( + "while (ready) { work(); }\n", + Language::TypeScript, + ".ts", + "while_statement", + "while (ready) { work(); }", + ), + ( + "for (let i = 0; i < n; i++) { work(i); }\n", + Language::TypeScript, + ".ts", + "for_statement", + "for (let i = 0; i < n; i++) { work(i); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let node_type = super::loop_kind(node.kind()).expect("test node should be a loop kind"); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_loop(node, node_type) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_loop", + kind, + text + ), + "normalize_loop mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn 
ruby_elsif_normalization_matches_ruby() { + for source in [ + "if ready\n call\nelsif other\n stop\nend\n", + "if ready\n call\nelsif other\n stop\nelse\n done\nend\n", + ] { + let root = parse_language_source(source, Language::Ruby, ".rb"); + let if_node = first_node(&root, "IF", source.trim_end()); + + assert_eq!( + child_node(if_node, 2).r#type, + "IF", + "expected Ruby elsif alternative to normalize as nested IF: {if_node:#?}" + ); + assert_ruby_parity(source, Language::Ruby, ".rb"); + } +} + +#[test] +fn normalize_dotted_expression_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n user.name\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name", + ), + ( + "def f\n user.name { value }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name { value }", + ), + ( + "user.name\n", + Language::Python, + ".py", + "expression_statement", + "user.name", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "user.name;", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_dotted_expression(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_dotted_expression", + kind, + text + ), + "normalize_dotted_expression mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_dotted_call_expression_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n user.name\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name", + ), + ( + "def f\n user.name(1)\nend\n", + Language::Ruby, + ".rb", + "body_statement", 
+ "user.name(1)", + ), + ( + "def f\n user&.name\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user&.name", + ), + ( + "def f\n user.name { value }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name { value }", + ), + ( + "user.name\n", + Language::Python, + ".py", + "expression_statement", + "user.name", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_dotted_call_expression(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_dotted_call_expression", + kind, + text + ), + "normalize_dotted_call_expression mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn argument_list_call_with_block_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return foo { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo { bar }", + ), + ( + "def f\n return foo do\n bar\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo do\n bar\n end", + ), + ( + "def f\n return foo(1) { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo(1) { bar }", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ( + "def f\n return foo.bar { baz }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo.bar { baz }", + ), + ( + "def f\n return Foo { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "Foo { bar }", + ), + ( + "def f():\n return foo(lambda: bar)\n", + Language::Python, + ".py", + 
"argument_list", + "(lambda: bar)", + ), + ( + "function f(){ return foo(() => bar); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(() => bar)", + ), + ( + "function f() return foo(function() return bar end) end\n", + Language::Lua, + ".lua", + "arguments", + "(function() return bar end)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.argument_list_call_with_block(node), + ruby_private_predicate( + source, + language, + suffix, + "argument_list_call_with_block?", + kind, + text + ), + "argument_list_call_with_block? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_argument_list_call_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return foo { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo { bar }", + ), + ( + "def f\n return foo do\n bar\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo do\n bar\n end", + ), + ( + "def f\n return foo(1) { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo(1) { bar }", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ( + "def f():\n return foo(lambda: bar)\n", + Language::Python, + ".py", + "argument_list", + "(lambda: bar)", + ), + ( + "function f(){ return foo(() => bar); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(() => bar)", + ), + ( + "function f() return foo(function() return bar end) end\n", + Language::Lua, + ".lua", + "arguments", + "(function() return bar end)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_argument_list_call(node) + .map(|node| 
node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_argument_list_call", + kind, + text + ), + "normalize_argument_list_call mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_argument_list_call_with_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return foo { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo { bar }", + ), + ( + "def f\n return foo do\n bar\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo do\n bar\n end", + ), + ( + "def f\n return foo(1) { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo(1) { bar }", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ( + "def f():\n return foo(lambda: bar)\n", + Language::Python, + ".py", + "argument_list", + "(lambda: bar)", + ), + ( + "function f(){ return foo(() => bar); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(() => bar)", + ), + ( + "function f() return foo(function() return bar end) end\n", + Language::Lua, + ".lua", + "arguments", + "(function() return bar end)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_argument_list_call_with_block(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_argument_list_call_with_block", + kind, + text + ), + "normalize_argument_list_call_with_block mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn safe_navigation_call_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("user&.name\n", Language::Ruby, 
".rb", "call", "user&.name"), + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user?.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user?.name", + ), + ( + "user?.name();\n", + Language::TypeScript, + ".ts", + "call_expression", + "user?.name()", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.safe_navigation_call(node), + ruby_private_predicate( + source, + language, + suffix, + "safe_navigation_call?", + kind, + text + ), + "safe_navigation_call? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn rescue_source_end_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "begin\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Error => e\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n handle()\n", + Language::Python, + ".py", + "except_clause", + "except Error as e:\n handle()", + ), + ( + "try { work(); } catch (e) { handle(); }\n", + Language::TypeScript, + ".ts", + "catch_clause", + "catch (e) { handle(); }", + ), + ("work()\n", Language::Lua, ".lua", "function_call", "work()"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.rescue_source_end(node).map(|source_end| { + ( + source_end.kind().to_string(), + super::node_text(source_end, source).to_string(), + ) + }); + + assert_eq!( + rust, + 
ruby_private_node_signature(source, language, suffix, "rescue_source_end", kind, text), + "rescue_source_end mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn rescue_exception_variable_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "begin\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Error => e\n handle", + ), + ( + "begin\n work\nrescue Error\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Error\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n handle()\n", + Language::Python, + ".py", + "except_clause", + "except Error as e:\n handle()", + ), + ( + "try:\n work()\nexcept Error:\n handle()\n", + Language::Python, + ".py", + "except_clause", + "except Error:\n handle()", + ), + ( + "try { work(); } catch (e) { handle(); }\n", + Language::TypeScript, + ".ts", + "catch_clause", + "catch (e) { handle(); }", + ), + ("work()\n", Language::Lua, ".lua", "function_call", "work()"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .rescue_exception_variable(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "rescue_exception_variable", + kind, + text + ), + "rescue_exception_variable mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_rescue_clause_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "begin\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Error => e\n handle", + ), + ( + "begin\n work\nrescue Net::Error\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Net::Error\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n 
handle(e)\n", + Language::Python, + ".py", + "except_clause", + "except Error as e:\n handle(e)", + ), + ( + "try { work(); } catch (e) { handle(e); }\n", + Language::TypeScript, + ".ts", + "catch_clause", + "catch (e) { handle(e); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_rescue_clause(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_rescue_clause", + kind, + text + ), + "normalize_rescue_clause mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_rescue_modifier_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [( + "value rescue fallback\n", + Language::Ruby, + ".rb", + "rescue_modifier", + "value rescue fallback", + )] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_rescue_modifier(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_rescue_modifier", + kind, + text + ), + "normalize_rescue_modifier mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn prepend_rescue_exception_assignment_matches_ruby_private_method() { + fn synthetic_node( + node_type: &str, + text: &str, + first_lineno: usize, + first_column: usize, + last_lineno: usize, + last_column: usize, + children: Vec, + ) -> Node { + Node { + r#type: node_type.to_string(), + children, + first_lineno, + first_column, + last_lineno, + last_column, + text: text.to_string(), + } + } + + let source = "assign\nbody\n"; + let assignment 
= synthetic_node("LASGN", "assign", 1, 0, 1, 6, Vec::new()); + let body = synthetic_node("VCALL", "body", 2, 0, 2, 4, Vec::new()); + let block = synthetic_node( + "BLOCK", + "body", + 2, + 0, + 2, + 4, + vec![Child::Node(Box::new(body.clone())), Child::Nil], + ); + + for (label, body_node, assignment_node) in [ + ("no_assignment", Some(body.clone()), None), + ("no_body", None, Some(assignment.clone())), + ("block_body", Some(block), Some(assignment.clone())), + ("scalar_body", Some(body), Some(assignment)), + ] { + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .prepend_rescue_exception_assignment(body_node.clone(), assignment_node.clone()) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + let body_value = body_node.as_ref().map(node_value).unwrap_or(Value::Null); + let assignment_value = assignment_node + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_prepend_rescue_exception_assignment_value( + source, + &body_value, + &assignment_value + ), + "prepend_rescue_exception_assignment mismatch for {label}" + ); + } +} + +#[test] +fn dotted_call_parts_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ("user&.name\n", Language::Ruby, ".rb", "call", "user&.name"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .dotted_call_parts(node, None) + .map(|(receiver, method)| { + ( + receiver.kind().to_string(), + 
super::node_text(receiver, source).to_string(), + method, + ) + }); + + assert_eq!( + rust, + ruby_private_dotted_call_parts(source, language, suffix, kind, text), + "dotted_call_parts mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn dotted_call_parts_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("user.name\n", Language::Ruby, ".rb"), + ("user&.name\n", Language::Ruby, ".rb"), + ("user.name()\n", Language::Python, ".py"), + ("user.name();\n", Language::TypeScript, ".ts"), + ("user.name()\n", Language::Lua, ".lua"), + ] { + let root = parse_language_source(source, language, suffix); + if language != Language::Lua { + let mut calls = Vec::new(); + nodes_of_type(&root, "CALL", &mut calls); + let mut qcalls = Vec::new(); + nodes_of_type(&root, "QCALL", &mut qcalls); + assert!( + calls + .iter() + .chain(qcalls.iter()) + .any(|node| matches!(node.children.get(1), Some(Child::Symbol(method)) if method == "name")), + "expected dotted call method name for {language:?} in {root:#?}" + ); + } + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn leading_if_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n if x\n y\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "if x\n y\n end", + ), + ( + "def f():\n if x:\n y()\n", + Language::Python, + ".py", + "block", + "if x:\n y()", + ), + ( + "function f()\n if x then\n y()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if x then\n y()\n end", + ), + ( + "function f() { if (x) { y(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (x) { y(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.leading_if_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_if_statement?", 
+ kind, + text + ), + "leading_if_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_leading_if_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n if x\n y\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "if x\n y\n end", + ), + ( + "def f():\n if x:\n y()\n", + Language::Python, + ".py", + "block", + "if x:\n y()", + ), + ( + "function f()\n if x then\n y()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if x then\n y()\n end", + ), + ( + "function f() { if (x) { y(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (x) { y(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_if_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_if_statement", + kind, + text + ), + "normalize_leading_if_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn leading_if_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n if x\n y\n end\nend\n", Language::Ruby, ".rb"), + ( + "def f():\n if x:\n y()\n", + Language::Python, + ".py", + ), + ( + "function f()\n if x then\n y()\n end\nend\n", + Language::Lua, + ".lua", + ), + ( + "function f() { if (x) { y(); } }\n", + Language::TypeScript, + ".ts", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut if_nodes = Vec::new(); + nodes_of_type(&root, "IF", &mut if_nodes); + assert!( + !if_nodes.is_empty(), + "expected IF node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn leading_case_statement_matches_ruby_private_predicate() { + 
for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "case x\n when 1 then y\n else z\n end", + ), + ( + "def f(x):\n match x:\n case 1:\n y()\n", + Language::Python, + ".py", + "block", + "match x:\n case 1:\n y()", + ), + ( + "function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (x) { case 1: y(); break; default: z(); }", + ), + ( + "function f(x)\n if x == 1 then y() end\nend\n", + Language::Lua, + ".lua", + "block", + "if x == 1 then y() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.leading_case_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_case_statement?", + kind, + text + ), + "leading_case_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_leading_case_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "case x\n when 1 then y\n else z\n end", + ), + ( + "def f(x):\n match x:\n case 1:\n y()\n", + Language::Python, + ".py", + "block", + "match x:\n case 1:\n y()", + ), + ( + "function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (x) { case 1: y(); break; default: z(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_case_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_case_statement", + kind, + text + ), + "normalize_leading_case_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn leading_case_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", + Language::Ruby, + ".rb", + ), + ( + "def f(x):\n match x:\n case 1:\n y()\n", + Language::Python, + ".py", + ), + ( + "function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", + Language::TypeScript, + ".ts", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut case_nodes = Vec::new(); + nodes_of_type(&root, "CASE", &mut case_nodes); + assert!( + !case_nodes.is_empty(), + "expected CASE node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn leading_loop_statement_matches_ruby_private_predicate() { + 
for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n while x\n y\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "while x\n y\n end", + ), + ( + "def f(x):\n while x:\n y()\n", + Language::Python, + ".py", + "block", + "while x:\n y()", + ), + ( + "function f(x)\n while x do\n y()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "while x do\n y()\n end", + ), + ( + "function f(x) { while (x) { y(); } }\n", + Language::TypeScript, + ".ts", + "while_statement", + "while (x) { y(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.leading_loop_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_loop_statement?", + kind, + text + ), + "leading_loop_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_leading_loop_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n while x\n y\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "while x\n y\n end", + ), + ( + "def f(x)\n until x\n y\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "until x\n y\n end", + ), + ( + "def f(x):\n while x:\n y()\n", + Language::Python, + ".py", + "block", + "while x:\n y()", + ), + ( + "function f(x)\n while x do\n y()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "while x do\n y()\n end", + ), + ( + "function f(x) { while (x) { y(); } }\n", + Language::TypeScript, + ".ts", + "while_statement", + "while (x) { y(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_loop_statement(node) + .map(|node| node_value(&node)) + 
.unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_loop_statement", + kind, + text + ), + "normalize_leading_loop_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn leading_loop_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def f(x)\n while x\n y\n end\nend\n", + Language::Ruby, + ".rb", + ), + ( + "def f(x):\n while x:\n y()\n", + Language::Python, + ".py", + ), + ( + "function f(x)\n while x do\n y()\n end\nend\n", + Language::Lua, + ".lua", + ), + ( + "function f(x) { while (x) { y(); } }\n", + Language::TypeScript, + ".ts", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut while_nodes = Vec::new(); + nodes_of_type(&root, "WHILE", &mut while_nodes); + assert!( + !while_nodes.is_empty(), + "expected WHILE node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn rescue_body_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "work\nrescue Error => e\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nexcept Error as e:\n handle(e)", + ), + ( + "try { work(); } catch (e) { handle(e); }\n", + Language::TypeScript, + ".ts", + "try_statement", + "try { work(); } catch (e) { handle(e); }", + ), + ( + "local ok, err = pcall(work)\n", + Language::Lua, + ".lua", + "variable_declaration", + "local ok, err = pcall(work)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.rescue_body_statement(node), + ruby_private_predicate( + source, 
+ language, + suffix, + "rescue_body_statement?", + kind, + text + ), + "rescue_body_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_rescue_body_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "work\nrescue Error => e\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nexcept Error as e:\n handle(e)", + ), + ( + "try { work(); } catch (e) { handle(e); }\n", + Language::TypeScript, + ".ts", + "try_statement", + "try { work(); } catch (e) { handle(e); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_rescue_body_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_rescue_body_statement", + kind, + text + ), + "normalize_rescue_body_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn rescue_body_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def f\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\n", + Language::Python, + ".py", + ), + ( + "try { work(); } catch (e) { handle(e); }\n", + Language::TypeScript, + ".ts", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut rescue_nodes = Vec::new(); + nodes_of_type(&root, "RESCUE", &mut rescue_nodes); + assert!( + !rescue_nodes.is_empty(), + "expected RESCUE node for {language:?} in {root:#?}" + ); + let mut resbody_nodes = Vec::new(); + nodes_of_type(&root, 
"RESBODY", &mut resbody_nodes); + assert!( + !resbody_nodes.is_empty(), + "expected RESBODY node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn ensure_body_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n work\nensure\n cleanup\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "work\nensure\n cleanup", + ), + ( + "try:\n work()\nfinally:\n cleanup()\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nfinally:\n cleanup()", + ), + ( + "try { work(); } finally { cleanup(); }\n", + Language::TypeScript, + ".ts", + "try_statement", + "try { work(); } finally { cleanup(); }", + ), + ( + "work()\ncleanup()\n", + Language::Lua, + ".lua", + "function_call", + "work()", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ensure_body_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "ensure_body_statement?", + kind, + text + ), + "ensure_body_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ensure_body_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def f\n work\nensure\n cleanup\nend\n", + Language::Ruby, + ".rb", + ), + ( + "try:\n work()\nfinally:\n cleanup()\n", + Language::Python, + ".py", + ), + ( + "try { work(); } finally { cleanup(); }\n", + Language::TypeScript, + ".ts", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\nfinally:\n cleanup()\n", + Language::Python, + ".py", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut ensure_nodes = Vec::new(); + nodes_of_type(&root, "ENSURE", &mut ensure_nodes); + assert!( + !ensure_nodes.is_empty(), + "expected ENSURE node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn normalize_ensure_body_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n work\nensure\n cleanup\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "work\nensure\n cleanup", + ), + ( + "try:\n work()\nfinally:\n cleanup()\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nfinally:\n cleanup()", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\nfinally:\n cleanup()\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nexcept Error as e:\n handle(e)\nfinally:\n cleanup()", + ), + ( + "try { work(); } finally { cleanup(); }\n", + Language::TypeScript, + ".ts", + "try_statement", + "try { work(); } finally { cleanup(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_ensure_body_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + 
"normalize_ensure_body_statement", + kind, + text + ), + "normalize_ensure_body_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_ensure_clause_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "begin\n work\nensure\n cleanup\nend\n", + "ensure", + "ensure\n cleanup", + ), + ( + "begin\n work\nensure\n user.name\nend\n", + "ensure", + "ensure\n user.name", + ), + ( + "begin\n work\nensure\n user.name\n cleanup\nend\n", + "ensure", + "ensure\n user.name\n cleanup", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_ensure_clause(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_ensure_clause", + kind, + text + ), + "normalize_ensure_clause mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn ruby_begin_ensure_clause_keeps_all_body_statements() { + let source = "begin\n work\nensure\n user.name\n cleanup\nend\n"; + let root = parse_language_source(source, Language::Ruby, ".rb"); + let ensure = first_node(&root, "ENSURE", "work\nensure\n user.name\n cleanup"); + let ensure_body = child_node(ensure, 1); + + assert_eq!( + child_types(ensure_body), + vec!["CALL", "VCALL"], + "Ruby ensure clause body must retain all statements: {ensure:#?}" + ); + assert_ruby_parity(source, Language::Ruby, ".rb"); +} + +#[test] +fn array_literal_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n [a, b]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "[a, b]", + ), + ( + "def f():\n [a, b]\n", + Language::Python, + ".py", + "block", + "[a, b]", + ), + ( + "function f() { [a, b]; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + 
"[a, b];", + ), + ( + "function f()\n {a, b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a, b}", + ), + ( + "function f()\n {x = a, y = b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {x = a, y = b}", + ), + ( + "local rocks_path = table.concat({rocks_tree, \"a_rock\"})\n", + Language::Lua, + ".lua", + "arguments", + "({rocks_tree, \"a_rock\"})", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.array_literal_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "array_literal_statement?", + kind, + text + ), + "array_literal_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn array_literal_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n [a, b]\nend\n", Language::Ruby, ".rb"), + ("def f():\n [a, b]\n", Language::Python, ".py"), + ("function f() { [a, b]; }\n", Language::TypeScript, ".ts"), + ("function f()\n {a, b}\nend\n", Language::Lua, ".lua"), + ] { + let root = parse_language_source(source, language, suffix); + let mut lists = Vec::new(); + nodes_of_type(&root, "LIST", &mut lists); + assert!( + lists + .iter() + .any(|node| node.text.contains('a') && node.text.contains('b')), + "expected LIST for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn normalize_array_literal_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n [a, b]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "[a, b]", + ), + ( + "def f\n []\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "[]", + ), + ( + "def f():\n [a, b]\n", + Language::Python, + ".py", + "block", + "[a, b]", + ), + ("def f():\n []\n", Language::Python, ".py", "block", "[]"), + ( + "function f() { [a, b]; }\n", + 
Language::TypeScript, + ".ts", + "expression_statement", + "[a, b];", + ), + ( + "function f() { []; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "[];", + ), + ( + "function f()\n {a, b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a, b}", + ), + ( + "assert.same(install, { bin = { P\"bin/binfile\" } })\n", + Language::Lua, + ".lua", + "arguments", + "(install, { bin = { P\"bin/binfile\" } })", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_array_literal_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_array_literal_statement", + kind, + text + ), + "normalize_array_literal_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn element_reference_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items[0]", + ), + ( + "def f\n [0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "[0]", + ), + ( + "def f():\n items[0]\n", + Language::Python, + ".py", + "block", + "items[0]", + ), + ( + "return items[0]\n", + Language::Python, + ".py", + "subscript", + "items[0]", + ), + ( + "function f() { items[0]; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "items[0];", + ), + ( + "return items[0];\n", + Language::TypeScript, + ".ts", + "subscript_expression", + "items[0]", + ), + ( + "return items[1]\n", + Language::Lua, + ".lua", + "expression_list", + "items[1]", + ), + ( + "print(items[1])\n", + Language::Lua, + ".lua", + "bracket_index_expression", + "items[1]", + ), + ] { + let tree = raw_tree(source, language); + let node = 
first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.element_reference_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "element_reference_statement?", + kind, + text + ), + "element_reference_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_element_reference_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items[0]", + ), + ( + "def f\n self[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "self[0]", + ), + ( + "return items[0]\n", + Language::Python, + ".py", + "subscript", + "items[0]", + ), + ( + "return items[0];\n", + Language::TypeScript, + ".ts", + "subscript_expression", + "items[0]", + ), + ( + "print(items[1])\n", + Language::Lua, + ".lua", + "bracket_index_expression", + "items[1]", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_element_reference(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_element_reference", + kind, + text + ), + "normalize_element_reference mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_element_reference_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items[0]", + ), + ( + "def f\n self[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "self[0]", + ), + ( + "def f():\n items[0]\n", + Language::Python, + ".py", + "block", + "items[0]", + ), + ( + "return items[0]\n", 
+ Language::Python, + ".py", + "subscript", + "items[0]", + ), + ( + "function f() { items[0]; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "items[0];", + ), + ( + "return items[0];\n", + Language::TypeScript, + ".ts", + "subscript_expression", + "items[0]", + ), + ( + "return items[1]\n", + Language::Lua, + ".lua", + "expression_list", + "items[1]", + ), + ( + "print(items[1])\n", + Language::Lua, + ".lua", + "bracket_index_expression", + "items[1]", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_element_reference_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_element_reference_statement", + kind, + text + ), + "normalize_element_reference_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn element_reference_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n items[0]\nend\n", Language::Ruby, ".rb"), + ("def f():\n items[0]\n", Language::Python, ".py"), + ("function f() { items[0]; }\n", Language::TypeScript, ".ts"), + ("return items[1]\n", Language::Lua, ".lua"), + ] { + let root = parse_language_source(source, language, suffix); + let mut calls = Vec::new(); + nodes_of_type(&root, "CALL", &mut calls); + assert!( + calls.iter().any(|node| { + matches!(node.children.get(1), Some(Child::Symbol(message)) if message == "[]") + && node.text.contains("items") + }), + "expected element reference CALL for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn hash_literal_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n {a: b}\nend\n", + Language::Ruby, + ".rb", 
+ "body_statement", + "{a: b}", + ), + ( + "def f():\n {\"a\": b}\n", + Language::Python, + ".py", + "block", + "{\"a\": b}", + ), + ( + "function f() { ({a: b}); }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "({a: b});", + ), + ( + "return {a: b};\n", + Language::TypeScript, + ".ts", + "object", + "{a: b}", + ), + ( + "function f()\n {a = b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a = b}", + ), + ( + "function f()\n {a, b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a, b}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.hash_literal_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "hash_literal_statement?", + kind, + text + ), + "hash_literal_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_hash_literal_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n {a: b}\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "{a: b}", + ), + ( + "def f():\n {\"a\": b}\n", + Language::Python, + ".py", + "block", + "{\"a\": b}", + ), + ( + "function f() { ({a: b}); }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "({a: b});", + ), + ( + "return {a: b};\n", + Language::TypeScript, + ".ts", + "object", + "{a: b}", + ), + ( + "function f()\n {a = b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a = b}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_hash_literal_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + 
"normalize_hash_literal_statement", + kind, + text + ), + "normalize_hash_literal_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_pair_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n {a: b}\nend\n", + Language::Ruby, + ".rb", + "pair", + "a: b", + ), + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "pair", + "name:", + ), + ( + "def f\n {\"a\" => b}\nend\n", + Language::Ruby, + ".rb", + "pair", + "\"a\" => b", + ), + ( + "def f():\n {\"a\": b}\n", + Language::Python, + ".py", + "pair", + "\"a\": b", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_pair(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_pair", + kind, + text + ), + "normalize_pair mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn hash_literal_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n {a: b}\nend\n", Language::Ruby, ".rb"), + ("def f():\n {\"a\": b}\n", Language::Python, ".py"), + ("function f() { ({a: b}); }\n", Language::TypeScript, ".ts"), + ("function f()\n {a = b}\nend\n", Language::Lua, ".lua"), + ] { + let root = parse_language_source(source, language, suffix); + let mut hashes = Vec::new(); + nodes_of_type(&root, "HASH", &mut hashes); + assert!( + hashes + .iter() + .any(|node| node.text.contains('a') && node.text.contains('b')), + "expected hash literal HASH for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn empty_body_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f():\n pass\n", + Language::Python, + ".py", + 
"block", + "pass", + ), + ( + "function f() {}\n", + Language::TypeScript, + ".ts", + "statement_block", + "{}", + ), + ( + "function f() { work(); }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ work(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.empty_body_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "empty_body_statement?", + kind, + text + ), + "empty_body_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn empty_body_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f():\n pass\n", Language::Python, ".py"), + ("function f() {}\n", Language::TypeScript, ".ts"), + ] { + let root = parse_language_source(source, language, suffix); + let mut defns = Vec::new(); + nodes_of_type(&root, "DEFN", &mut defns); + let scope = child_node(defns[0], 1); + assert!( + matches!(scope.children.get(2), Some(Child::Nil)), + "expected empty body for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn heredoc_body_statement_matches_ruby_private_predicate() { + let ruby_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; + for (source, language, suffix, kind, text) in [ + ( + ruby_source, + Language::Ruby, + ".rb", + "body_statement", + "puts <<~TXT\n hi\n TXT", + ), + (ruby_source, Language::Ruby, ".rb", "call", "puts <<~TXT"), + ( + "def f():\n value = 1\n", + Language::Python, + ".py", + "block", + "value = 1", + ), + ( + "function f() { value = 1; }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ value = 1; }", + ), + ( + "function f()\n value = 1\nend\n", + Language::Lua, + ".lua", + "block", + "value = 1", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let 
normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.heredoc_body_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "heredoc_body_statement?", + kind, + text + ), + "heredoc_body_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn heredoc_call_for_body_matches_ruby_private_predicate() { + let ruby_arg_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; + let ruby_receiver_source = "def emit\n <<~ZIG.chomp\n hi\n ZIG\nend\n"; + for (source, language, suffix, kind, text) in [ + ( + ruby_arg_source, + Language::Ruby, + ".rb", + "body_statement", + "puts <<~TXT\n hi\n TXT", + ), + ( + ruby_arg_source, + Language::Ruby, + ".rb", + "call", + "puts <<~TXT", + ), + ( + ruby_arg_source, + Language::Ruby, + ".rb", + "argument_list", + "<<~TXT", + ), + ( + ruby_arg_source, + Language::Ruby, + ".rb", + "method", + "def f\n puts <<~TXT\n hi\n TXT\nend", + ), + ( + ruby_receiver_source, + Language::Ruby, + ".rb", + "call", + "<<~ZIG.chomp", + ), + ( + ruby_receiver_source, + Language::Ruby, + ".rb", + "heredoc_beginning", + "<<~ZIG", + ), + ( + "def f():\n value = 1\n", + Language::Python, + ".py", + "block", + "value = 1", + ), + ( + "function f() { value = 1; }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ value = 1; }", + ), + ( + "function f()\n value = 1\nend\n", + Language::Lua, + ".lua", + "block", + "value = 1", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.heredoc_call_for_body(node), + ruby_private_predicate( + source, + language, + suffix, + "heredoc_call_for_body?", + kind, + text + ), + "heredoc_call_for_body? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn with_current_heredoc_body_restores_previous_body() { + let source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let body = first_raw_node(tree.root_node(), source, "heredoc_body", "\n hi\n TXT"); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + normalizer.current_heredoc_body_span = Some([9, 2, 9, 7]); + + let result = normalizer.with_current_heredoc_body(Some(body), |normalizer| { + assert_eq!( + normalizer.current_heredoc_body_span, + Some(super::span(body)) + ); + "result" + }); + + assert_eq!(result, "result"); + assert_eq!(normalizer.current_heredoc_body_span, Some([9, 2, 9, 7])); +} + +#[test] +fn normalize_interpolation_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "name = \"#{user}\"\n", + Language::Ruby, + ".rb", + "interpolation", + "#{user}", + ), + ( + "name = \"#{a; b}\"\n", + Language::Ruby, + ".rb", + "interpolation", + "#{a; b}", + ), + ( + "name = f\"hi {user}\"\n", + Language::Python, + ".py", + "interpolation", + "{user}", + ), + ( + "const name = `hi ${user}`;\n", + Language::TypeScript, + ".ts", + "template_substitution", + "${user}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_interpolation(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_interpolation", + kind, + text + ), + "normalize_interpolation mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_heredoc_children_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def f\n puts <<~TXT\n hi\n TXT\nend\n", + "heredoc_body", + "\n hi\n TXT", + ), + ( + 
"def f\n puts <<~TXT\n hi #{name}\n TXT\nend\n", + "heredoc_body", + "\n hi #{name}\n TXT", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = children_value(&normalizer.normalize_heredoc_children(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_heredoc_children", + kind, + text + ), + "normalize_heredoc_children mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_heredoc_beginning_matches_ruby_private_method() { + for (source, kind, text) in [( + "def emit\n <<~ZIG.chomp\n hi\n ZIG\nend\n", + "heredoc_beginning", + "<<~ZIG", + )] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = node_value(&normalizer.normalize_heredoc_beginning(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_heredoc_beginning", + kind, + text + ), + "normalize_heredoc_beginning mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_heredoc_beginning_uses_current_body_for_multiple_heredocs() { + let source = "def f\n puts <<~A, <<~B\n one\n A\n two\n B\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let beginning = first_raw_node(tree.root_node(), source, "heredoc_beginning", "<<~B"); + let body = first_raw_node(tree.root_node(), source, "heredoc_body", "\n two\n B"); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + + let dstr = normalizer.with_current_heredoc_body(Some(body), |normalizer| { + normalizer.normalize_heredoc_beginning(beginning) + }); + + let content = child_node(&dstr, 0); + assert_eq!(content.r#type, "STR"); + assert_eq!( + content.children, + 
vec![Child::String("\n two\n ".to_string())] + ); +} + +#[test] +fn normalize_heredoc_body_statement_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def f\n puts <<~TXT\n hi\n TXT\nend\n", + "body_statement", + "puts <<~TXT\n hi\n TXT", + ), + ( + "def emit\n <<~ZIG.chomp\n hi\n ZIG\nend\n", + "body_statement", + "<<~ZIG.chomp\n hi\n ZIG", + ), + ( + "def f\n puts <<~A, <<~B\n one\n A\n two\n B\nend\n", + "body_statement", + "puts <<~A, <<~B\n one\n A\n two\n B", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_heredoc_body_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_heredoc_body_statement", + kind, + text + ), + "normalize_heredoc_body_statement mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn interpolated_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n \"hi #{name}\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"hi #{name}\"", + ), + ( + "def f():\n f\"hi {name}\"\n", + Language::Python, + ".py", + "block", + "f\"hi {name}\"", + ), + ( + "function f() { `hi ${name}`; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "`hi ${name}`;", + ), + ( + "function f()\n \"hi\"\nend\n", + Language::Lua, + ".lua", + "block", + "\n \"hi\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.interpolated_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "interpolated_statement?", + kind, + text + ), + "interpolated_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn interpolated_statement_normalization_matches_ruby() { + let source = "def f\n \"hi #{name}\"\nend\n"; + let root = parse_language_source(source, Language::Ruby, ".rb"); + let dstr = first_node(&root, "DSTR", "\"hi #{name}\""); + + assert_eq!(child_types(dstr), vec!["STR", "EVSTR"]); + assert_ruby_parity(source, Language::Ruby, ".rb"); +} + +#[test] +fn normalize_interpolated_statement_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def f\n \"hi #{name}\"\nend\n", + "body_statement", + "\"hi #{name}\"", + ), + ( + "def f\n \"#{first} #{last}\"\nend\n", + "body_statement", + "\"#{first} #{last}\"", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = node_value(&normalizer.normalize_interpolated_statement(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_interpolated_statement", + kind, + text + ), + "normalize_interpolated_statement mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn concatenated_string_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b\"", + ), + ( + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "block", + "\"a\" \"b\"", + ), + ( + "function f() { \"a\"; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "\"a\";", + ), + ( + "function f()\n \"a\"\nend\n", + Language::Lua, + ".lua", + "block", + "\n \"a\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + 
normalizer.concatenated_string_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "concatenated_string_statement?", + kind, + text + ), + "concatenated_string_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn concatenated_string_statement_normalization_matches_ruby() { + for (source, language, suffix, expected_text, expected_types) in [ + ( + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "\"a\"", + vec!["STR", "STR"], + ), + ( + "def f\n \"a\" \"b #{name}\"\nend\n", + Language::Ruby, + ".rb", + "\"b #{name}\"", + vec!["STR", "STR", "EVSTR"], + ), + ( + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "\"a\"", + vec!["STR", "STR"], + ), + ( + "def f():\n \"a\" f\"b {name}\"\n", + Language::Python, + ".py", + "f\"b {name}\"", + vec!["STR", "STRING_START", "STR", "EVSTR", "STRING_END"], + ), + ] { + let root = parse_language_source(source, language, suffix); + let dstr = first_node(&root, "DSTR", expected_text); + + assert_eq!(child_types(dstr), expected_types); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn normalize_concatenated_string_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b\"", + ), + ( + "def f\n \"a\" \"b #{name}\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b #{name}\"", + ), + ( + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "block", + "\"a\" \"b\"", + ), + ( + "def f():\n \"a\" f\"b {name}\"\n", + Language::Python, + ".py", + "block", + "\"a\" f\"b {name}\"", + ), + ( + "function f() { \"a\"; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "\"a\";", + ), + ( + "function f()\n \"a\"\nend\n", + Language::Lua, + ".lua", + "block", + "\n \"a\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); 
+ let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.normalize_concatenated_string_statement(node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_concatenated_string_statement", + kind, + text + ), + "normalize_concatenated_string_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_chained_string_matches_ruby_private_method() { + for (source, language, suffix, ruby_kind, ruby_text, rust_kind, rust_text) in [ + ( + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b\"", + "chained_string", + "\"a\" \"b\"", + ), + ( + "def f\n \"a\" \"b #{name}\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b #{name}\"", + "chained_string", + "\"a\" \"b #{name}\"", + ), + ( + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "block", + "\"a\" \"b\"", + "concatenated_string", + "\"a\" \"b\"", + ), + ( + "def f():\n \"a\" f\"b {name}\"\n", + Language::Python, + ".py", + "block", + "\"a\" f\"b {name}\"", + "concatenated_string", + "\"a\" f\"b {name}\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, rust_kind, rust_text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.normalize_chained_string(node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_chained_string", + ruby_kind, + ruby_text + ), + "normalize_chained_string mismatch for {language:?} {rust_kind} {rust_text:?}" + ); + } +} + +#[test] +fn dynamic_string_source_matches_ruby_private_method() { + for (source, language, suffix, ruby_kind, ruby_text, rust_kind, rust_text) in [ + ( + "def f\n \"a\" \"b #{name}\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b #{name}\"", + "chained_string", + "\"a\" \"b 
#{name}\"", + ), + ( + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b\"", + "chained_string", + "\"a\" \"b\"", + ), + ( + "def f():\n \"a\" f\"b {name}\"\n", + Language::Python, + ".py", + "block", + "\"a\" f\"b {name}\"", + "concatenated_string", + "\"a\" f\"b {name}\"", + ), + ( + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "block", + "\"a\" \"b\"", + "concatenated_string", + "\"a\" \"b\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, rust_kind, rust_text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let mut normalized_children = Vec::new(); + for child in normalizer.named_children(node) { + let normalized = normalizer.normalize_node(child); + normalized_children.push((child, normalized)); + } + let rust = normalizer + .dynamic_string_source(&normalized_children) + .map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + let ruby = ruby_private_dynamic_string_source_signature( + source, language, suffix, ruby_kind, ruby_text, + ); + + assert_eq!( + rust, ruby, + "dynamic_string_source mismatch for {language:?} {rust_kind} {rust_text:?}" + ); + } +} + +#[test] +fn terminal_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n foo()\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "()", + ), + ( + "def f\n foo\n foo()\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo\n foo()", + ), + ( + "def f():\n foo()\n", + Language::Python, + ".py", + "argument_list", + "()", + ), + ( + "def f():\n foo\n", + Language::Python, + ".py", + "block", + "foo", + ), + ( + "function f() { foo(); }\n", + Language::TypeScript, + ".ts", + "arguments", + "()", + ), + ( + "function f()\n foo()\nend\n", + Language::Lua, + ".lua", + "arguments", + "()", + ), + ] { + let tree = raw_tree(source, language); + let node = 
first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.terminal_statement(node), + ruby_private_predicate(source, language, suffix, "terminal_statement?", kind, text), + "terminal_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_terminal_statement_matches_ruby_private_method() { + let cases = vec![ + ( + "yield\n", + Language::Ruby, + ".rb", + "yield", + "yield", + "yield", + Vec::<&str>::new(), + ), + ( + "@name\n", + Language::Ruby, + ".rb", + "instance_variable", + "instance_variable", + "@name", + Vec::<&str>::new(), + ), + ( + "$1\n$value\n", + Language::Ruby, + ".rb", + "global_variable", + "global_variable", + "$1", + Vec::<&str>::new(), + ), + ( + "$1\n$value\n", + Language::Ruby, + ".rb", + "global_variable", + "global_variable", + "$value", + Vec::<&str>::new(), + ), + ( + "nil\ntrue\nfalse\n", + Language::Ruby, + ".rb", + "nil", + "nil", + "nil", + Vec::<&str>::new(), + ), + ( + "nil\ntrue\nfalse\n", + Language::Ruby, + ".rb", + "true", + "true", + "true", + Vec::<&str>::new(), + ), + ( + "nil\ntrue\nfalse\n", + Language::Ruby, + ".rb", + "false", + "false", + "false", + Vec::<&str>::new(), + ), + ( + ":ready\n", + Language::Ruby, + ".rb", + "simple_symbol", + "simple_symbol", + ":ready", + Vec::<&str>::new(), + ), + ( + "-123\n", + Language::Ruby, + ".rb", + "unary", + "unary", + "-123", + Vec::<&str>::new(), + ), + ( + "[]\n", + Language::Ruby, + ".rb", + "array", + "array", + "[]", + Vec::<&str>::new(), + ), + ( + "foo\n", + Language::Ruby, + ".rb", + "identifier", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "foo\n", + Language::Ruby, + ".rb", + "identifier", + "identifier", + "foo", + vec!["foo"], + ), + ( + "foo\n", + Language::Python, + ".py", + "expression_statement", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "foo;\n", + Language::TypeScript, + ".ts", + "identifier", + 
"identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "foo()\n", + Language::Lua, + ".lua", + "identifier", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "foo()\n", + Language::Ruby, + ".rb", + "argument_list", + "argument_list", + "()", + Vec::<&str>::new(), + ), + ]; + + for (source, language, suffix, ruby_kind, rust_kind, text, locals) in cases { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, rust_kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + if !locals.is_empty() { + normalizer + .local_stack + .push(locals.iter().map(|name| name.to_string()).collect()); + } + let rust = node_value(&normalizer.normalize_terminal_statement(node)); + + assert_eq!( + rust, + ruby_private_normalize_terminal_statement_value( + source, + language, + suffix, + ruby_kind, + text, + &locals, + ), + "normalize_terminal_statement mismatch for {language:?} ruby={ruby_kind} rust={rust_kind} {text:?} locals={locals:?}" + ); + } +} + +#[test] +fn operator_assignment_statement_parts_matches_ruby_private_method() { + for (source, language, suffix, ruby_kind, ruby_text, rust_kind, rust_text) in [ + ( + "def f\n x += 1\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x += 1", + "operator_assignment", + "x += 1", + ), + ( + "def f\n x ||= y\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x ||= y", + "operator_assignment", + "x ||= y", + ), + ( + "def f\n x += 1\n y += 2\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x += 1\n y += 2", + "body_statement", + "x += 1\n y += 2", + ), + ( + "def f():\n x += 1\n", + Language::Python, + ".py", + "block", + "x += 1", + "augmented_assignment", + "x += 1", + ), + ( + "function f() { obj.x ||= y; }\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "obj.x ||= y", + "augmented_assignment_expression", + "obj.x ||= y", + ), + ( + "function f() { x += 1; }\n", + Language::TypeScript, + ".ts", + 
"expression_statement", + "x += 1;", + "expression_statement", + "x += 1;", + ), + ( + "function f()\n x = x + 1\nend\n", + Language::Lua, + ".lua", + "block", + "x = x + 1", + "block", + "x = x + 1", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, rust_kind, rust_text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = + normalizer + .operator_assignment_statement_parts(node) + .map(|(left, operator, right)| { + ( + left.kind().to_string(), + super::node_text(left, source).to_string(), + operator, + right.kind().to_string(), + super::node_text(right, source).to_string(), + ) + }); + let ruby = ruby_private_operator_assignment_statement_parts_signature( + source, language, suffix, ruby_kind, ruby_text, + ); + + assert_eq!( + rust, ruby, + "operator_assignment_statement_parts mismatch for {language:?} {rust_kind} {rust_text:?}" + ); + } +} + +#[test] +fn operator_assignment_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n x += 1\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x += 1", + ), + ( + "def f\n x ||= y\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x ||= y", + ), + ( + "def f\n x = 1\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x = 1", + ), + ( + "def f\n x += 1\n y += 2\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x += 1\n y += 2", + ), + ( + "def f():\n x += 1\n", + Language::Python, + ".py", + "block", + "x += 1", + ), + ( + "function f() { x += 1; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "x += 1;", + ), + ( + "function f()\n x = x + 1\nend\n", + Language::Lua, + ".lua", + "block", + "x = x + 1", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + 
normalizer.operator_assignment_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "operator_assignment_statement?", + kind, + text + ), + "operator_assignment_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_operator_assignment_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n x += 1\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x += 1", + ), + ( + "def f\n x ||= y\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x ||= y", + ), + ( + "def f\n items[index] += value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items[index] += value", + ), + ( + "def f\n object.value += 1\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "object.value += 1", + ), + ( + "def f():\n x += 1\n", + Language::Python, + ".py", + "block", + "x += 1", + ), + ( + "function f() { x += 1; }\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "x += 1", + ), + ( + "function f() { obj.x ||= y; }\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "obj.x ||= y", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_operator_assignment_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_operator_assignment_statement", + kind, + text + ), + "normalize_operator_assignment_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn python_f_string_interpolation_next_to_equals_is_evstr_not_assignment() { + let root = parse_language_source( + r#" +class Tag: + @property + def markup(self): + return f"[{self.name}={self.parameters}]" +"#, + Language::Python, + ".py", + 
); + let dstr = first_node(&root, "DSTR", r#"f"[{self.name}={self.parameters}]""#); + + let types = child_types(dstr); + assert_eq!( + types, + vec![ + "STRING_START", + "STR", + "EVSTR", + "STR", + "EVSTR", + "STR", + "STRING_END" + ], + "expected Ruby-style f-string interpolation parts in {dstr:#?}" + ); + assert!( + !types.contains(&"LASGN"), + "interpolation next to '=' must not normalize as assignment: {dstr:#?}" + ); +} + +#[test] +fn python_relative_import_prefix_only_has_no_children() { + let root = parse_language_source( + r#" +if __name__ == "__main__": + from . import box as box +"#, + Language::Python, + ".py", + ); + let relative_import = first_node(&root, "RELATIVE_IMPORT", "."); + + assert!( + relative_import.children.is_empty(), + "Ruby exposes bare relative import prefix as an empty RELATIVE_IMPORT: {relative_import:#?}" + ); +} + +#[test] +fn python_annotation_type_wrappers_match_ruby_tree_shape() { + let root = parse_language_source( + r#" +from typing import Callable + +_is_single_cell_widths: Callable[[str], bool] = value +last_measured_character: str | None = None +fileno: Callable[[], int] | None = value +"#, + Language::Python, + ".py", + ); + + let str_list_type = first_node(&root, "TYPE", "[str]"); + assert_eq!(child_types(str_list_type), vec!["LVAR"]); + assert_eq!( + child_node(str_list_type, 0).children, + vec![Child::String("str".to_string())] + ); + + let empty_list_type = first_node(&root, "TYPE", "[]"); + assert!( + empty_list_type.children.is_empty(), + "Ruby keeps Callable[[]] list type empty: {empty_list_type:#?}" + ); + + let union_type = first_node(&root, "TYPE", "str | None"); + assert_eq!(child_types(union_type), vec!["LVAR", "NIL"]); +} + +#[test] +fn python_docstring_only_class_body_stays_block_wrapped() { + let root = parse_language_source( + r#" +class ColorParseError(Exception): + """The color could not be parsed.""" +"#, + Language::Python, + ".py", + ); + let class_node = first_node( + &root, + "CLASS", + "class 
ColorParseError(Exception):\n \"\"\"The color could not be parsed.\"\"\"", + ); + let scope = child_node(class_node, 2); + let body = child_node(scope, 2); + + assert_eq!(body.r#type, "BLOCK"); + assert_eq!( + child_types(body), + vec!["STRING_START", "STR", "STRING_END"], + "Ruby exposes docstring-only class body as BLOCK of string parts: {body:#?}" + ); +} + +#[test] +fn python_ellipsis_only_function_body_is_empty_scope_with_root_source() { + assert_ruby_parity( + r#"def __rich__(): + ... +"#, + Language::Python, + ".py", + ); +} + +#[test] +fn python_explicit_return_none_is_not_elided_from_function_body() { + let source = r#" +class Thing: + def _repr_latex_(self): + return None +"#; + let root = parse_language_source(source, Language::Python, ".py"); + let defn = first_node( + &root, + "DEFN", + "def _repr_latex_(self):\n return None", + ); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + + assert_eq!(body.r#type, "RETURN"); + assert_eq!( + child_node(body, 0).r#type, + "NIL", + "Ruby only elides implicit nil bodies for Ruby, not explicit Python return None: {scope:#?}" + ); + assert_ruby_parity(source, Language::Python, ".py"); +} + +#[test] +fn python_with_attribute_item_uses_ruby_clause_children() { + let root = parse_language_source( + r#" +def page(self): + with self._console._lock: + buffer = self._console._buffer[:] +"#, + Language::Python, + ".py", + ); + let clause = first_node(&root, "WITH_CLAUSE", "self._console._lock"); + + assert_eq!( + child_types(clause), + vec!["CALL", "LVAR"], + "Ruby with_clause exposes attribute receiver and field separately: {clause:#?}" + ); + assert_eq!(child_node(clause, 0).text, "self._console"); + assert_eq!(child_node(clause, 1).text, "_lock"); +} + +#[test] +fn python_bare_identifier_expression_statement_has_no_children() { + let root = parse_language_source( + r#" +def _is_jupyter(): + try: + get_ipython # type: ignore[name-defined] + except NameError: + return False +"#, + Language::Python, + 
".py", + ); + let expression = first_node(&root, "EXPRESSION_STATEMENT", "get_ipython"); + + assert!( + expression.children.is_empty(), + "Ruby parser exposes bare identifier expression statements without named children: {expression:#?}" + ); +} + +#[test] +fn python_bare_identifier_only_block_has_no_children() { + assert_ruby_parity( + r#" +def get_exception(): + try: + pass + except: + foobarbaz +"#, + Language::Python, + ".py", + ); +} + +#[test] +fn python_bare_dotted_expression_statement_normalizes_as_call() { + let root = parse_language_source("os.get_terminal_size\n", Language::Python, ".py"); + let call = first_node(&root, "CALL", "os.get_terminal_size"); + + assert_eq!( + child_types(call), + vec!["LVAR"], + "bare Python dotted expression statements should normalize as calls: {call:#?}" + ); +} + +#[test] +fn python_bare_comparison_expression_statement_keeps_statement_wrapper() { + let root = parse_language_source( + r#" +def test_get_style(): + console.get_style("repr.brace") == Style(bold=True) +"#, + Language::Python, + ".py", + ); + let expression = first_node( + &root, + "EXPRESSION_STATEMENT", + r#"console.get_style("repr.brace") == Style(bold=True)"#, + ); + + assert_eq!( + child_types(expression), + vec!["CALL", "FCALL"], + "Ruby exposes bare comparison statements as expression_statement operand children: {expression:#?}" + ); +} + +#[test] +fn python_delete_statement_matches_ruby_block_contexts() { + assert_ruby_parity( + r#" +def save(self, clear): + if clear: + del self._record_buffer[:] + with self._record_buffer_lock: + del self._record_buffer[:] + text = "" +"#, + Language::Python, + ".py", + ); +} + +#[test] +fn python_single_subscript_expression_block_exposes_subscript_children() { + assert_ruby_parity( + r#" +def test_render(): + with pytest.raises(KeyError): + top["asdasd"] +"#, + Language::Python, + ".py", + ); +} + +#[test] +fn python_single_if_block_under_try_matches_ruby_if_shape() { + let root = parse_language_source( + r#" +def 
load(args): + try: + if args.path == "-": + json_data = sys.stdin.read() + else: + json_data = Path(args.path).read_text() + except Exception as error: + sys.exit(-1) +"#, + Language::Python, + ".py", + ); + let if_node = first_node( + &root, + "IF", + "if args.path == \"-\":\n json_data = sys.stdin.read()\n else:\n json_data = Path(args.path).read_text()", + ); + + assert_eq!( + child_types(if_node), + vec!["OPCALL", "LASGN", "ELSE_CLAUSE"], + "Ruby normalizes this Python try-body child as an IF: {if_node:#?}" + ); + assert_eq!(child_types(child_node(if_node, 2)), vec!["BLOCK"]); +} + +#[test] +fn python_single_decorated_definition_block_exposes_decorator_and_function() { + assert_ruby_parity( + r#" +def test_inspect_swig_edge_case(): + class Thing: + @property + def __class__(self): + raise AttributeError +"#, + Language::Python, + ".py", + ); +} + +#[test] +fn python_nested_class_inside_class_body_matches_ruby_iter_shape() { + let root = parse_language_source( + r#" +def test_can_handle_special_characters_in_docstrings(): + class Something: + class Thing: + pass +"#, + Language::Python, + ".py", + ); + let iter = first_node(&root, "ITER", "class Thing:\n pass"); + + assert_eq!(child_node(iter, 0).r#type, "VCALL"); + assert_eq!( + child_node(iter, 0).children, + vec![Child::Symbol("Thing".to_string()), Child::Nil] + ); + assert_eq!(child_node(iter, 1).r#type, "SCOPE"); +} + +#[test] +fn lua_local_assignment_call_rhs_matches_ruby_expression_list_shape() { + let root = parse_language_source( + r#"local test_env = require("spec.util.test_env") +"#, + Language::Lua, + ".lua", + ); + let expression_list = first_node(&root, "EXPRESSION_LIST", r#"require("spec.util.test_env")"#); + + assert_eq!( + child_types(expression_list), + vec!["LVAR", "ARGUMENTS"], + "Ruby exposes a Lua call RHS expression_list as the call function and arguments, without a FUNCTION_CALL wrapper: {expression_list:#?}" + ); +} + +#[test] +fn 
lua_local_assignment_member_rhs_matches_ruby_expression_list_shape() { + let root = parse_language_source("local run = test_env.run\n", Language::Lua, ".lua"); + let expression_list = first_node(&root, "EXPRESSION_LIST", "test_env.run"); + + assert_eq!( + child_types(expression_list), + vec!["LVAR", "LVAR"], + "Ruby exposes a Lua dotted RHS expression_list as receiver and field, without a DOT_INDEX_EXPRESSION wrapper: {expression_list:#?}" + ); +} + +#[test] +fn lua_table_string_entry_matches_ruby_field_shape() { + let root = parse_language_source( + "local extra_rocks = {\n \"/luasocket-${LUASOCKET}.src.rock\",\n}\n", + Language::Lua, + ".lua", + ); + let expression_list = first_node( + &root, + "EXPRESSION_LIST", + "{\n \"/luasocket-${LUASOCKET}.src.rock\",\n}", + ); + let field = child_node(expression_list, 0); + let string = child_node(field, 0); + + assert_eq!( + child_types(expression_list), + vec!["FIELD"], + "Ruby exposes a Lua table constructor assignment RHS as its field children: {expression_list:#?}" + ); + assert_eq!(string.r#type, "STR"); + assert_eq!( + string.children, + vec![Child::String( + "/luasocket-${LUASOCKET}.src.rock".to_string() + )], + "Ruby normalizes a Lua table string field from string_content, without quotes: {string:#?}" + ); +} + +#[test] +fn lua_table_dollar_string_entry_matches_ruby_str_not_gvar() { + let root = parse_language_source( + "local incdirs = { \"$(FOO1_INCDIR)\" }\n", + Language::Lua, + ".lua", + ); + let string = first_node(&root, "STR", "$(FOO1_INCDIR)"); + let mut gvars = Vec::new(); + nodes_of_type(&root, "GVAR", &mut gvars); + + assert_eq!( + string.children, + vec![Child::String("$(FOO1_INCDIR)".to_string())], + "Ruby normalizes Lua table strings starting with $ as STR, not GVAR: {string:#?}" + ); + assert!( + gvars.is_empty(), + "Lua string_content starting with $ must not normalize as GVAR: {gvars:#?}" + ); +} + +#[test] +fn lua_table_call_entry_matches_ruby_field_children_shape() { + assert_ruby_parity( + 
"assert.same(install, { bin = { P\"bin/binfile\" } })\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_table_identifier_entry_matches_ruby_empty_field_shape() { + assert_ruby_parity( + "local rocks_path = table.concat({rocks_tree, \"a_rock\"})\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_single_call_function_body_matches_ruby_block_shape() { + assert_ruby_parity( + "before_each(function()\n test_env.setup_specs(extra_rocks)\nend)\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_single_assignment_function_body_matches_ruby_lasgn_shape() { + assert_ruby_parity( + "lazy_setup(function()\n git = git_repo.start()\nend)\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_single_bare_assignment_function_body_matches_ruby_lasgn_shape() { + let root = parse_language_source("function()\n x = y\nend\n", Language::Lua, ".lua"); + let defn = first_node(&root, "DEFN", "function()\n x = y\nend"); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + let right = child_node(body, 1); + + assert_eq!(body.r#type, "LASGN"); + assert_eq!(body.children.first(), Some(&Child::String("x".to_string()))); + assert_eq!(right.r#type, "EXPRESSION_LIST"); + assert!( + right.children.is_empty(), + "Ruby exposes a bare identifier Lua single-assignment RHS with no children: {right:#?}" + ); +} + +#[test] +fn lua_single_dotted_assignment_function_body_normalizes_as_attribute_assignment() { + let root = parse_language_source( + "function()\n package.path = oldpath\nend\n", + Language::Lua, + ".lua", + ); + let defn = first_node(&root, "DEFN", "function()\n package.path = oldpath\nend"); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + let assignment = body; + let receiver = child_node(assignment, 0); + let args = child_node(assignment, 2); + + assert_eq!(body.r#type, "ATTRASGN"); + assert_eq!(receiver.r#type, "LVAR"); + assert_eq!( + receiver.children, + vec![Child::String("package".to_string())] + ); + assert_eq!( + 
assignment.children.get(1), + Some(&Child::Symbol("path=".to_string())) + ); + assert_eq!(args.r#type, "LIST"); +} + +#[test] +fn lua_single_local_assignment_function_body_matches_ruby_lasgn_shape() { + assert_ruby_parity( + "it(function()\n local output = run.luarocks(\"show --rock-tree luacov\")\nend)\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_assigned_function_expression_matches_ruby_expression_list_shape() { + assert_ruby_parity( + "local test_with_location = function(location)\n lfs.mkdir(location)\nend\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_assigned_function_if_else_matches_fixed_ruby_if_shape() { + assert_ruby_parity( + "local make_unreadable = function(path)\n if is_win then\n fs.execute(\"x\")\n else\n fs.execute(\"y\")\n end\nend\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_single_return_function_body_matches_ruby_opcall_shape() { + let source = "function sum.sum(a, b)\n return a + b\nend\n"; + let root = parse_language_source(source, Language::Lua, ".lua"); + let defn = first_node( + &root, + "DEFN", + "function sum.sum(a, b)\n return a + b\nend", + ); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + let returned = child_node(body, 0); + + assert_eq!(body.r#type, "RETURN"); + assert_eq!(returned.r#type, "OPCALL"); + assert_eq!( + returned.children.get(1), + Some(&Child::Symbol("+".to_string())), + "Ruby exposes a single Lua return body as RETURN wrapping the returned operator call: {body:#?}" + ); + assert_ruby_parity(source, Language::Lua, ".lua"); +} + +#[test] +fn lua_top_level_return_identifier_matches_ruby_empty_expression_list() { + let root = parse_language_source("return sum\n", Language::Lua, ".lua"); + let return_node = first_node(&root, "RETURN", "return sum"); + let expression_list = child_node(return_node, 0); + + assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); + assert!( + expression_list.children.is_empty(), + "Ruby exposes a Lua return of a bare identifier as an 
empty expression_list: {expression_list:#?}" + ); +} + +#[test] +fn lua_top_level_return_scalar_literals_match_ruby_empty_expression_list() { + for literal in ["true", "false", "nil", "0"] { + let root = parse_language_source(&format!("return {literal}\n"), Language::Lua, ".lua"); + let return_node = first_node(&root, "RETURN", &format!("return {literal}")); + let expression_list = child_node(return_node, 0); + + assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); + assert!( + expression_list.children.is_empty(), + "Ruby exposes a Lua return of {literal} as an empty expression_list: {expression_list:#?}" + ); + } +} + +#[test] +fn lua_assignment_scalar_literals_match_ruby_empty_expression_list() { + for literal in ["true", "false", "nil", "0"] { + let root = parse_language_source(&format!("tmpfile = {literal}\n"), Language::Lua, ".lua"); + let assignment = first_node(&root, "LASGN", &format!("tmpfile = {literal}")); + let expression_list = child_node(assignment, 1); + + assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); + assert!( + expression_list.children.is_empty(), + "Ruby exposes a Lua scalar literal assignment RHS as an empty expression_list: {expression_list:#?}" + ); + } +} + +#[test] +fn lua_no_paren_string_argument_matches_ruby_string_content_shape() { + let root = parse_language_source("V\"foo\"\n", Language::Lua, ".lua"); + let call = first_node(&root, "FUNCTION_CALL", "V\"foo\""); + let arguments = child_node(call, 1); + let string = child_node(arguments, 0); + + assert_eq!(arguments.r#type, "ARGUMENTS"); + assert_eq!(arguments.text, "\"foo\""); + assert_eq!(string.r#type, "STR"); + assert_eq!(string.text, "foo"); + assert_eq!(string.children, vec![Child::String("foo".to_string())]); +} + +#[test] +fn lua_long_string_assignment_matches_ruby_expression_list_content_shape() { + assert_ruby_parity( + "local c_module_source = [[\n #include \n]]\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_elseif_branch_is_preserved_as_if_alternative() { 
+ let root = parse_language_source( + r#"if test_env.LUA_V == "5.1" then + one() +elseif test_env.LUA_V == "5.2" then + two() +end +"#, + Language::Lua, + ".lua", + ); + let if_node = first_node( + &root, + "IF", + "if test_env.LUA_V == \"5.1\" then\n one()\nelseif test_env.LUA_V == \"5.2\" then\n two()\nend", + ); + let alternative = child_node(if_node, 2); + + assert_eq!(alternative.r#type, "ELSEIF_STATEMENT"); +} + +#[test] +fn lua_binary_assignment_rhs_matches_ruby_expression_list_shape() { + let root = parse_language_source( + "local rockspec = testing_paths.fixtures_dir .. \"/build_only_deps-0.1-1.rockspec\"\n", + Language::Lua, + ".lua", + ); + let expression_list = first_node( + &root, + "EXPRESSION_LIST", + "testing_paths.fixtures_dir .. \"/build_only_deps-0.1-1.rockspec\"", + ); + + assert_eq!( + child_types(expression_list), + vec!["DOT_INDEX_EXPRESSION", "STR"], + "Ruby exposes a Lua binary RHS expression_list as the binary operands, without a BINARY_EXPRESSION wrapper: {expression_list:#?}" + ); +} + +#[test] +fn lua_local_declaration_without_rhs_matches_ruby_empty_variable_list() { + let root = parse_language_source("local tmpdir\n", Language::Lua, ".lua"); + let variable_list = first_node(&root, "VARIABLE_LIST", "tmpdir"); + + assert!( + variable_list.children.is_empty(), + "Ruby exposes a Lua local declaration without RHS as an empty VARIABLE_LIST: {variable_list:#?}" + ); +} + +#[test] +fn lua_multi_local_declaration_without_rhs_keeps_ruby_variable_list_children() { + let root = parse_language_source("local cfg, fs\n", Language::Lua, ".lua"); + let variable_list = first_node(&root, "VARIABLE_LIST", "cfg, fs"); + + assert_eq!( + child_types(variable_list), + vec!["LVAR", "LVAR"], + "Ruby keeps children for a multi-name Lua local declaration without RHS: {variable_list:#?}" + ); +} + +#[test] +fn lua_single_generic_for_variable_matches_ruby_empty_variable_list() { + let root = parse_language_source( + "for f in lfs.dir(spec_quick) do end\n", + 
Language::Lua, + ".lua", + ); + let variable_list = first_node(&root, "VARIABLE_LIST", "f"); + + assert!( + variable_list.children.is_empty(), + "Ruby exposes a single Lua generic-for variable list as empty: {variable_list:#?}" + ); +} + +#[test] +fn lua_multi_generic_for_variable_list_keeps_ruby_children() { + let root = parse_language_source("for _, t in ipairs(tests) do end\n", Language::Lua, ".lua"); + let variable_list = first_node(&root, "VARIABLE_LIST", "_, t"); + + assert_eq!( + child_types(variable_list), + vec!["LVAR", "LVAR"], + "Ruby keeps children for a multi-name Lua generic-for variable list: {variable_list:#?}" + ); +} + +#[test] +fn normalizes_safe_navigation_inside_multi_statement_else_body() { + let root = parse_source( + r#" +def x(cond, node) + if cond + node.storage = :stack + else + node.storage = :heap + current_fn_ctx&.record_heap_use! + end +end +"#, + ); + let mut qcalls = Vec::new(); + nodes_of_type(&root, "QCALL", &mut qcalls); + + assert!( + qcalls + .iter() + .any(|node| node.text == "current_fn_ctx&.record_heap_use!"), + "expected normalized QCALL for current_fn_ctx safe navigation, got {qcalls:#?} in {root:#?}" + ); +} + +#[test] +fn normalizes_visibility_wrapped_singleton_def() { + let root = parse_source( + r#" +private_class_method def self.collect_payload_binding_names(node, names) + if node.is_a?(AST::Identifier) + return + end + AST.wrapped_children(node).each { |child| collect_payload_binding_names(child, names) if child.is_a?(AST::Locatable) } +end +"#, + ); + let mut defs = Vec::new(); + nodes_of_type(&root, "DEFS", &mut defs); + + assert!( + defs.iter().any(|node| node.children.get(1) + == Some(&Child::Symbol("collect_payload_binding_names".to_string()))), + "expected normalized DEFS for visibility-wrapped singleton def, got {root:#?}" + ); + + let def = defs + .into_iter() + .find(|node| { + node.children.get(1) + == Some(&Child::Symbol("collect_payload_binding_names".to_string())) + }) + .expect("visibility-wrapped 
singleton def should normalize to DEFS"); + let mut calls = Vec::new(); + nodes_of_type(def, "CALL", &mut calls); + nodes_of_type(def, "FCALL", &mut calls); + calls.sort_by_key(|node| (node.first_lineno, node.first_column)); + let ordered = calls + .iter() + .map(|node| (node.first_lineno, node.text.as_str())) + .collect::>(); + + let first_if_call = ordered + .iter() + .position(|(_line, text)| *text == "node.is_a?(AST::Identifier)") + .expect("expected identifier guard call"); + let recursive_call = ordered + .iter() + .position(|(_line, text)| *text == "collect_payload_binding_names(child, names)") + .expect("expected recursive payload scan call"); + assert!( + first_if_call < recursive_call, + "expected method body calls in source order, got {ordered:#?} in {root:#?}" + ); +} + +#[test] +fn normalizes_heredoc_beginning_as_dynamic_string_receiver() { + let root = parse_source( + r#" +def emit + <<~ZIG.chomp + hi + ZIG +end +"#, + ); + let mut calls = Vec::new(); + nodes_of_type(&root, "CALL", &mut calls); + + let call = calls + .iter() + .find(|node| node.text == "<<~ZIG.chomp") + .expect("expected heredoc chomp call"); + assert_eq!( + call.children.get(1), + Some(&Child::Symbol("chomp".to_string())) + ); + assert_eq!( + call.children + .first() + .and_then(super::node) + .map(|node| node.r#type.as_str()), + Some("DSTR") + ); +} + +#[test] +fn flatten_and_matches_ruby_ast_helper() { + let left = Node { + r#type: "LVAR".to_string(), + children: vec![Child::String("a".to_string())], + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: "a".to_string(), + }; + let right = Node { + r#type: "LVAR".to_string(), + children: vec![Child::String("b".to_string())], + first_lineno: 1, + first_column: 5, + last_lineno: 1, + last_column: 6, + text: "b".to_string(), + }; + let and_node = Node { + r#type: "AND".to_string(), + children: vec![Child::Node(Box::new(left)), Child::Node(Box::new(right))], + first_lineno: 1, + first_column: 0, + 
last_lineno: 1, + last_column: 6, + text: "a && b".to_string(), + }; + + assert_eq!(super::flatten_and(&and_node).len(), 2); +} diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs index 7914d3e19..edb348779 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -8638,20944 +8638,5 @@ pub fn child_to_string(child: Option<&Child>) -> Option { } #[cfg(test)] -mod tests { - use super::{parse, parse_with_language, Child, Node}; - use crate::decomplex::syntax::Language; - use serde_json::{json, Value}; - use std::collections::BTreeSet; - use std::io::Write; - use std::path::Path; - use std::process::Command; - use tree_sitter::{Node as TreeSitterNode, Parser as TreeSitterParser}; - - fn parse_source(source: &str) -> Node { - let mut file = tempfile::Builder::new() - .suffix(".rb") - .tempfile() - .expect("create temp ruby file"); - file.write_all(source.as_bytes()) - .expect("write temp ruby file"); - parse(file.path()).expect("parse temp ruby file").0 - } - - fn parse_language_source(source: &str, language: Language, suffix: &str) -> Node { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create temp source file"); - file.write_all(source.as_bytes()) - .expect("write temp source file"); - parse_with_language(file.path(), language) - .expect("parse temp source file") - .0 - } - - fn nodes_of_type<'a>(node: &'a Node, node_type: &str, out: &mut Vec<&'a Node>) { - if node.r#type == node_type { - out.push(node); - } - for child in node.children.iter().filter_map(super::node) { - nodes_of_type(child, node_type, out); - } - } - - fn first_node<'a>(root: &'a Node, node_type: &str, text: &str) -> &'a Node { - let mut nodes = Vec::new(); - nodes_of_type(root, node_type, &mut nodes); - nodes - .into_iter() - .find(|node| node.text == text) - .unwrap_or_else(|| panic!("expected {node_type} with text {text:?} in {root:#?}")) - } - - fn child_node(node: 
&Node, index: usize) -> &Node { - node.children - .get(index) - .and_then(super::node) - .unwrap_or_else(|| panic!("expected child node {index} in {node:#?}")) - } - - fn child_types(node: &Node) -> Vec<&str> { - node.children - .iter() - .filter_map(super::node) - .map(|child| child.r#type.as_str()) - .collect() - } - - fn test_node(node_type: &str, children: Vec) -> Node { - Node { - r#type: node_type.to_string(), - children, - first_lineno: 1, - first_column: 0, - last_lineno: 1, - last_column: 1, - text: node_type.to_string(), - } - } - - fn infix_parts_text( - normalizer: &super::TreeSitterNormalizer<'_>, - node: TreeSitterNode<'_>, - source: &str, - ) -> Option<(String, String, String)> { - let (left, operator, right) = normalizer.infix_statement_parts(node)?; - Some(( - super::node_text(left, source).to_string(), - operator, - super::node_text(right, source).to_string(), - )) - } - - fn node_value(node: &Node) -> Value { - json!({ - "type": node.r#type, - "children": node.children.iter().map(child_value).collect::>(), - "first_lineno": node.first_lineno, - "first_column": node.first_column, - "last_lineno": node.last_lineno, - "last_column": node.last_column, - "text": node.text, - }) - } - - fn child_value(child: &Child) -> Value { - match child { - Child::Node(node) => node_value(node), - Child::Symbol(value) | Child::String(value) => Value::String(value.clone()), - Child::Integer(value) => Value::Number((*value).into()), - Child::Bool(value) => Value::Bool(*value), - Child::Nil => Value::Null, - } - } - - fn children_value(children: &[Child]) -> Value { - Value::Array(children.iter().map(child_value).collect()) - } - - fn ruby_language_name(language: Language) -> &'static str { - match language { - Language::Ruby => "ruby", - Language::Python => "python", - Language::JavaScript => "javascript", - Language::Java => "java", - Language::TypeScript => "typescript", - Language::Swift => "swift", - Language::Kotlin => "kotlin", - Language::Go => "go", - 
Language::Rust => "rust", - Language::Zig => "zig", - Language::Lua => "lua", - Language::C => "c", - Language::Cpp => "cpp", - Language::CSharp => "csharp", - } - } - - fn ruby_normalized_value(path: &Path, language: Language) -> Value { - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - root, = Decomplex::Ast.parse(ARGV.fetch(0)) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - elsif node.is_a?(Array) - node.map { |child| value(child) } - else - node - end - end - - puts JSON.generate(value(root)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "json", - "-e", - script, - ]) - .arg(path) - .output() - .expect("run ruby normalizer"); - assert!( - output.status.success(), - "ruby normalizer failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout).expect("ruby normalizer should emit JSON") - } - - fn assert_ruby_parity(source: &str, language: Language, suffix: &str) { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create parity temp source file"); - file.write_all(source.as_bytes()) - .expect("write parity temp source file"); - - let rust = node_value( - &parse_with_language(file.path(), language) - .expect("parse parity temp source file") - .0, - ); - let ruby = ruby_normalized_value(file.path(), language); - assert_eq!(rust, ruby); - } - - fn raw_tree(source: 
&str, language: Language) -> tree_sitter::Tree { - let mut parser = TreeSitterParser::new(); - parser - .set_language(&super::language_grammar(language)) - .expect("set raw parser language"); - parser.parse(source, None).expect("parse raw source") - } - - fn first_raw_node<'tree>( - node: TreeSitterNode<'tree>, - source: &str, - kind: &str, - text: &str, - ) -> TreeSitterNode<'tree> { - if node.kind() == kind && super::node_text(node, source) == text { - return node; - } - let mut cursor = node.walk(); - for child in node.children(&mut cursor) { - if let Some(found) = first_raw_node_opt(child, source, kind, text) { - return found; - } - } - panic!("expected raw node kind={kind:?} text={text:?}"); - } - - fn first_raw_node_opt<'tree>( - node: TreeSitterNode<'tree>, - source: &str, - kind: &str, - text: &str, - ) -> Option> { - if node.kind() == kind && super::node_text(node, source) == text { - return Some(node); - } - let mut cursor = node.walk(); - for child in node.children(&mut cursor) { - if let Some(found) = first_raw_node_opt(child, source, kind, text) { - return Some(found); - } - } - None - } - - fn nth_raw_node<'tree>( - node: TreeSitterNode<'tree>, - source: &str, - kind: &str, - text: &str, - index: usize, - ) -> TreeSitterNode<'tree> { - let mut found = Vec::new(); - collect_raw_nodes(node, source, kind, text, &mut found); - *found.get(index).unwrap_or_else(|| { - panic!("expected raw node kind={kind:?} text={text:?} index={index}") - }) - } - - fn collect_raw_nodes<'tree>( - node: TreeSitterNode<'tree>, - source: &str, - kind: &str, - text: &str, - found: &mut Vec>, - ) { - if node.kind() == kind && super::node_text(node, source) == text { - found.push(node); - } - let mut cursor = node.walk(); - for child in node.children(&mut cursor) { - collect_raw_nodes(child, source, kind, text, found); - } - } - - fn ruby_private_predicate( - source: &str, - language: Language, - suffix: &str, - method: &str, - kind: &str, - text: &str, - ) -> bool { - let mut 
file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby predicate temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby predicate temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - method = ARGV.fetch(3) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - puts normalizer.send(method, target) ? "true" : "false" - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(method) - .output() - .expect("run ruby private predicate"); - assert!( - output.status.success(), - "ruby predicate failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - String::from_utf8(output.stdout) - .expect("ruby predicate output should be utf8") - .trim() - == "true" - } - - fn ruby_private_collected_names( - source: &str, - language: Language, - suffix: &str, - method: &str, - kind: &str, - text: &str, - ) -> BTreeSet { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby collected names temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby collected names temp source file"); - let 
decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - method = ARGV.fetch(3) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - locals = Set.new - normalizer.send(method, target, locals) - puts JSON.generate(locals.to_a.sort) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-r", - "set", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(method) - .output() - .expect("run ruby collected names helper"); - assert!( - output.status.success(), - "ruby collected names helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice::>(&output.stdout) - .expect("ruby collected names output should be json") - .into_iter() - .collect() - } - - fn ruby_private_scope_collected_names( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - root: bool, - ) -> BTreeSet { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby scope collected names temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby scope collected names temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - 
.expect("decomplex rust dir should have gem parent"); - let script = r#" - require "set" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - root = ARGV.fetch(3) == "true" - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - locals = Set.new - normalizer.send(:collect_ruby_scope_locals, target, locals, root: root) - puts JSON.generate(locals.to_a.sort) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(if root { "true" } else { "false" }) - .output() - .expect("run ruby scope collected names helper"); - assert!( - output.status.success(), - "ruby scope collected names helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice::>(&output.stdout) - .expect("ruby scope collected names output should be json") - .into_iter() - .collect() - } - - fn ruby_private_ruby_scope_locals( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> BTreeSet { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby scope locals temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby scope locals temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir 
should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - puts JSON.generate(normalizer.send(:ruby_scope_locals, target).to_a.sort) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-r", - "set", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby scope locals helper"); - assert!( - output.status.success(), - "ruby scope locals helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice::>(&output.stdout) - .expect("ruby scope locals output should be json") - .into_iter() - .collect() - } - - fn ruby_private_with_ruby_scope_trace( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - reset: bool, - initial_stack: &[Vec<&str>], - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby with_ruby_scope temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby with_ruby_scope temp source file"); - let initial_stack_json = - serde_json::to_string(initial_stack).expect("serialize initial local stack"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let 
script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - reset = ARGV.fetch(3) == "true" - initial = JSON.parse(ARGV.fetch(4)).map { |names| Set.new(names) } - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - normalizer.instance_variable_set(:@local_stack, initial) - snapshot = lambda do - Array(normalizer.instance_variable_get(:@local_stack)).map { |locals| locals.to_a.sort } - end - before = snapshot.call - inside = nil - result = normalizer.send(:with_ruby_scope, target, reset: reset) do - inside = snapshot.call - "block-result" - end - after = snapshot.call - puts JSON.generate("before" => before, "inside" => inside, "after" => after, "result" => result) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-r", - "set", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(if reset { "true" } else { "false" }) - .arg(initial_stack_json) - .output() - .expect("run ruby with_ruby_scope helper"); - assert!( - output.status.success(), - "ruby with_ruby_scope helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout).expect("ruby with_ruby_scope output should be json") - } - - fn local_stack_from(names: &[Vec<&str>]) -> Vec> { - names - .iter() - .map(|scope| scope.iter().map(|name| name.to_string()).collect()) - 
.collect() - } - - fn local_stack_value(stack: &[BTreeSet]) -> Value { - json!(stack - .iter() - .map(|scope| scope.iter().cloned().collect::>()) - .collect::>()) - } - - fn ruby_private_destructured_parameter_targets_value( - source: &str, - kind: &str, - text: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(".rb") - .tempfile() - .expect("create ruby destructured parameter temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby destructured parameter temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - targets = [] - normalizer.send(:collect_destructured_parameter_targets, target, targets) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - elsif node.is_a?(Array) - node.map { |child| value(child) } - else - node - end - end - - puts JSON.generate(targets.map { |node| value(node) }) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env( - "DECOMPLEX_FORCE_LANGUAGE", - ruby_language_name(Language::Ruby), - ) - .args([ - "-I", - "lib", - "-r", 
- "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby destructured parameter helper"); - assert!( - output.status.success(), - "ruby destructured parameter helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby destructured parameter output should be json") - } - - fn ruby_private_scope_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - mode: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby scope temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby scope temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - mode = ARGV.fetch(3) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - - body = mode == "body" ? normalizer.send(:wrap, :BODY, children: [], source: target) : nil - args = mode == "args" ? 
normalizer.send(:wrap, :ARGS, children: [], source: target) : nil - result = normalizer.send(:scope, body, args: args, source: target) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(mode) - .output() - .expect("run ruby scope helper"); - assert!( - output.status.success(), - "ruby scope helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout).expect("ruby scope output should be json") - } - - fn ruby_private_list_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - mode: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby list temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby list temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - mode = ARGV.fetch(3) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == 
target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - - item = normalizer.send(:wrap, :ITEM, children: [], source: target) - children = - case mode - when "nil" then nil - when "empty" then [] - when "one" then [item] - else abort "unknown list mode: #{mode}" - end - result = normalizer.send(:list, children, source: target) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(mode) - .output() - .expect("run ruby list helper"); - assert!( - output.status.success(), - "ruby list helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout).expect("ruby list output should be json") - } - - fn ruby_private_string( - source: &str, - language: Language, - suffix: &str, - method: &str, - kind: &str, - text: &str, - ) -> String { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby string temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby string temp source file"); - let decomplex_dir = 
Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - method = ARGV.fetch(3) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - puts normalizer.send(method, target).to_s - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(method) - .output() - .expect("run ruby private string helper"); - assert!( - output.status.success(), - "ruby string helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - String::from_utf8(output.stdout) - .expect("ruby string helper output should be utf8") - .trim_end_matches(['\r', '\n']) - .to_string() - } - - fn ruby_private_text_predicate(language: Language, method: &str, text: &str) -> bool { - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - language = ARGV.fetch(0).to_sym - text = ARGV.fetch(1) - method = ARGV.fetch(2) - document = Object.new - document.define_singleton_method(:language) { language } - normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate - normalizer.instance_variable_set(:@document, document) - puts normalizer.send(method, text) ? 
"true" : "false" - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) - .arg(ruby_language_name(language)) - .arg(text) - .arg(method) - .output() - .expect("run ruby private text predicate"); - assert!( - output.status.success(), - "ruby text predicate failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - String::from_utf8(output.stdout) - .expect("ruby text predicate output should be utf8") - .trim() - == "true" - } - - fn ruby_private_text_string(language: Language, method: &str, text: &str) -> String { - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - language = ARGV.fetch(0).to_sym - text = ARGV.fetch(1) - method = ARGV.fetch(2) - document = Object.new - document.define_singleton_method(:language) { language } - normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate - normalizer.instance_variable_set(:@document, document) - puts normalizer.send(method, text).to_s - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) - .arg(ruby_language_name(language)) - .arg(text) - .arg(method) - .output() - .expect("run ruby private text string helper"); - assert!( - output.status.success(), - "ruby text string helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - String::from_utf8(output.stdout) - .expect("ruby text string output should be utf8") - .trim_end_matches(['\r', '\n']) - .to_string() - } - - fn ruby_private_ts_node_value(value: &str) -> bool { - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Object.new - document.define_singleton_method(:language) { :ruby } 
- normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate - normalizer.instance_variable_set(:@document, document) - target = - case ARGV.fetch(0) - when "nil" - nil - when "string" - "value" - when "normalized_node" - Decomplex::Ast::Node.new(type: :LIT, children: [], first_lineno: 1, first_column: 0, last_lineno: 1, last_column: 1, text: "1") - else - abort "unknown ts_node? probe" - end - puts normalizer.send(:ts_node?, target) ? "true" : "false" - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) - .arg(value) - .output() - .expect("run ruby private ts_node? value helper"); - assert!( - output.status.success(), - "ruby ts_node? value helper failed for {value}: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - String::from_utf8(output.stdout) - .expect("ruby ts_node? value output should be utf8") - .trim() - == "true" - } - - fn ruby_private_regex_literal_value(value: &str) -> bool { - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Object.new - document.define_singleton_method(:language) { :ruby } - normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate - normalizer.instance_variable_set(:@document, document) - target = - case ARGV.fetch(0) - when "nil" - nil - when "string" - "value" - when "normalized_node" - Decomplex::Ast::Node.new(type: :LIT, children: [], first_lineno: 1, first_column: 0, last_lineno: 1, last_column: 1, text: "1") - else - abort "unknown regex_literal? probe" - end - puts normalizer.send(:regex_literal?, target) ? "true" : "false" - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) - .arg(value) - .output() - .expect("run ruby private regex_literal? 
value helper"); - assert!( - output.status.success(), - "ruby regex_literal? value helper failed for {value}: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - String::from_utf8(output.stdout) - .expect("ruby regex_literal? value output should be utf8") - .trim() - == "true" - } - - fn ruby_private_node_signature( - source: &str, - language: Language, - suffix: &str, - method: &str, - kind: &str, - text: &str, - ) -> Option<(String, String)> { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby node signature temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby node signature temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - method = ARGV.fetch(3) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - result = normalizer.send(method, target) - if result - puts JSON.generate([result.kind, result.text.to_s]) - else - puts "null" - end - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(method) - .output() - .expect("run ruby private node signature helper"); - assert!( - output.status.success(), - "ruby node signature helper failed: 
stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - let value: Value = serde_json::from_slice(&output.stdout) - .expect("ruby node signature output should be json"); - if value.is_null() { - return None; - } - let pair = value - .as_array() - .expect("ruby node signature should be an array"); - Some(( - pair[0] - .as_str() - .expect("node kind should be string") - .to_string(), - pair[1] - .as_str() - .expect("node text should be string") - .to_string(), - )) - } - - fn ruby_private_inline_def_name_after_receiver( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> String { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby inline def name temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby inline def name temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - receiver = normalizer.send(:inline_def_receiver, target) - puts normalizer.send(:inline_def_name_after_receiver, target, receiver).to_s - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run 
ruby inline def name helper"); - assert!( - output.status.success(), - "ruby inline def name helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - String::from_utf8(output.stdout) - .expect("ruby inline def name output should be utf8") - .trim() - .to_string() - } - - fn ruby_private_inline_parameter_begin_marker_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby inline_parameter_begin_marker temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby inline_parameter_begin_marker temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - result = normalizer.send(:inline_parameter_begin_marker, target) - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", 
ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby private inline_parameter_begin_marker helper"); - assert!( - output.status.success(), - "ruby inline_parameter_begin_marker helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby inline_parameter_begin_marker output should be json") - } - - fn ruby_private_prepend_inline_parameter_begin_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - body: &Value, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby prepend_inline_parameter_begin temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby prepend_inline_parameter_begin temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - def node(value) - return nil if value.nil? 
- return value unless value.is_a?(Hash) - - Decomplex::Ast::Node.new( - type: value.fetch("type").to_sym, - children: value.fetch("children").map { |child| node(child) }, - first_lineno: value.fetch("first_lineno"), - first_column: value.fetch("first_column"), - last_lineno: value.fetch("last_lineno"), - last_column: value.fetch("last_column"), - text: value.fetch("text") - ) - end - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |ts_node| - if ts_node.respond_to?(:kind) - target ||= ts_node if ts_node.kind == target_kind && ts_node.text.to_s == target_text - ts_node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - body = node(JSON.parse(ARGV.fetch(3))) - result = normalizer.send(:prepend_inline_parameter_begin, target, body) - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(body.to_string()) - .output() - .expect("run ruby private prepend_inline_parameter_begin helper"); - assert!( - output.status.success(), - "ruby prepend_inline_parameter_begin helper failed: 
stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby prepend_inline_parameter_begin output should be json") - } - - fn ruby_private_local_or_call_for_name_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - name: &str, - local: bool, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby local_or_call_for_name temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby local_or_call_for_name temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - require "set" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - name = ARGV.fetch(3) - local = ARGV.fetch(4) == "true" - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - normalizer.instance_variable_set(:@local_stack, local ? 
[Set[name]] : []) - result = normalizer.send(:local_or_call_for_name, name, target) - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(name) - .arg(if local { "true" } else { "false" }) - .output() - .expect("run ruby private local_or_call_for_name helper"); - assert!( - output.status.success(), - "ruby local_or_call_for_name helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby local_or_call_for_name output should be json") - } - - fn ruby_private_ruby_vcall_identifier_predicate( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - local_names: &[&str], - ) -> bool { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby ruby_vcall_identifier temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby ruby_vcall_identifier temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - require "set" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - local_names = ARGV.fetch(3).split(",").reject(&:empty?) 
- target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - normalizer.instance_variable_set(:@local_stack, local_names.empty? ? [] : [Set.new(local_names)]) - puts normalizer.send(:ruby_vcall_identifier?, target) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(local_names.join(",")) - .output() - .expect("run ruby private ruby_vcall_identifier? helper"); - assert!( - output.status.success(), - "ruby ruby_vcall_identifier? helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - String::from_utf8(output.stdout) - .expect("ruby ruby_vcall_identifier? 
output should be utf8") - .trim() - == "true" - } - - fn ruby_private_vcall_identifier_predicate( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - local_names: &[&str], - ) -> bool { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby vcall_identifier temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby vcall_identifier temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - require "set" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - local_names = ARGV.fetch(3).split(",").reject(&:empty?) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - normalizer.instance_variable_set(:@local_stack, local_names.empty? ? [] : [Set.new(local_names)]) - puts normalizer.send(:vcall_identifier?, target) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(local_names.join(",")) - .output() - .expect("run ruby private vcall_identifier? helper"); - assert!( - output.status.success(), - "ruby vcall_identifier? helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - String::from_utf8(output.stdout) - .expect("ruby vcall_identifier? 
output should be utf8") - .trim() - == "true" - } - - fn ruby_private_normalize_terminal_statement_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - local_names: &[&str], - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby normalize_terminal_statement temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby normalize_terminal_statement temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - require "set" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - local_names = ARGV.fetch(3).split(",").reject(&:empty?) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - normalizer.instance_variable_set(:@local_stack, local_names.empty? ? 
[] : [Set.new(local_names)]) - result = normalizer.send(:normalize_terminal_statement, target) - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(local_names.join(",")) - .output() - .expect("run ruby private normalize_terminal_statement helper"); - assert!( - output.status.success(), - "ruby normalize_terminal_statement helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby normalize_terminal_statement output should be json") - } - - fn ruby_private_node_list_signature( - source: &str, - language: Language, - suffix: &str, - method: &str, - kind: &str, - text: &str, - ) -> Vec<(String, String)> { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby node list signature temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby node list signature temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - method = ARGV.fetch(3) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - result = Array(normalizer.send(method, target)) - puts 
JSON.generate(result.map { |node| [node.kind, node.text.to_s] }) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(method) - .output() - .expect("run ruby node list signature helper"); - assert!( - output.status.success(), - "ruby node list signature helper failed for {method}: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - let value: Value = serde_json::from_slice(&output.stdout) - .expect("ruby node list signature output should be json"); - value - .as_array() - .expect("ruby node list signature should be an array") - .iter() - .map(|item| { - let item = item - .as_array() - .expect("ruby node list item should be an array"); - ( - item[0] - .as_str() - .expect("ruby node list kind should be a string") - .to_string(), - item[1] - .as_str() - .expect("ruby node list text should be a string") - .to_string(), - ) - }) - .collect() - } - - fn ruby_private_dotted_call_parts( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> Option<(String, String, String)> { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby dotted_call_parts temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby dotted_call_parts temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == 
target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - receiver, method = normalizer.send(:dotted_call_parts, target) - if receiver - puts JSON.generate([receiver.kind, receiver.text.to_s, method.to_s]) - else - puts "null" - end - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby private dotted_call_parts helper"); - assert!( - output.status.success(), - "ruby dotted_call_parts helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - let value: Value = serde_json::from_slice(&output.stdout) - .expect("ruby dotted_call_parts output should be json"); - if value.is_null() { - return None; - } - let parts = value - .as_array() - .expect("ruby dotted_call_parts should be an array"); - Some(( - parts[0] - .as_str() - .expect("receiver kind should be string") - .to_string(), - parts[1] - .as_str() - .expect("receiver text should be string") - .to_string(), - parts[2] - .as_str() - .expect("method should be string") - .to_string(), - )) - } - - fn ruby_private_member_parts( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> Option<(String, String, String)> { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby member_parts temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby member_parts temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" 
- document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - receiver, method = normalizer.send(:member_parts, target) - if receiver - puts JSON.generate([receiver.kind, receiver.text.to_s, method.to_s]) - else - puts "null" - end - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby private member_parts helper"); - assert!( - output.status.success(), - "ruby member_parts helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - let value: Value = serde_json::from_slice(&output.stdout) - .expect("ruby member_parts output should be json"); - if value.is_null() { - return None; - } - let parts = value - .as_array() - .expect("ruby member_parts should be an array"); - Some(( - parts[0] - .as_str() - .expect("receiver kind should be string") - .to_string(), - parts[1] - .as_str() - .expect("receiver text should be string") - .to_string(), - parts[2] - .as_str() - .expect("method should be string") - .to_string(), - )) - } - - fn ruby_private_named_field_signature( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - field: &str, - ) -> Option<(String, String)> { - let mut file = tempfile::Builder::new() - .suffix(suffix) - 
.tempfile() - .expect("create ruby named_field temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby named_field temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - field = ARGV.fetch(3) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - result = normalizer.send(:named_field, target, field) - if result - puts JSON.generate([result.kind, result.text.to_s]) - else - puts "null" - end - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(field) - .output() - .expect("run ruby private named_field helper"); - assert!( - output.status.success(), - "ruby named_field helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - let value: Value = - serde_json::from_slice(&output.stdout).expect("ruby named_field output should be json"); - if value.is_null() { - return None; - } - let pair = value - .as_array() - .expect("ruby named_field output should be an array"); - Some(( - pair[0] - .as_str() - .expect("named_field kind should be string") - .to_string(), - pair[1] - .as_str() - .expect("named_field text should be string") - .to_string(), - )) - } - - fn 
ruby_private_branch_child_signature( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - condition_kind: &str, - condition_text: &str, - index: usize, - ) -> Option<(String, String)> { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby branch_child temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby branch_child temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - condition_kind = ARGV.fetch(3) - condition_text = ARGV.fetch(4) - index = Integer(ARGV.fetch(5)) - target = nil - condition = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - condition ||= node if node.kind == condition_kind && node.text.to_s == condition_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - abort "condition node not found" unless condition - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - result = normalizer.send(:branch_child, target, condition, index) - if result - puts JSON.generate([result.kind, result.text.to_s]) - else - puts "null" - end - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(condition_kind) - .arg(condition_text) - .arg(index.to_string()) - .output() - .expect("run ruby private branch_child helper"); - assert!( - output.status.success(), - "ruby branch_child 
helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - let value: Value = serde_json::from_slice(&output.stdout) - .expect("ruby branch_child output should be json"); - if value.is_null() { - return None; - } - let pair = value - .as_array() - .expect("ruby branch_child output should be an array"); - Some(( - pair[0] - .as_str() - .expect("branch_child kind should be string") - .to_string(), - pair[1] - .as_str() - .expect("branch_child text should be string") - .to_string(), - )) - } - - fn ruby_private_wrap_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - normalized_source: bool, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby wrap temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby wrap temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - normalized_source = ARGV.fetch(3) == "true" - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - source = if normalized_source - normalizer.send(:wrap, :INNER, children: [], source: target) - else - target - end - result = normalizer.send(:wrap, :OUTER, children: [:child], source: source) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - 
"first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(if normalized_source { "true" } else { "false" }) - .output() - .expect("run ruby private wrap helper"); - assert!( - output.status.success(), - "ruby wrap helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout).expect("ruby wrap output should be json") - } - - fn ruby_private_normalize_method_value( - source: &str, - language: Language, - suffix: &str, - method: &str, - kind: &str, - text: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby normalize method temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby normalize method temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - method = ARGV.fetch(3) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = 
Decomplex::Ast::TreeSitterNormalizer.new(document) - result = normalizer.send(method, target) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - elsif node.is_a?(Array) - node.map { |child| value(child) } - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(method) - .output() - .expect("run ruby private normalize method helper"); - assert!( - output.status.success(), - "ruby normalize method helper failed for {method}: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout).expect("ruby normalize method output should be json") - } - - fn ruby_private_normalize_return_node_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - elide_symbol: bool, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby normalize return node temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby normalize return node temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - elide_symbol = 
ARGV.fetch(3) == "true" - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - result = normalizer.send(:normalize_return_node, target, elide_symbol: elide_symbol) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - elsif node.is_a?(Array) - node.map { |child| value(child) } - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(if elide_symbol { "true" } else { "false" }) - .output() - .expect("run ruby private normalize_return_node helper"); - assert!( - output.status.success(), - "ruby normalize_return_node helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby normalize_return_node output should be json") - } - - fn ruby_private_normalize_body_nodes_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby normalize body 
nodes temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby normalize body nodes temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - if target_kind == "__root__" - target = document.root - else - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - end - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - result = normalizer.send(:normalize_body_nodes, target.named_children, source: target) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby private normalize_body_nodes helper"); - assert!( - output.status.success(), - "ruby normalize_body_nodes helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - 
serde_json::from_slice(&output.stdout) - .expect("ruby normalize_body_nodes output should be json") - } - - fn ruby_private_inline_def_from_argument_list_nil_value( - source: &str, - language: Language, - suffix: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby inline def argument nil temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby inline def argument nil temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - result = normalizer.send(:inline_def_from_argument_list, nil) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .output() - .expect("run ruby private inline def argument nil helper"); - assert!( - output.status.success(), - "ruby inline def argument nil helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby inline def argument nil output should be json") - } - - fn ruby_private_assignment_target_value( - source: &str, - 
language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby assignment target temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby assignment target temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - source = normalizer.send(:parent_node, target) || target - right_raw = normalizer.send(:assignment_right, source) - right = right_raw ? 
normalizer.send(:normalize_node, right_raw) : nil - result = normalizer.send(:assignment_target, target, right, source: source) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby private assignment target helper"); - assert!( - output.status.success(), - "ruby assignment target helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby assignment target output should be json") - } - - fn ruby_private_normalize_multiple_assignment_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby multiple assignment temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby multiple assignment temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if 
node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - left = normalizer.send(:assignment_left, target) - right_raw = normalizer.send(:assignment_right, target) - right = right_raw ? normalizer.send(:normalize_node, right_raw) : nil - result = normalizer.send(:normalize_multiple_assignment, left, right, target) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby private multiple assignment helper"); - assert!( - output.status.success(), - "ruby multiple assignment helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby multiple assignment output should be json") - } - - fn ruby_private_augmented_assignment_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - operator: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby augmented assignment value temp source file"); - 
file.write_all(source.as_bytes()) - .expect("write ruby augmented assignment value temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - operator = ARGV.fetch(3).to_sym - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - source = normalizer.send(:parent_node, target) || target - right_raw = normalizer.send(:assignment_right, source) - result = normalizer.send(:augmented_assignment_value, target, operator, right_raw, source) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(operator) - .output() - .expect("run ruby private augmented assignment value helper"); - assert!( - output.status.success(), - "ruby augmented assignment value helper failed: stdout={} stderr={}", - 
String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby augmented assignment value output should be json") - } - - fn ruby_private_logical_operator_assignment_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby logical operator assignment temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby logical operator assignment temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - left = normalizer.send(:assignment_left, target) - right_raw = normalizer.send(:assignment_right, target) - right = normalizer.send(:normalize_node, right_raw) - operator = normalizer.send(:operator_assignment_operator, target) - result = normalizer.send(:normalize_logical_operator_assignment, left, operator, right, source: target) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - 
node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby private logical operator assignment helper"); - assert!( - output.status.success(), - "ruby logical operator assignment helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby logical operator assignment output should be json") - } - - fn ruby_private_call_arguments_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - function_mode: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby call arguments temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby call arguments temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - function_mode = ARGV.fetch(3) - target = nil - fallback_target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - fallback_target ||= node if node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - target ||= fallback_target - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - function = - case function_mode - 
when "auto" - normalizer.send(:named_field, target, "function") || - normalizer.send(:named_field, target, "call") || - target.named_children.first - when "none" - nil - else - abort "unknown function mode: #{function_mode.inspect}" - end - result = normalizer.send(:call_arguments, target, function) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(result.map { |node| value(node) }) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(function_mode) - .output() - .expect("run ruby private call arguments helper"); - assert!( - output.status.success(), - "ruby call arguments helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout).expect("ruby call arguments output should be json") - } - - fn ruby_private_normalize_call_without_block_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - block_mode: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby normalize_call_without_block temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby normalize_call_without_block temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let 
script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - block_mode = ARGV.fetch(3) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - block = - case block_mode - when "auto" - normalizer.send(:call_block, target) - when "none" - nil - else - abort "unknown block mode: #{block_mode.inspect}" - end - result = normalizer.send(:normalize_call_without_block, target, block) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(block_mode) - .output() - .expect("run ruby private normalize_call_without_block helper"); - assert!( - output.status.success(), - "ruby normalize_call_without_block helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby normalize_call_without_block output should be json") - } - - fn 
ruby_private_normalize_patterns_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby normalize_patterns temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby normalize_patterns temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - result = normalizer.send(:normalize_patterns, target) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(result.map { |node| value(node) }) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby private normalize_patterns helper"); - assert!( - output.status.success(), - "ruby 
normalize_patterns helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby normalize_patterns output should be json") - } - - fn ruby_private_command_arguments_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby command arguments temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby command arguments temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - fallback_target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - fallback_target ||= node if node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - target ||= fallback_target - abort "target node not found: #{target_kind} #{target_text.inspect}" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - result = normalizer.send(:command_arguments, target) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(result.map { |node| value(node) }) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - 
.env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby private command arguments helper"); - assert!( - output.status.success(), - "ruby command arguments helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby command arguments output should be json") - } - - fn ruby_private_const_for_nil_value(source: &str, language: Language, suffix: &str) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby const_for nil temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby const_for nil temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - result = normalizer.send(:const_for, nil) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .output() - .expect("run 
ruby private const_for nil helper"); - assert!( - output.status.success(), - "ruby const_for nil helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout).expect("ruby const_for nil output should be json") - } - - fn ruby_private_source_before_child_wrap_value( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - child_kind: &str, - child_text: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby source_before_child temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby source_before_child temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - child_kind = ARGV.fetch(3) - child_text = ARGV.fetch(4) - target = nil - child = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - child ||= node if node.kind == child_kind && node.text.to_s == child_text - node.named_children.each { |next_child| walk.call(next_child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - abort "child node not found" unless child - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - source = normalizer.send(:source_before_child, target, child) - result = normalizer.send(:wrap, :OUTER, children: [], source: source) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - 
"last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(child_kind) - .arg(child_text) - .output() - .expect("run ruby private source_before_child helper"); - assert!( - output.status.success(), - "ruby source_before_child helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby source_before_child output should be json") - } - - fn ruby_private_source_from_nodes_value( - source: &str, - language: Language, - suffix: &str, - first_kind: &str, - first_text: &str, - last_kind: &str, - last_text: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby source_from_nodes temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby source_from_nodes temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - first_kind = ARGV.fetch(1) - first_text = ARGV.fetch(2) - last_kind = ARGV.fetch(3) - last_text = ARGV.fetch(4) - first_node = nil - last_node = nil - walk = lambda do |node| - if node.respond_to?(:kind) - first_node ||= node if node.kind == first_kind && node.text.to_s == first_text - last_node = node if node.kind == last_kind && node.text.to_s == last_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort 
"first node not found: #{first_kind} #{first_text.inspect}" unless first_node - abort "last node not found: #{last_kind} #{last_text.inspect}" unless last_node - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - result = normalizer.send(:source_from_nodes, first_node, last_node) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(first_kind) - .arg(first_text) - .arg(last_kind) - .arg(last_text) - .output() - .expect("run ruby private source_from_nodes helper"); - assert!( - output.status.success(), - "ruby source_from_nodes helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby source_from_nodes output should be json") - } - - fn ruby_private_source_from_normalized_nodes_value( - source: &str, - language: Language, - suffix: &str, - first_kind: &str, - first_text: &str, - last_kind: &str, - last_text: &str, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby source_from_normalized_nodes temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby source_from_normalized_nodes temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex 
rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - first_kind = ARGV.fetch(1) - first_text = ARGV.fetch(2) - last_kind = ARGV.fetch(3) - last_text = ARGV.fetch(4) - first_raw = nil - last_raw = nil - walk = lambda do |node| - if node.respond_to?(:kind) - first_raw ||= node if node.kind == first_kind && node.text.to_s == first_text - last_raw ||= node if node.kind == last_kind && node.text.to_s == last_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "first node not found" unless first_raw - abort "last node not found" unless last_raw - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - first_node = normalizer.send(:wrap, :FIRST, children: [], source: first_raw) - last_node = normalizer.send(:wrap, :LAST, children: [], source: last_raw) - result = normalizer.send(:source_from_normalized_nodes, first_node, last_node) - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(first_kind) - .arg(first_text) - .arg(last_kind) - .arg(last_text) - .output() - .expect("run ruby private source_from_normalized_nodes helper"); - assert!( - output.status.success(), - "ruby source_from_normalized_nodes helper failed: stdout={} stderr={}", - 
String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby source_from_normalized_nodes output should be json") - } - - fn ruby_private_dynamic_string_source_signature( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> Option<(String, String)> { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby dynamic_string_source temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby dynamic_string_source temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - normalized = target.named_children.map { |child| [child, normalizer.send(:normalize_node, child)] } - result = normalizer.send(:dynamic_string_source, normalized) - if result - puts JSON.generate([result.kind, result.text.to_s]) - else - puts "null" - end - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby private dynamic_string_source helper"); - assert!( - output.status.success(), - "ruby dynamic_string_source helper failed: stdout={} stderr={}", - 
String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - let value: Value = serde_json::from_slice(&output.stdout) - .expect("ruby dynamic_string_source output should be json"); - if value.is_null() { - return None; - } - let pair = value - .as_array() - .expect("ruby dynamic_string_source output should be an array"); - Some(( - pair[0] - .as_str() - .expect("dynamic_string_source kind should be string") - .to_string(), - pair[1] - .as_str() - .expect("dynamic_string_source text should be string") - .to_string(), - )) - } - - fn ruby_private_operator_assignment_statement_parts_signature( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> Option<(String, String, String, String, String)> { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby operator_assignment_statement_parts temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby operator_assignment_statement_parts temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - left, operator, right = normalizer.send(:operator_assignment_statement_parts, target) - if left && operator && right - puts JSON.generate([left.kind, left.text.to_s, operator.to_s, right.kind, right.text.to_s]) - else - puts "null" - end - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - 
.env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby private operator_assignment_statement_parts helper"); - assert!( - output.status.success(), - "ruby operator_assignment_statement_parts helper failed for {language:?} {kind:?} {text:?}: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - let value: Value = serde_json::from_slice(&output.stdout) - .expect("ruby operator_assignment_statement_parts output should be json"); - if value.is_null() { - return None; - } - let parts = value - .as_array() - .expect("ruby operator_assignment_statement_parts output should be an array"); - Some(( - parts[0] - .as_str() - .expect("operator_assignment left kind should be string") - .to_string(), - parts[1] - .as_str() - .expect("operator_assignment left text should be string") - .to_string(), - parts[2] - .as_str() - .expect("operator_assignment operator should be string") - .to_string(), - parts[3] - .as_str() - .expect("operator_assignment right kind should be string") - .to_string(), - parts[4] - .as_str() - .expect("operator_assignment right text should be string") - .to_string(), - )) - } - - fn ruby_private_modifier_parts_signature( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> Option<((String, String), (String, String))> { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby modifier_parts temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby modifier_parts temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: 
"tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - action, condition = normalizer.send(:modifier_parts, target) - if action && condition - puts JSON.generate([[action.kind, action.text.to_s], [condition.kind, condition.text.to_s]]) - else - puts "null" - end - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby private modifier_parts helper"); - assert!( - output.status.success(), - "ruby modifier_parts helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - let value: Value = serde_json::from_slice(&output.stdout) - .expect("ruby modifier_parts output should be json"); - if value.is_null() { - return None; - } - let pairs = value - .as_array() - .expect("ruby modifier_parts output should be an array"); - let action = pairs[0] - .as_array() - .expect("modifier_parts action should be an array"); - let condition = pairs[1] - .as_array() - .expect("modifier_parts condition should be an array"); - Some(( - ( - action[0] - .as_str() - .expect("modifier_parts action kind should be string") - .to_string(), - action[1] - .as_str() - .expect("modifier_parts action text should be string") - .to_string(), - ), - ( - condition[0] - .as_str() - .expect("modifier_parts condition kind should be string") - .to_string(), - condition[1] - .as_str() - 
.expect("modifier_parts condition text should be string") - .to_string(), - ), - )) - } - - fn ruby_private_visibility_inline_def_statement_predicate( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - ) -> bool { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby visibility_inline_def_statement temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby visibility_inline_def_statement temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target = nil - walk = lambda do |node| - if node.respond_to?(:kind) - target ||= node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - abort "target node not found" unless target - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - puts normalizer.send(:visibility_inline_def_statement?, target, target.named_children.first) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .output() - .expect("run ruby private visibility_inline_def_statement helper"); - assert!( - output.status.success(), - "ruby visibility_inline_def_statement helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - String::from_utf8(output.stdout) - .expect("ruby visibility_inline_def_statement output should be utf8") - .trim() - == "true" - } - - fn 
ruby_private_drop_trailing_nil_statement_value(input: &Value) -> Value { - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - def node(value) - return nil if value.nil? - return value unless value.is_a?(Hash) - - Decomplex::Ast::Node.new( - type: value.fetch("type").to_sym, - children: value.fetch("children").map { |child| node(child) }, - first_lineno: value.fetch("first_lineno"), - first_column: value.fetch("first_column"), - last_lineno: value.fetch("last_lineno"), - last_column: value.fetch("last_column"), - text: value.fetch("text") - ) - end - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate - result = normalizer.send(:drop_trailing_nil_statement, node(JSON.parse(ARGV.fetch(0)))) - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "json", - "-e", - script, - ]) - .arg(input.to_string()) - .output() - .expect("run ruby private drop_trailing_nil_statement helper"); - assert!( - output.status.success(), - "ruby drop_trailing_nil_statement helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby drop_trailing_nil_statement output should be json") - } - - fn ruby_private_elide_tail_returns_value(input: &Value, ruby: bool) -> Value { - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex 
rust dir should have gem parent"); - let script = r#" - def node(value) - return nil if value.nil? - return value unless value.is_a?(Hash) - - Decomplex::Ast::Node.new( - type: value.fetch("type").to_sym, - children: value.fetch("children").map { |child| node(child) }, - first_lineno: value.fetch("first_lineno"), - first_column: value.fetch("first_column"), - last_lineno: value.fetch("last_lineno"), - last_column: value.fetch("last_column"), - text: value.fetch("text") - ) - end - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate - adapter = if ARGV.fetch(1) == "ruby" - Decomplex::Ast::RubyTreeSitterNormalizationAdapter.new(nil) - else - Decomplex::Ast::TreeSitterNormalizationAdapter.new(nil) - end - normalizer.instance_variable_set(:@normalization_adapter, adapter) - result = normalizer.send(:elide_tail_returns, node(JSON.parse(ARGV.fetch(0)))) - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "json", - "-e", - script, - ]) - .arg(input.to_string()) - .arg(if ruby { "ruby" } else { "other" }) - .output() - .expect("run ruby private elide_tail_returns helper"); - assert!( - output.status.success(), - "ruby elide_tail_returns helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby elide_tail_returns output should be json") - } - - fn ruby_private_elide_implicit_nil_body_value(input: &Value, ruby: bool) -> Value { - let 
decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - def node(value) - return nil if value.nil? - return value unless value.is_a?(Hash) - - Decomplex::Ast::Node.new( - type: value.fetch("type").to_sym, - children: value.fetch("children").map { |child| node(child) }, - first_lineno: value.fetch("first_lineno"), - first_column: value.fetch("first_column"), - last_lineno: value.fetch("last_lineno"), - last_column: value.fetch("last_column"), - text: value.fetch("text") - ) - end - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate - adapter = if ARGV.fetch(1) == "ruby" - Decomplex::Ast::RubyTreeSitterNormalizationAdapter.new(nil) - else - Decomplex::Ast::TreeSitterNormalizationAdapter.new(nil) - end - normalizer.instance_variable_set(:@normalization_adapter, adapter) - result = normalizer.send(:elide_implicit_nil_body, node(JSON.parse(ARGV.fetch(0)))) - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "json", - "-e", - script, - ]) - .arg(input.to_string()) - .arg(if ruby { "ruby" } else { "other" }) - .output() - .expect("run ruby private elide_implicit_nil_body helper"); - assert!( - output.status.success(), - "ruby elide_implicit_nil_body helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby elide_implicit_nil_body output should 
be json") - } - - fn ruby_private_prepend_rescue_exception_assignment_value( - source: &str, - body: &Value, - assignment: &Value, - ) -> Value { - let mut file = tempfile::Builder::new() - .suffix(".rb") - .tempfile() - .expect("create ruby prepend rescue temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby prepend rescue temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - def node(value) - return nil if value.nil? - return value unless value.is_a?(Hash) - - Decomplex::Ast::Node.new( - type: value.fetch("type").to_sym, - children: value.fetch("children").map { |child| node(child) }, - first_lineno: value.fetch("first_lineno"), - first_column: value.fetch("first_column"), - last_lineno: value.fetch("last_lineno"), - last_column: value.fetch("last_column"), - text: value.fetch("text") - ) - end - - def value(node) - if node.is_a?(Decomplex::Ast::Node) - { - "type" => node.type.to_s, - "children" => node.children.map { |child| value(child) }, - "first_lineno" => node.first_lineno, - "first_column" => node.first_column, - "last_lineno" => node.last_lineno, - "last_column" => node.last_column, - "text" => node.text.to_s, - } - elsif node.is_a?(Symbol) - node.to_s - else - node - end - end - - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - body = node(JSON.parse(ARGV.fetch(1))) - assignment = node(JSON.parse(ARGV.fetch(2))) - result = normalizer.send(:prepend_rescue_exception_assignment, body, assignment) - puts JSON.generate(value(result)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", "ruby") - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(body.to_string()) - 
.arg(assignment.to_string()) - .output() - .expect("run ruby private prepend_rescue_exception_assignment helper"); - assert!( - output.status.success(), - "ruby prepend_rescue_exception_assignment helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - serde_json::from_slice(&output.stdout) - .expect("ruby prepend_rescue_exception_assignment output should be json") - } - - fn ruby_private_symbol_literal_node_predicate( - node_type: Option<&str>, - child_kind: Option<&str>, - ) -> bool { - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - def child(kind) - case kind - when "symbol" - :value - when "string" - "value" - when "node" - Decomplex::Ast::Node.new( - type: :NIL, - children: [], - first_lineno: 1, - first_column: 0, - last_lineno: 1, - last_column: 1, - text: "NIL" - ) - when "nil" - nil - else - nil - end - end - - node_type = ARGV.fetch(0) - child_kind = ARGV.fetch(1) - target = if node_type == "none" - nil - else - children = child_kind == "none" ? [] : [child(child_kind)] - Decomplex::Ast::Node.new( - type: node_type.to_sym, - children: children, - first_lineno: 1, - first_column: 0, - last_lineno: 1, - last_column: 1, - text: node_type - ) - end - normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate - puts normalizer.send(:symbol_literal_node?, target) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) - .arg(node_type.unwrap_or("none")) - .arg(child_kind.unwrap_or("none")) - .output() - .expect("run ruby private symbol_literal_node? helper"); - assert!( - output.status.success(), - "ruby symbol_literal_node? 
helper failed: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - String::from_utf8(output.stdout) - .expect("ruby symbol_literal_node? output should be utf8") - .trim() - == "true" - } - - fn ruby_private_same_ts_node_predicate( - source: &str, - language: Language, - suffix: &str, - left_kind: &str, - left_text: &str, - left_index: usize, - right_kind: &str, - right_text: &str, - right_index: usize, - ) -> bool { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby same_ts_node temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby same_ts_node temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - left_kind = ARGV.fetch(1) - left_text = ARGV.fetch(2) - left_index = ARGV.fetch(3).to_i - right_kind = ARGV.fetch(4) - right_text = ARGV.fetch(5) - right_index = ARGV.fetch(6).to_i - - def matches(root, kind, text) - found = [] - walk = lambda do |node| - if node.respond_to?(:kind) - found << node if node.kind == kind && node.text.to_s == text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(root) - found - end - - left = matches(document.root, left_kind, left_text).fetch(left_index) - right = matches(document.root, right_kind, right_text).fetch(right_index) - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - puts normalizer.send(:same_ts_node?, left, right) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-e", - script, - ]) - .arg(file.path()) - .arg(left_kind) - .arg(left_text) - .arg(left_index.to_string()) - .arg(right_kind) - 
.arg(right_text) - .arg(right_index.to_string()) - .output() - .expect("run ruby private same_ts_node? helper"); - assert!( - output.status.success(), - "ruby same_ts_node? helper failed for {language:?}: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - String::from_utf8(output.stdout) - .expect("ruby same_ts_node? output should be utf8") - .trim() - == "true" - } - - fn ruby_private_parent_named_child_predicate( - source: &str, - language: Language, - suffix: &str, - parent_kind: &str, - parent_text: &str, - parent_index: usize, - child_kind: &str, - child_text: &str, - child_index: usize, - ) -> bool { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby parent_named_child temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby parent_named_child temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - parent_kind = ARGV.fetch(1) - parent_text = ARGV.fetch(2) - parent_index = ARGV.fetch(3).to_i - child_kind = ARGV.fetch(4) - child_text = ARGV.fetch(5) - child_index = ARGV.fetch(6).to_i - - def matches(root, kind, text) - found = [] - walk = lambda do |node| - if node.respond_to?(:kind) - found << node if node.kind == kind && node.text.to_s == text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(root) - found - end - - parent = matches(document.root, parent_kind, parent_text).fetch(parent_index) - child = matches(document.root, child_kind, child_text).fetch(child_index) - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - puts normalizer.send(:parent_named_child?, parent, child) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", 
ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - "-r", - "decomplex/syntax", - "-e", - script, - ]) - .arg(file.path()) - .arg(parent_kind) - .arg(parent_text) - .arg(parent_index.to_string()) - .arg(child_kind) - .arg(child_text) - .arg(child_index.to_string()) - .output() - .expect("run ruby private parent_named_child? helper"); - assert!( - output.status.success(), - "ruby parent_named_child? helper failed for {language:?}: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - String::from_utf8(output.stdout) - .expect("ruby parent_named_child? output should be utf8") - .trim() - == "true" - } - - fn ruby_private_node_key_signature( - source: &str, - language: Language, - suffix: &str, - kind: &str, - text: &str, - index: usize, - ) -> (String, usize, usize) { - let mut file = tempfile::Builder::new() - .suffix(suffix) - .tempfile() - .expect("create ruby node_key temp source file"); - file.write_all(source.as_bytes()) - .expect("write ruby node_key temp source file"); - let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("decomplex rust dir should have gem parent"); - let script = r#" - document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") - target_kind = ARGV.fetch(1) - target_text = ARGV.fetch(2) - target_index = ARGV.fetch(3).to_i - found = [] - walk = lambda do |node| - if node.respond_to?(:kind) - found << node if node.kind == target_kind && node.text.to_s == target_text - node.named_children.each { |child| walk.call(child) } - end - end - walk.call(document.root) - target = found.fetch(target_index) - normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) - puts JSON.generate(normalizer.send(:node_key, target)) - "#; - let output = Command::new("ruby") - .current_dir(decomplex_dir) - .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) - .args([ - "-I", - "lib", - "-r", - "decomplex/ast", - 
"-r", - "decomplex/syntax", - "-r", - "json", - "-e", - script, - ]) - .arg(file.path()) - .arg(kind) - .arg(text) - .arg(index.to_string()) - .output() - .expect("run ruby private node_key helper"); - assert!( - output.status.success(), - "ruby node_key helper failed for {language:?}: stdout={} stderr={}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - let value: Value = - serde_json::from_slice(&output.stdout).expect("ruby node_key output should be json"); - let key = value - .as_array() - .expect("ruby node_key output should be an array"); - ( - key[0] - .as_str() - .expect("node_key kind should be string") - .to_string(), - key[1] - .as_u64() - .expect("node_key start byte should be integer") as usize, - key[2] - .as_u64() - .expect("node_key end byte should be integer") as usize, - ) - } - - #[test] - fn tree_normalizer_new_initializes_empty_state() { - let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); - - assert_eq!(normalizer.source, ""); - assert_eq!(normalizer.language, Language::Ruby); - assert!(normalizer.local_stack.is_empty()); - assert_eq!(normalizer.root_span, None); - } - - #[test] - fn normalize_root_matches_ruby_across_tree_normalizer_languages() { - for (source, language, suffix) in [ - ( - "class C\n def each(value)\n yield value\n case value\n when 1 then :one\n else :other\n end\n end\nend\n", - Language::Ruby, - ".rb", - ), - ( - "def gen(value):\n yield value\n other()\n", - Language::Python, - ".py", - ), - ( - "function f(value: number) { switch (value) { case 1: one(); break; default: other(); } return value ? 
one() : other(); }\n", - Language::TypeScript, - ".ts", - ), - ( - "function f(value)\n if value then\n one()\n else\n other()\n end\n return value\nend\n", - Language::Lua, - ".lua", - ), - ] { - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn tree_normalizer_yield_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def each\n yield :item\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "yield :item", - ), - ( - "def each\n value\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value", - ), - ( - "def gen():\n yield item\n other()\n", - Language::Python, - ".py", - "expression_statement", - "yield item", - ), - ( - "def gen():\n yield from items\n other()\n", - Language::Python, - ".py", - "expression_statement", - "yield from items", - ), - ( - "def gen():\n yield item\n other()\n", - Language::Python, - ".py", - "block", - "yield item\n other()", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.yield_statement(node), - ruby_private_predicate(source, language, suffix, "yield_statement?", kind, text), - "yield_statement? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn yield_argument_list_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def each\n yield(:item)\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "(:item)", - ), - ( - "def each\n yield :item\nend\n", - Language::Ruby, - ".rb", - "argument_list", - ":item", - ), - ( - "def call\n foo(:item)\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "(:item)", - ), - ( - "yield_value(value)\n", - Language::Python, - ".py", - "argument_list", - "(value)", - ), - ( - "yield(value);\n", - Language::TypeScript, - ".ts", - "parenthesized_expression", - "(value)", - ), - ( - "coroutine.yield(value)\n", - Language::Lua, - ".lua", - "arguments", - "(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.yield_argument_list(node), - ruby_private_predicate( - source, - language, - suffix, - "yield_argument_list?", - kind, - text - ), - "yield_argument_list? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn yield_argument_nodes_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def each\n yield(:item)\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "(:item)", - ), - ( - "def each\n yield nil\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "nil", - ), - ( - "def each\n yield item, other\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "item, other", - ), - ( - "yield_value(value)\n", - Language::Python, - ".py", - "argument_list", - "(value)", - ), - ( - "yield(value);\n", - Language::TypeScript, - ".ts", - "parenthesized_expression", - "(value)", - ), - ( - "coroutine.yield(value)\n", - Language::Lua, - ".lua", - "arguments", - "(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = Value::Array( - normalizer - .yield_argument_nodes(node) - .iter() - .map(node_value) - .collect(), - ); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "yield_argument_nodes", - kind, - text - ), - "yield_argument_nodes mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn yield_inline_arguments_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def each\n yield\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "yield", - ), - ( - "def gen():\n yield item\n other()\n", - Language::Python, - ".py", - "expression_statement", - "yield item", - ), - ( - "function* gen() { yield item; }\n", - Language::TypeScript, - ".ts", - "expression_statement", - "yield item;", - ), - ( - "coroutine.yield(item)\n", - Language::Lua, - ".lua", - "function_call", - "coroutine.yield(item)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, 
text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = Value::Array( - normalizer - .yield_inline_arguments(node) - .iter() - .map(node_value) - .collect(), - ); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "yield_inline_arguments", - kind, - text - ), - "yield_inline_arguments mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_yield_argument_list_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def each\n yield(:item)\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "(:item)", - ), - ( - "def each\n yield :item\nend\n", - Language::Ruby, - ".rb", - "argument_list", - ":item", - ), - ( - "def each\n yield nil\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "nil", - ), - ( - "yield_value(value)\n", - Language::Python, - ".py", - "argument_list", - "(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = node_value(&normalizer.normalize_yield_argument_list(node)); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_yield_argument_list", - kind, - text - ), - "normalize_yield_argument_list mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_yield_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def each\n yield\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "yield", - ), - ( - "def each\n yield item\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "yield item", - ), - ( - "def each\n yield nil\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "yield nil", - ), - ( - "def gen():\n yield item\n other()\n", - Language::Python, - ".py", - "expression_statement", - "yield item", - ), - 
( - "function* gen() { yield item; }\n", - Language::TypeScript, - ".ts", - "yield_expression", - "yield item", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = node_value(&normalizer.normalize_yield(node)); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_yield", - kind, - text - ), - "normalize_yield mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_yield_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def each\n yield\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "yield", - ), - ( - "def each\n yield item\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "yield item", - ), - ( - "def each\n yield nil\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "yield nil", - ), - ( - "def gen():\n yield item\n other()\n", - Language::Python, - ".py", - "expression_statement", - "yield item", - ), - ( - "def gen():\n yield from items\n other()\n", - Language::Python, - ".py", - "expression_statement", - "yield from items", - ), - ( - "function* gen() { yield item; }\n", - Language::TypeScript, - ".ts", - "expression_statement", - "yield item;", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = node_value(&normalizer.normalize_yield_statement(node)); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_yield_statement", - kind, - text - ), - "normalize_yield_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_node_dispatch_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ 
- ( - "def each\n yield item\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "yield item", - ), - ( - "def check\n !flag\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "!flag", - ), - ( - "def gen():\n yield item\n other()\n", - Language::Python, - ".py", - "expression_statement", - "yield item", - ), - ( - "switch (value) { case 1: one(); default: other(); }\n", - Language::TypeScript, - ".ts", - "switch_statement", - "switch (value) { case 1: one(); default: other(); }", - ), - ( - "if value then one() else other() end\n", - Language::Lua, - ".lua", - "if_statement", - "if value then one() else other() end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_node(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_node", - kind, - text - ), - "normalize_node mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn python_yield_statement_in_multi_statement_block_matches_ruby_ast() { - let source = "def gen():\n yield item\n other()\n"; - assert_ruby_parity(source, Language::Python, ".py"); - - let root = parse_language_source(source, Language::Python, ".py"); - let defn = first_node(&root, "DEFN", "def gen():\n yield item\n other()"); - let scope = child_node(defn, 1); - let body = child_node(scope, 2); - - assert_eq!(body.r#type, "BLOCK"); - assert_eq!(child_types(body), vec!["YIELD", "EXPRESSION_STATEMENT"]); - } - - #[test] - fn tree_normalizer_super_statement_matches_ruby_private_predicate() { - for (source, kind, text) in [ - ( - "class Child < Parent\n def call\n super\n end\nend\n", - "body_statement", - "super", - ), - ( - "class Child < Parent\n def call\n super :item\n end\nend\n", - "body_statement", - "super :item", - 
), - ( - "class Child < Parent\n def call\n value\n end\nend\n", - "body_statement", - "value", - ), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - - assert_eq!( - normalizer.super_statement(node), - ruby_private_predicate( - source, - Language::Ruby, - ".rb", - "super_statement?", - kind, - text - ), - "super_statement? mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_super_statement_matches_ruby_private_method() { - for (source, kind, text) in [ - ( - "class Child < Parent\n def call\n super\n end\nend\n", - "body_statement", - "super", - ), - ( - "class Child < Parent\n def call\n super :item\n end\nend\n", - "body_statement", - "super :item", - ), - ( - "class Child < Parent\n def call\n super value\n end\nend\n", - "body_statement", - "super value", - ), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = node_value(&normalizer.normalize_super_statement(node)); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - Language::Ruby, - ".rb", - "normalize_super_statement", - kind, - text - ), - "normalize_super_statement mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn ruby_super_statement_normalization_matches_ruby_ast() { - let source = "class Child < Parent\n def bare\n super\n end\n def with_arg\n super :item\n end\nend\n"; - assert_ruby_parity(source, Language::Ruby, ".rb"); - - let root = parse_language_source(source, Language::Ruby, ".rb"); - let bare = first_node(&root, "SUPER", "super"); - let with_arg = first_node(&root, "SUPER", "super :item"); - - assert_eq!(bare.children, vec![Child::Nil]); - assert_eq!(child_types(with_arg), vec!["LIST"]); - assert_eq!(child_types(child_node(with_arg, 0)), 
vec!["LIT"]); - } - - #[test] - fn tree_normalizer_argument_list_element_reference_matches_ruby_private_predicate() { - for (source, text) in [ - ("def indexed\n return items[0]\nend\n", "items[0]"), - ("def indexed\n return obj.foo[0]\nend\n", "obj.foo[0]"), - ("def indexed\n return [0]\nend\n", "[0]"), - ( - "def indexed\n return items[0], other\nend\n", - "items[0], other", - ), - ("def indexed\n return items[]\nend\n", "items[]"), - ( - "def indexed\n return items[0] { nope }\nend\n", - "items[0] { nope }", - ), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, "argument_list", text); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - - assert_eq!( - normalizer.argument_list_element_reference(node), - ruby_private_predicate( - source, - Language::Ruby, - ".rb", - "argument_list_element_reference?", - "argument_list", - text - ), - "argument_list_element_reference? mismatch for {text:?}" - ); - } - } - - #[test] - fn normalize_argument_list_element_reference_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def indexed\n return items[0]\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "items[0]", - ), - ( - "def indexed\n return obj.foo[0]\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "obj.foo[0]", - ), - ( - "def indexed\n return [0]\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "[0]", - ), - ( - "def indexed\n return items[0], other\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "items[0], other", - ), - ( - "def indexed\n return items[0] { nope }\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "items[0] { nope }", - ), - ( - "def indexed():\n return foo(items[0])\n", - Language::Python, - ".py", - "argument_list", - "(items[0])", - ), - ( - "function indexed(){ return foo(items[0]); }\n", - Language::TypeScript, - ".ts", - "arguments", - "(items[0])", - ), - ( - "function indexed() return 
foo(items[0]) end\n", - Language::Lua, - ".lua", - "arguments", - "(items[0])", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_argument_list_element_reference(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_argument_list_element_reference", - kind, - text - ), - "normalize_argument_list_element_reference mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn dynamic_scope_rewrites_locals_without_crossing_scope_boundaries() { - let inner_assignment = test_node("LASGN", vec![Child::Symbol("inner".to_string())]); - let node = test_node( - "BLOCK", - vec![ - Child::Node(Box::new(test_node( - "LASGN", - vec![Child::Symbol("value".to_string())], - ))), - Child::Node(Box::new(test_node( - "LVAR", - vec![Child::Symbol("value".to_string())], - ))), - Child::Node(Box::new(test_node( - "DEFN", - vec![ - Child::Symbol("nested".to_string()), - Child::Node(Box::new(test_node( - "SCOPE", - vec![ - Child::Nil, - Child::Nil, - Child::Node(Box::new(inner_assignment)), - ], - ))), - ], - ))), - ], - ); - - let result = super::dynamic_scope(node); - - assert_eq!(child_node(&result, 0).r#type, "DASGN"); - assert_eq!(child_node(&result, 1).r#type, "DVAR"); - let nested = child_node(&result, 2); - assert_eq!(nested.r#type, "DEFN"); - let nested_scope = child_node(nested, 1); - assert_eq!(nested_scope.r#type, "SCOPE"); - assert_eq!(child_node(nested_scope, 2).r#type, "LASGN"); - } - - #[test] - fn link_when_chain_sets_next_arm_and_pads_short_when_nodes() { - let fallback = test_node("ELSE", Vec::new()); - let first = test_node( - "WHEN", - vec![ - Child::Symbol("patterns".to_string()), - Child::Nil, - Child::Nil, - ], - ); - let second = test_node( - "WHEN", - vec![ 
- Child::Symbol("patterns".to_string()), - Child::Nil, - Child::Nil, - ], - ); - let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); - - let result = normalizer - .link_when_chain(vec![first, second], Some(fallback)) - .expect("expected linked when chain"); - - assert_eq!(result.r#type, "WHEN"); - let next = child_node(&result, 2); - assert_eq!(next.r#type, "WHEN"); - assert_eq!(child_node(next, 2).r#type, "ELSE"); - - let short = test_node("WHEN", vec![Child::Symbol("patterns".to_string())]); - let fallback = test_node("ELSE", Vec::new()); - let result = normalizer - .link_when_chain(vec![short], Some(fallback)) - .expect("expected padded when chain"); - - assert_eq!(result.children.len(), 3); - assert_eq!(result.children[1], Child::Nil); - assert_eq!(child_node(&result, 2).r#type, "ELSE"); - } - - #[test] - fn link_rescue_chain_sets_next_rescue_and_pads_short_resbody_nodes() { - let first = test_node( - "RESBODY", - vec![ - Child::Symbol("exceptions".to_string()), - Child::Nil, - Child::Nil, - ], - ); - let second = test_node( - "RESBODY", - vec![ - Child::Symbol("exceptions".to_string()), - Child::Nil, - Child::Nil, - ], - ); - let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); - - let result = normalizer - .link_rescue_chain(vec![first, second]) - .expect("expected linked rescue chain"); - - assert_eq!(result.r#type, "RESBODY"); - let next = child_node(&result, 2); - assert_eq!(next.r#type, "RESBODY"); - assert_eq!(next.children[2], Child::Nil); - - let short = test_node("RESBODY", vec![Child::Symbol("exceptions".to_string())]); - let result = normalizer - .link_rescue_chain(vec![short]) - .expect("expected padded rescue chain"); - - assert_eq!(result.children.len(), 3); - assert_eq!(result.children[1], Child::Nil); - assert_eq!(result.children[2], Child::Nil); - } - - #[test] - fn infix_statement_parts_extracts_allowed_wrapper_parts() { - let source = "def calc\n left + right\nend\n"; - let tree = raw_tree(source, 
Language::Ruby); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let body = first_raw_node(tree.root_node(), source, "body_statement", "left + right"); - let binary = first_raw_node(tree.root_node(), source, "binary", "left + right"); - - assert_eq!( - infix_parts_text(&normalizer, body, source), - Some(("left".to_string(), "+".to_string(), "right".to_string())) - ); - assert_eq!(infix_parts_text(&normalizer, binary, source), None); - - let source = "def calc\n return left + right\nend\n"; - let tree = raw_tree(source, Language::Ruby); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let args = first_raw_node(tree.root_node(), source, "argument_list", "left + right"); - assert_eq!( - infix_parts_text(&normalizer, args, source), - Some(("left".to_string(), "+".to_string(), "right".to_string())) - ); - - let source = "def calc\n left && right\nend\n"; - let tree = raw_tree(source, Language::Ruby); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let boolean = first_raw_node(tree.root_node(), source, "body_statement", "left && right"); - assert_eq!(infix_parts_text(&normalizer, boolean, source), None); - } - - #[test] - fn infix_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def calc\n left + right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left + right", - ), - ( - "def calc\n return left + right\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "left + right", - ), - ( - "def calc\n left && right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left && right", - ), - ( - "const value = left + right;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left + right", - ), - ( - "value = left + right\n", - Language::Python, - ".py", - "binary_operator", - "left + right", - ), - ( - "local value = left + right\n", - Language::Lua, - ".lua", - "expression_list", - "left + right", - ), 
- ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.infix_statement(node), - ruby_private_predicate(source, language, suffix, "infix_statement?", kind, text), - "infix_statement? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_infix_statement_matches_ruby_private_method() { - for (source, kind, text) in [ - ( - "def calc\n left + right\nend\n", - "body_statement", - "left + right", - ), - ( - "def calc\n return left + right\nend\n", - "argument_list", - "left + right", - ), - ( - "def match\n value =~ /left/\nend\n", - "body_statement", - "value =~ /left/", - ), - ( - "def match\n value =~ pattern\nend\n", - "body_statement", - "value =~ pattern", - ), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = normalizer - .normalize_infix_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - Language::Ruby, - ".rb", - "normalize_infix_statement", - kind, - text - ), - "normalize_infix_statement mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn regex_literal_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "value =~ /left/\n", - Language::Ruby, - ".rb", - "regex", - "/left/", - ), - ( - "value = \"left\"\n", - Language::Ruby, - ".rb", - "string", - "\"left\"", - ), - ( - "const pattern = /left/;\n", - Language::TypeScript, - ".ts", - "regex", - "/left/", - ), - ( - "pattern = r\"left\"\n", - Language::Python, - ".py", - "string", - "r\"left\"", - ), - ( - "local pattern = \"left\"\n", - Language::Lua, - ".lua", - "string_content", - "left", - ), - ] { - let tree = 
raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.regex_literal(Some(node)), - ruby_private_predicate(source, language, suffix, "regex_literal?", kind, text), - "regex_literal? mismatch for {language:?} {kind} {text:?}" - ); - } - - let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); - assert_eq!( - normalizer.regex_literal(None), - ruby_private_regex_literal_value("nil") - ); - assert!(!ruby_private_regex_literal_value("string")); - assert!(!ruby_private_regex_literal_value("normalized_node")); - } - - #[test] - fn argument_list_unary_not_matches_ruby_private_predicate() { - for (line, text) in [ - ("return !flag", "!flag"), - ("return !!flag", "!!flag"), - ("return flag", "flag"), - ("return !flag, other", "!flag, other"), - ("return (!flag)", "(!flag)"), - ("return not flag", "not flag"), - ] { - let source = format!("def check\n {line}\nend\n"); - let tree = raw_tree(&source, Language::Ruby); - let node = first_raw_node(tree.root_node(), &source, "argument_list", text); - let normalizer = super::TreeSitterNormalizer::new(&source, Language::Ruby); - - assert_eq!( - normalizer.argument_list_unary_not(node), - ruby_private_predicate( - &source, - Language::Ruby, - ".rb", - "argument_list_unary_not?", - "argument_list", - text - ), - "argument_list_unary_not? 
mismatch for {line:?}" - ); - } - } - - #[test] - fn normalize_argument_list_unary_not_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def check\n return !flag\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "!flag", - ), - ( - "def check\n return !!flag\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "!!flag", - ), - ( - "def check\n return flag\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "flag", - ), - ( - "def check\n return !flag, other\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "!flag, other", - ), - ( - "def check():\n return foo(not flag)\n", - Language::Python, - ".py", - "argument_list", - "(not flag)", - ), - ( - "function check(){ return foo(!flag); }\n", - Language::TypeScript, - ".ts", - "arguments", - "(!flag)", - ), - ( - "function check() return foo(not flag) end\n", - Language::Lua, - ".lua", - "arguments", - "(not flag)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_argument_list_unary_not(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_argument_list_unary_not", - kind, - text - ), - "normalize_argument_list_unary_not mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn unary_not_statement_matches_ruby_private_predicate() { - for (line, text) in [ - ("!flag", "!flag"), - ("!!flag", "!!flag"), - ("flag", "flag"), - ("!flag; other", "!flag; other"), - ("(!flag)", "(!flag)"), - ("not flag", "not flag"), - ] { - let source = format!("def check\n {line}\nend\n"); - let tree = raw_tree(&source, Language::Ruby); - let node = first_raw_node(tree.root_node(), &source, "body_statement", text); - let normalizer = 
super::TreeSitterNormalizer::new(&source, Language::Ruby); - - assert_eq!( - normalizer.unary_not_statement(node), - ruby_private_predicate( - &source, - Language::Ruby, - ".rb", - "unary_not_statement?", - "body_statement", - text - ), - "unary_not_statement? mismatch for {line:?}" - ); - } - } - - #[test] - fn unary_not_expression_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", - Language::Ruby, - ".rb", - "unary", - "!flag", - ), - ( - "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", - Language::Ruby, - ".rb", - "unary", - "!!flag", - ), - ( - "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", - Language::Ruby, - ".rb", - "unary", - "-flag", - ), - ( - "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", - Language::Ruby, - ".rb", - "unary", - "not flag", - ), - ( - "function check(flag: boolean) { return !flag; }\n", - Language::TypeScript, - ".ts", - "unary_expression", - "!flag", - ), - ( - "if not flag:\n pass\n", - Language::Python, - ".py", - "not_operator", - "not flag", - ), - ( - "if not flag then end\n", - Language::Lua, - ".lua", - "unary_expression", - "not flag", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.unary_not_expression(node), - ruby_private_predicate( - source, - language, - suffix, - "unary_not_expression?", - kind, - text - ), - "unary_not_expression? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_unary_not_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", - Language::Ruby, - ".rb", - "unary", - "!flag", - ), - ( - "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", - Language::Ruby, - ".rb", - "unary", - "!!flag", - ), - ( - "function check(flag: boolean) { return !flag; }\n", - Language::TypeScript, - ".ts", - "unary_expression", - "!flag", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_unary_not(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_unary_not", - kind, - text - ), - "normalize_unary_not mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_unary_not_statement_matches_ruby_private_method() { - for (line, text) in [("!flag", "!flag"), ("!!flag", "!!flag")] { - let source = format!("def check\n {line}\nend\n"); - let tree = raw_tree(&source, Language::Ruby); - let node = first_raw_node(tree.root_node(), &source, "body_statement", text); - let mut normalizer = super::TreeSitterNormalizer::new(&source, Language::Ruby); - let rust = normalizer - .normalize_unary_not_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - &source, - Language::Ruby, - ".rb", - "normalize_unary_not_statement", - "body_statement", - text - ), - "normalize_unary_not_statement mismatch for {text:?}" - ); - } - } - - #[test] - fn unary_minus_expression_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def check\n -flag\n !flag\n 
value\nend\n", - Language::Ruby, - ".rb", - "unary", - "-flag", - ), - ( - "def check\n -flag\n !flag\n value\nend\n", - Language::Ruby, - ".rb", - "unary", - "!flag", - ), - ( - "function check(value: number) { return -value; }\n", - Language::TypeScript, - ".ts", - "unary_expression", - "-value", - ), - ( - "x = -value\n", - Language::Python, - ".py", - "unary_operator", - "-value", - ), - ( - "local x = -value\n", - Language::Lua, - ".lua", - "expression_list", - "-value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.unary_minus_expression(node), - ruby_private_predicate( - source, - language, - suffix, - "unary_minus_expression?", - kind, - text - ), - "unary_minus_expression? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_unary_minus_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def check\n -1\n -flag\nend\n", - Language::Ruby, - ".rb", - "unary", - "-1", - ), - ( - "def check\n -1\n -flag\nend\n", - Language::Ruby, - ".rb", - "unary", - "-flag", - ), - ( - "function check(value: number) { return -value; }\n", - Language::TypeScript, - ".ts", - "unary_expression", - "-value", - ), - ( - "x = -value\n", - Language::Python, - ".py", - "unary_operator", - "-value", - ), - ( - "local x = -value\n", - Language::Lua, - ".lua", - "expression_list", - "-value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_unary_minus(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_unary_minus", - kind, - text - ), - 
"normalize_unary_minus mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn binary_operator_matches_ruby_private_helper() { - for (source, language, suffix, kind, text) in [ - ( - "def calc\n left + right\n left && right\n value\nend\n", - Language::Ruby, - ".rb", - "binary", - "left + right", - ), - ( - "def calc\n left + right\n left && right\n value\nend\n", - Language::Ruby, - ".rb", - "binary", - "left && right", - ), - ( - "def calc\n left + right\n left && right\n value\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left + right\n left && right\n value", - ), - ( - "const value = left + right && other;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left + right && other", - ), - ( - "const value = left + right && other;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left + right", - ), - ( - "value = left + right and other\n", - Language::Python, - ".py", - "boolean_operator", - "left + right and other", - ), - ( - "value = left + right and other\n", - Language::Python, - ".py", - "binary_operator", - "left + right", - ), - ( - "local value = left + right and other\n", - Language::Lua, - ".lua", - "expression_list", - "left + right and other", - ), - ( - "local value = left + right and other\n", - Language::Lua, - ".lua", - "binary_expression", - "left + right", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.binary_operator(node).unwrap_or_default(), - ruby_private_string(source, language, suffix, "binary_operator", kind, text), - "binary_operator mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn boolean_operator_matches_ruby_private_helper() { - for (source, language, suffix, kind, text) in [ - ( - "def calc\n left && right\n left || right\n left + right\nend\n", - Language::Ruby, - ".rb", - "binary", - 
"left && right", - ), - ( - "def calc\n left && right\n left || right\n left + right\nend\n", - Language::Ruby, - ".rb", - "binary", - "left || right", - ), - ( - "def calc\n left && right\n left || right\n left + right\nend\n", - Language::Ruby, - ".rb", - "binary", - "left + right", - ), - ( - "const value = left && right || other;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left && right", - ), - ( - "const value = left && right || other;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left && right || other", - ), - ( - "value = left and right or other\n", - Language::Python, - ".py", - "boolean_operator", - "left and right", - ), - ( - "value = left and right or other\n", - Language::Python, - ".py", - "boolean_operator", - "left and right or other", - ), - ( - "local value = left and right or other\n", - Language::Lua, - ".lua", - "expression_list", - "left and right or other", - ), - ( - "local value = left + right\n", - Language::Lua, - ".lua", - "expression_list", - "left + right", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.boolean_operator(node).unwrap_or_default(), - ruby_private_string(source, language, suffix, "boolean_operator", kind, text), - "boolean_operator mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn comparison_operator_matches_ruby_private_helper() { - for (source, language, suffix, kind, text) in [ - ( - "def calc\n left == right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left == right", - ), - ( - "def calc\n left + right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left + right", - ), - ( - "const value = left === right;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left === right", - ), - ( - "const value = left + right;\n", - Language::TypeScript, - ".ts", - 
"binary_expression", - "left + right", - ), - ( - "value = left == right\n", - Language::Python, - ".py", - "comparison_operator", - "left == right", - ), - ( - "value = left + right\n", - Language::Python, - ".py", - "binary_operator", - "left + right", - ), - ( - "local value = left == right\n", - Language::Lua, - ".lua", - "expression_list", - "left == right", - ), - ( - "local value = left + right\n", - Language::Lua, - ".lua", - "expression_list", - "left + right", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.comparison_operator(node).unwrap_or_default(), - ruby_private_string(source, language, suffix, "comparison_operator", kind, text), - "comparison_operator mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn comparison_expression_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def calc\n left == right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left == right", - ), - ( - "const value = left === right;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left === right", - ), - ( - "const value = left + right;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left + right", - ), - ( - "value = left == right\n", - Language::Python, - ".py", - "comparison_operator", - "left == right", - ), - ( - "value = left + right\n", - Language::Python, - ".py", - "binary_operator", - "left + right", - ), - ( - "local value = left == right\n", - Language::Lua, - ".lua", - "expression_list", - "left == right", - ), - ( - "local value = left + right\n", - Language::Lua, - ".lua", - "expression_list", - "left + right", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, 
language); - - assert_eq!( - normalizer.comparison_expression(node), - ruby_private_predicate( - source, - language, - suffix, - "comparison_expression?", - kind, - text - ), - "comparison_expression? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn comparison_expression_normalization_matches_ruby() { - for (source, language, suffix) in [ - ("value = left == right\n", Language::Python, ".py"), - ( - "const value = left === right;\n", - Language::TypeScript, - ".ts", - ), - ("local value = left == right\n", Language::Lua, ".lua"), - ] { - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn normalize_comparison_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def calc\n left == right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left == right", - ), - ( - "value = left == right\n", - Language::Python, - ".py", - "comparison_operator", - "left == right", - ), - ( - "const value = left === right;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left === right", - ), - ( - "local value = left == right\n", - Language::Lua, - ".lua", - "expression_list", - "left == right", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_comparison(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_comparison", - kind, - text - ), - "normalize_comparison mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn boolean_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def calc\n left && right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left && right", - ), - ( - "def calc\n left or right\nend\n", - 
Language::Ruby, - ".rb", - "body_statement", - "left or right", - ), - ( - "def calc\n left + right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left + right", - ), - ( - "foo(left && right)\n", - Language::Ruby, - ".rb", - "argument_list", - "(left && right)", - ), - ( - "value = left and right\n", - Language::Python, - ".py", - "boolean_operator", - "left and right", - ), - ( - "local value = left and right\n", - Language::Lua, - ".lua", - "expression_list", - "left and right", - ), - ( - "const value = left && right;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left && right", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.boolean_statement(node), - ruby_private_predicate(source, language, suffix, "boolean_statement?", kind, text), - "boolean_statement? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn boolean_expression_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def calc\n left && right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left && right", - ), - ( - "def calc\n left && right\n left + right\nend\n", - Language::Ruby, - ".rb", - "binary", - "left && right", - ), - ( - "def calc\n left && right\n left + right\nend\n", - Language::Ruby, - ".rb", - "binary", - "left + right", - ), - ( - "const value = left && right;\nconst other = left + right;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left && right", - ), - ( - "const value = left && right;\nconst other = left + right;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left + right", - ), - ( - "value = left and right\nother = left + right\n", - Language::Python, - ".py", - "boolean_operator", - "left and right", - ), - ( - "value = left and right\nother = left + right\n", - 
Language::Python, - ".py", - "binary_operator", - "left + right", - ), - ( - "local value = left and right\nlocal other = left + right\n", - Language::Lua, - ".lua", - "expression_list", - "left and right", - ), - ( - "local value = left and right\nlocal other = left + right\n", - Language::Lua, - ".lua", - "expression_list", - "left + right", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.boolean_expression(node), - ruby_private_predicate(source, language, suffix, "boolean_expression?", kind, text), - "boolean_expression? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_boolean_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def calc\n left && right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left && right", - ), - ( - "def calc\n left || right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left || right", - ), - ( - "def calc\n left && middle && right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left && middle && right", - ), - ( - "value = left and right\n", - Language::Python, - ".py", - "boolean_operator", - "left and right", - ), - ( - "value = left or right\n", - Language::Python, - ".py", - "boolean_operator", - "left or right", - ), - ( - "local value = left and right\n", - Language::Lua, - ".lua", - "expression_list", - "left and right", - ), - ( - "local value = left or right\n", - Language::Lua, - ".lua", - "expression_list", - "left or right", - ), - ( - "const value = left && right;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left && right", - ), - ( - "const value = left || right;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left || right", - ), - ] { - let tree = raw_tree(source, language); - let node = 
first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_boolean(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_boolean", - kind, - text - ), - "normalize_boolean mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn boolean_expression_normalization_matches_ruby() { - for (source, language, suffix) in [ - ("def calc\n left && right\nend\n", Language::Ruby, ".rb"), - ("value = left and right\n", Language::Python, ".py"), - ("local value = left and right\n", Language::Lua, ".lua"), - ( - "const value = left && right;\n", - Language::TypeScript, - ".ts", - ), - ] { - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn operator_call_expression_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def calc\n left + right\n left && right\nend\n", - Language::Ruby, - ".rb", - "binary", - "left + right", - ), - ( - "def calc\n left + right\n left && right\nend\n", - Language::Ruby, - ".rb", - "binary", - "left && right", - ), - ( - "const value = left + right && other;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left + right", - ), - ( - "const value = left + right && other;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left + right && other", - ), - ( - "value = left + right and other\n", - Language::Python, - ".py", - "binary_operator", - "left + right", - ), - ( - "value = left + right and other\n", - Language::Python, - ".py", - "boolean_operator", - "left + right and other", - ), - ( - "local value = left + right\nlocal other = left and right\n", - Language::Lua, - ".lua", - "expression_list", - "left + right", - ), - ( - "local value = left + right\nlocal other = left and right\n", - Language::Lua, - ".lua", - 
"expression_list", - "left and right", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.operator_call_expression(node), - ruby_private_predicate( - source, - language, - suffix, - "operator_call_expression?", - kind, - text - ), - "operator_call_expression? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_operator_call_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def calc\n left + right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left + right", - ), - ( - "def calc\n left =~ /right/\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left =~ /right/", - ), - ( - "def calc\n left =~ pattern\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left =~ pattern", - ), - ( - "value = left + right\n", - Language::Python, - ".py", - "binary_operator", - "left + right", - ), - ( - "const value = left + right;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left + right", - ), - ( - "local value = left + right\n", - Language::Lua, - ".lua", - "expression_list", - "left + right", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_operator_call(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_operator_call", - kind, - text - ), - "normalize_operator_call mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn operator_call_expression_normalization_matches_ruby() { - for (source, language, suffix) in [ - ("value = left + right\n", Language::Python, ".py"), - ("local value = 
left + right\n", Language::Lua, ".lua"), - ("const value = left + right;\n", Language::TypeScript, ".ts"), - ] { - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn spaced_text_matches_ruby_private_helper() { - for (source, language, suffix, kind, text) in [ - ( - "def calc\n left + right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left + right", - ), - ( - "const value = left + right;\n", - Language::TypeScript, - ".ts", - "binary_expression", - "left + right", - ), - ( - "value = left + right\n", - Language::Python, - ".py", - "binary_operator", - "left + right", - ), - ( - "local value = left + right\n", - Language::Lua, - ".lua", - "expression_list", - "left + right", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.spaced_text(node), - ruby_private_string(source, language, suffix, "spaced_text", kind, text), - "spaced_text mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn class_node_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "class Thing; end\n", - Language::Ruby, - ".rb", - "class", - "class Thing; end", - ), - ( - "class Thing:\n pass\n", - Language::Python, - ".py", - "class_definition", - "class Thing:\n pass", - ), - ( - "class Thing {}\n", - Language::TypeScript, - ".ts", - "class_declaration", - "class Thing {}", - ), - ( - "local Thing = {}\n", - Language::Lua, - ".lua", - "variable_declaration", - "local Thing = {}", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.class_node(node), - ruby_private_predicate(source, language, suffix, "class_node?", kind, text), - "class_node? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn module_node_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "module Thing\n value\nend\n", - Language::Ruby, - ".rb", - "module", - "module Thing\n value\nend", - ), - ( - "class Thing; end\n", - Language::Ruby, - ".rb", - "class", - "class Thing; end", - ), - ( - "value = 1\n", - Language::Python, - ".py", - "module", - "value = 1\n", - ), - ( - "namespace Thing { const value = 1; }\n", - Language::TypeScript, - ".ts", - "program", - "namespace Thing { const value = 1; }\n", - ), - ( - "local Thing = {}\n", - Language::Lua, - ".lua", - "chunk", - "local Thing = {}\n", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.module_node(node), - ruby_private_predicate(source, language, suffix, "module_node?", kind, text), - "module_node? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_module_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "module Thing\n value\nend\n", - Language::Ruby, - ".rb", - "module", - "module Thing\n value\nend", - ), - ( - "module Empty\nend\n", - Language::Ruby, - ".rb", - "module", - "module Empty\nend", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_module(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_module", - kind, - text - ), - "normalize_module mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_singleton_class_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "class << self\n value\nend\n", - Language::Ruby, - ".rb", - "singleton_class", - "class << self\n value\nend", - ), - ( - "class << object\nend\n", - Language::Ruby, - ".rb", - "singleton_class", - "class << object\nend", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_singleton_class(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_singleton_class", - kind, - text - ), - "normalize_singleton_class mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn ruby_definition_identifier_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def helper(arg)\n arg\nend\n", - Language::Ruby, - ".rb", - "identifier", 
- "helper", - ), - ( - "def helper(arg)\n arg\nend\n", - Language::Ruby, - ".rb", - "identifier", - "arg", - ), - ( - "items.each { |item| item }\n", - Language::Ruby, - ".rb", - "identifier", - "item", - ), - ( - "def helper\n value\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value", - ), - ( - "def helper(arg):\n return arg\n", - Language::Python, - ".py", - "identifier", - "arg", - ), - ( - "function helper(arg) { return arg; }\n", - Language::TypeScript, - ".ts", - "identifier", - "arg", - ), - ( - "function helper(arg)\n return arg\nend\n", - Language::Lua, - ".lua", - "identifier", - "arg", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.ruby_definition_identifier(node), - ruby_private_predicate( - source, - language, - suffix, - "ruby_definition_identifier?", - kind, - text - ), - "ruby_definition_identifier? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn literal_fragment_assignment_context_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "value = \"left = right\"\n", - Language::Ruby, - ".rb", - "string_content", - "left = right", - ), - ("value = 1\n", Language::Ruby, ".rb", "identifier", "value"), - ( - "value = \"left = right\"\n", - Language::Python, - ".py", - "string_content", - "left = right", - ), - ( - "const value = \"left = right\";\n", - Language::TypeScript, - ".ts", - "string_fragment", - "left = right", - ), - ( - "local value = \"left = right\"\n", - Language::Lua, - ".lua", - "string_content", - "left = right", - ), - ( - "local value = other\n", - Language::Lua, - ".lua", - "variable_list", - "value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.literal_fragment_assignment_context(node), - ruby_private_predicate( - source, - language, - suffix, - "literal_fragment_assignment_context?", - kind, - text - ), - "literal_fragment_assignment_context? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn assignment_lhs_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "value = other\n", - Language::Ruby, - ".rb", - "identifier", - "value", - ), - ( - "value = other\n", - Language::Ruby, - ".rb", - "identifier", - "other", - ), - ( - "{ key: value }\n", - Language::Ruby, - ".rb", - "hash_key_symbol", - "key", - ), - ( - "{ key: value }\n", - Language::Ruby, - ".rb", - "identifier", - "value", - ), - ( - "value = other\n", - Language::Python, - ".py", - "identifier", - "value", - ), - ( - "value = other\n", - Language::Python, - ".py", - "identifier", - "other", - ), - ( - "let value = other;\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - ), - ( - "let value = other;\n", - Language::TypeScript, - ".ts", - "identifier", - "other", - ), - ( - "let value = other;\n", - Language::TypeScript, - ".ts", - "variable_declarator", - "value = other", - ), - ( - "local value = other\n", - Language::Lua, - ".lua", - "variable_list", - "value", - ), - ( - "local value = other\n", - Language::Lua, - ".lua", - "expression_list", - "other", - ), - ( - "value = other\n", - Language::Lua, - ".lua", - "variable_list", - "value", - ), - ( - "value = other\n", - Language::Lua, - ".lua", - "expression_list", - "other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.assignment_lhs(node), - ruby_private_predicate(source, language, suffix, "assignment_lhs?", kind, text), - "assignment_lhs? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn assignment_rhs_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "value = other\n", - Language::Ruby, - ".rb", - "identifier", - "value", - ), - ( - "value = other\n", - Language::Ruby, - ".rb", - "identifier", - "other", - ), - ( - "{ key: value }\n", - Language::Ruby, - ".rb", - "hash_key_symbol", - "key", - ), - ( - "{ key: value }\n", - Language::Ruby, - ".rb", - "identifier", - "value", - ), - ( - "value = other\n", - Language::Python, - ".py", - "identifier", - "value", - ), - ( - "value = other\n", - Language::Python, - ".py", - "identifier", - "other", - ), - ( - "let value = other;\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - ), - ( - "let value = other;\n", - Language::TypeScript, - ".ts", - "identifier", - "other", - ), - ( - "let value = other;\n", - Language::TypeScript, - ".ts", - "variable_declarator", - "value = other", - ), - ( - "local value = other\n", - Language::Lua, - ".lua", - "variable_list", - "value", - ), - ( - "local value = other\n", - Language::Lua, - ".lua", - "expression_list", - "other", - ), - ( - "value = other\n", - Language::Lua, - ".lua", - "variable_list", - "value", - ), - ( - "value = other\n", - Language::Lua, - ".lua", - "expression_list", - "other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.assignment_rhs(node), - ruby_private_predicate(source, language, suffix, "assignment_rhs?", kind, text), - "assignment_rhs? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn ruby_assignment_node_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "value = 1\n", - Language::Ruby, - ".rb", - "assignment", - "value = 1", - ), - ( - "value += 1\n", - Language::Ruby, - ".rb", - "operator_assignment", - "value += 1", - ), - ( - "def helper\n value\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value", - ), - ( - "[1].each { |item| local = item }\n", - Language::Ruby, - ".rb", - "block_body", - "local = item", - ), - ( - "value = 1\n", - Language::Python, - ".py", - "expression_statement", - "value = 1", - ), - ( - "value = other;\n", - Language::TypeScript, - ".ts", - "assignment_expression", - "value = other", - ), - ( - "local value = other\n", - Language::Lua, - ".lua", - "assignment_statement", - "value = other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.ruby_assignment_node(node), - ruby_private_predicate( - source, - language, - suffix, - "ruby_assignment_node?", - kind, - text - ), - "ruby_assignment_node? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn collect_assignment_target_names_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "value = other\n", - Language::Ruby, - ".rb", - "identifier", - "value", - ), - ( - "left, *rest = values\n", - Language::Ruby, - ".rb", - "left_assignment_list", - "left, *rest", - ), - ( - "value = other\n", - Language::Python, - ".py", - "identifier", - "value", - ), - ( - "const value = other;\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - ), - ( - "local value = other\n", - Language::Lua, - ".lua", - "variable_list", - "value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let mut names = BTreeSet::new(); - normalizer.collect_assignment_target_names(node, &mut names); - - assert_eq!( - names, - ruby_private_collected_names( - source, - language, - suffix, - "collect_assignment_target_names", - kind, - text - ), - "collect_assignment_target_names mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn collect_identifier_names_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "left, *rest = values\n", - Language::Ruby, - ".rb", - "left_assignment_list", - "left, *rest", - ), - ( - "receiver.call(argument)\n", - Language::Ruby, - ".rb", - "call", - "receiver.call(argument)", - ), - ( - "value = other\n", - Language::Python, - ".py", - "expression_statement", - "value = other", - ), - ( - "const value = { shorthand };\n", - Language::TypeScript, - ".ts", - "object", - "{ shorthand }", - ), - ( - "local value = other\n", - Language::Lua, - ".lua", - "variable_declaration", - "local value = other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = 
super::TreeSitterNormalizer::new(source, language); - let mut names = BTreeSet::new(); - normalizer.collect_identifier_names(node, &mut names); - - assert_eq!( - names, - ruby_private_collected_names( - source, - language, - suffix, - "collect_identifier_names", - kind, - text - ), - "collect_identifier_names mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn member_name_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ("user.name\n", Language::Ruby, ".rb", "identifier", "name"), - ("user&.name\n", Language::Ruby, ".rb", "identifier", "name"), - ( - "user.name()\n", - Language::Python, - ".py", - "identifier", - "name", - ), - ( - "user?.name;\n", - Language::TypeScript, - ".ts", - "property_identifier", - "name", - ), - ("user.name()\n", Language::Lua, ".lua", "identifier", "name"), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.member_name(node), - ruby_private_string(source, language, suffix, "member_name", kind, text), - "member_name mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn member_parts_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), - ("user&.name\n", Language::Ruby, ".rb", "call", "user&.name"), - ( - "user.name()\n", - Language::Python, - ".py", - "attribute", - "user.name", - ), - ( - "user.name(thing)\n", - Language::Python, - ".py", - "expression_statement", - "user.name(thing)", - ), - ( - "user.name();\n", - Language::TypeScript, - ".ts", - "member_expression", - "user.name", - ), - ( - "user.name(thing);\n", - Language::TypeScript, - ".ts", - "call_expression", - "user.name(thing)", - ), - ( - "user.name()\n", - Language::Lua, - ".lua", - "dot_index_expression", - "user.name", - ), - ( - "local value = 
other\n", - Language::Lua, - ".lua", - "expression_list", - "other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.member_parts(node).map(|(receiver, method)| { - ( - receiver.kind().to_string(), - super::node_text(receiver, source).to_string(), - method, - ) - }); - - assert_eq!( - rust, - ruby_private_member_parts(source, language, suffix, kind, text), - "member_parts mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn member_read_node_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), - ("foo()\n", Language::Ruby, ".rb", "call", "foo()"), - ( - "user.name()\n", - Language::Python, - ".py", - "attribute", - "user.name", - ), - ( - "user.name(thing)\n", - Language::Python, - ".py", - "expression_statement", - "user.name(thing)", - ), - ( - "user.name();\n", - Language::TypeScript, - ".ts", - "member_expression", - "user.name", - ), - ( - "user.name(thing);\n", - Language::TypeScript, - ".ts", - "call_expression", - "user.name(thing)", - ), - ( - "user.name()\n", - Language::Lua, - ".lua", - "dot_index_expression", - "user.name", - ), - ( - "local value = other\n", - Language::Lua, - ".lua", - "expression_list", - "other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.member_read_node(node), - ruby_private_predicate(source, language, suffix, "member_read_node?", kind, text), - "member_read_node? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_member_read_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), - ( - "user.name()\n", - Language::Python, - ".py", - "attribute", - "user.name", - ), - ( - "user.name;\n", - Language::TypeScript, - ".ts", - "member_expression", - "user.name", - ), - ( - "user.name()\n", - Language::Lua, - ".lua", - "dot_index_expression", - "user.name", - ), - ("value\n", Language::Ruby, ".rb", "identifier", "value"), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_member_read(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_member_read", - kind, - text - ), - "normalize_member_read mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn assignment_left_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "value = other\n", - Language::Ruby, - ".rb", - "assignment", - "value = other", - ), - ( - "left, right = values\n", - Language::Ruby, - ".rb", - "assignment", - "left, right = values", - ), - ( - "value = other\n", - Language::Python, - ".py", - "expression_statement", - "value = other", - ), - ( - "value = other;\n", - Language::TypeScript, - ".ts", - "assignment_expression", - "value = other", - ), - ( - "value = other\n", - Language::Lua, - ".lua", - "assignment_statement", - "value = other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.assignment_left(node).map(|left| { - ( - 
left.kind().to_string(), - super::node_text(left, source).to_string(), - ) - }); - - assert_eq!( - rust, - ruby_private_node_signature( - source, - language, - suffix, - "assignment_left", - kind, - text - ), - "assignment_left mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn assignment_right_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "value = other\n", - Language::Ruby, - ".rb", - "assignment", - "value = other", - ), - ( - "left, right = values\n", - Language::Ruby, - ".rb", - "assignment", - "left, right = values", - ), - ( - "value = other\n", - Language::Python, - ".py", - "expression_statement", - "value = other", - ), - ( - "value = other;\n", - Language::TypeScript, - ".ts", - "assignment_expression", - "value = other", - ), - ( - "value = other\n", - Language::Lua, - ".lua", - "assignment_statement", - "value = other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.assignment_right(node).map(|right| { - ( - right.kind().to_string(), - super::node_text(right, source).to_string(), - ) - }); - - assert_eq!( - rust, - ruby_private_node_signature( - source, - language, - suffix, - "assignment_right", - kind, - text - ), - "assignment_right mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn singleton_receiver_matches_ruby_private_method() { - for (source, kind, text) in [ - ( - "def self.foo\nend\n", - "singleton_method", - "def self.foo\nend", - ), - ( - "def User.foo\nend\n", - "singleton_method", - "def User.foo\nend", - ), - ( - "def object.foo\nend\n", - "singleton_method", - "def object.foo\nend", - ), - ( - "def self.foo(value)\n value\nend\n", - "singleton_method", - "def self.foo(value)\n value\nend", - ), - ( - "def object.foo\n value\nend\n", - "singleton_method", - "def object.foo\n value\nend", - 
), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = normalizer.singleton_receiver(node).map(|receiver| { - ( - receiver.kind().to_string(), - super::node_text(receiver, source).to_string(), - ) - }); - - assert_eq!( - rust, - ruby_private_node_signature( - source, - Language::Ruby, - ".rb", - "singleton_receiver", - kind, - text - ), - "singleton_receiver mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn singleton_name_matches_ruby_private_method() { - for (source, kind, text) in [ - ( - "def self.foo\nend\n", - "singleton_method", - "def self.foo\nend", - ), - ( - "def User.foo\nend\n", - "singleton_method", - "def User.foo\nend", - ), - ( - "def object.foo\nend\n", - "singleton_method", - "def object.foo\nend", - ), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - - assert_eq!( - normalizer.singleton_name(node), - ruby_private_string(source, Language::Ruby, ".rb", "singleton_name", kind, text), - "singleton_name mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_singleton_function_matches_ruby_private_method() { - for (source, kind, text) in [ - ( - "def self.hidden(value)\n return value\nend\n", - "singleton_method", - "def self.hidden(value)\n return value\nend", - ), - ( - "def User.hidden\nend\n", - "singleton_method", - "def User.hidden\nend", - ), - ( - "def object.hidden\n value\nend\n", - "singleton_method", - "def object.hidden\n value\nend", - ), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = normalizer - .normalize_singleton_function(node) - .map(|node| 
node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - Language::Ruby, - ".rb", - "normalize_singleton_function", - kind, - text - ), - "normalize_singleton_function mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_function_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def check(value)\n return value\nend\n", - Language::Ruby, - ".rb", - "method", - "def check(value)\n return value\nend", - ), - ( - "def empty\nend\n", - Language::Ruby, - ".rb", - "method", - "def empty\nend", - ), - ( - "def object.hidden\n value\nend\n", - Language::Ruby, - ".rb", - "singleton_method", - "def object.hidden\n value\nend", - ), - ( - "def check(value):\n return value\n", - Language::Python, - ".py", - "function_definition", - "def check(value):\n return value", - ), - ( - "function check(value) { return value; }\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function check(value) { return value; }", - ), - ( - "class Box { check(value) { return value; } }\n", - Language::TypeScript, - ".ts", - "method_definition", - "check(value) { return value; }", - ), - ( - "function check(value)\n return value\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function check(value)\n return value\nend", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_function(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_function", - kind, - text - ), - "normalize_function mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn lambda_expression_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ 
- ( - "fn = ->(x) { x + 1 }\n", - Language::Ruby, - ".rb", - "lambda", - "->(x) { x + 1 }", - ), - ( - "fn = lambda x: x + 1\n", - Language::Python, - ".py", - "lambda", - "lambda x: x + 1", - ), - ( - "const fn = (x) => x + 1;\n", - Language::TypeScript, - ".ts", - "arrow_function", - "(x) => x + 1", - ), - ( - "const fn = function(x) { return x + 1; };\n", - Language::TypeScript, - ".ts", - "function_expression", - "function(x) { return x + 1; }", - ), - ( - "local fn = function(x) return x + 1 end\n", - Language::Lua, - ".lua", - "expression_list", - "function(x) return x + 1 end", - ), - ( - "function f(x) return x + 1 end\n", - Language::Lua, - ".lua", - "function_declaration", - "function f(x) return x + 1 end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.lambda_expression(node), - ruby_private_predicate(source, language, suffix, "lambda_expression?", kind, text), - "lambda_expression? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_lambda_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "fn = ->(x) { x + 1 }\n", - Language::Ruby, - ".rb", - "lambda", - "->(x) { x + 1 }", - ), - ( - "fn = lambda x: x + 1\n", - Language::Python, - ".py", - "lambda", - "lambda x: x + 1", - ), - ( - "const fn = (x) => x + 1;\n", - Language::TypeScript, - ".ts", - "arrow_function", - "(x) => x + 1", - ), - ( - "const fn = function(x) { return x + 1; };\n", - Language::TypeScript, - ".ts", - "function_expression", - "function(x) { return x + 1; }", - ), - ( - "local fn = function(x) return x + 1 end\n", - Language::Lua, - ".lua", - "expression_list", - "function(x) return x + 1 end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_lambda(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_lambda", - kind, - text - ), - "normalize_lambda mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn lambda_expression_normalization_matches_ruby() { - for (source, language, suffix) in [ - ("fn = ->(x) { x + 1 }\n", Language::Ruby, ".rb"), - ("fn = lambda x: x + 1\n", Language::Python, ".py"), - ("const fn = (x) => x + 1;\n", Language::TypeScript, ".ts"), - ( - "const fn = function(x) { return x + 1; };\n", - Language::TypeScript, - ".ts", - ), - ( - "local fn = function(x) return x + 1 end\n", - Language::Lua, - ".lua", - ), - ] { - let root = parse_language_source(source, language, suffix); - let mut lambdas = Vec::new(); - nodes_of_type(&root, "LAMBDA", &mut lambdas); - assert!( - !lambdas.is_empty(), - "expected LAMBDA for {language:?} in {root:#?}" - ); - assert_ruby_parity(source, 
language, suffix); - } - } - - #[test] - fn function_name_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def run\nend\n", - Language::Ruby, - ".rb", - "method", - "def run\nend", - ), - ( - "def self.run\nend\n", - Language::Ruby, - ".rb", - "singleton_method", - "def self.run\nend", - ), - ( - "def run():\n pass\n", - Language::Python, - ".py", - "function_definition", - "def run():\n pass", - ), - ( - "function run() {}\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function run() {}", - ), - ( - "class Box { run() {} }\n", - Language::TypeScript, - ".ts", - "method_definition", - "run() {}", - ), - ( - "function run()\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function run()\nend", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.function_name(node).unwrap_or_default(), - ruby_private_string(source, language, suffix, "function_name", kind, text), - "function_name mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn collect_destructured_parameter_targets_matches_ruby_private_method() { - for (source, kind, text) in [ - ( - "items.each { |(left, right)| left }\n", - "destructured_parameter", - "(left, right)", - ), - ( - "items.each do |(left, (middle, right))| left end\n", - "destructured_parameter", - "(left, (middle, right))", - ), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let mut targets = Vec::new(); - normalizer.collect_destructured_parameter_targets(node, &mut targets); - let rust = Value::Array(targets.iter().map(node_value).collect()); - - assert_eq!( - rust, - ruby_private_destructured_parameter_targets_value(source, kind, 
text), - "collect_destructured_parameter_targets mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_block_parameters_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "items.each { |(left, right)| left }\n", - Language::Ruby, - ".rb", - "block", - "{ |(left, right)| left }", - ), - ( - "items.each { |item, (left, right)| item }\n", - Language::Ruby, - ".rb", - "block", - "{ |item, (left, right)| item }", - ), - ( - "items.each { |item| item }\n", - Language::Ruby, - ".rb", - "block", - "{ |item| item }", - ), - ( - "def f(x):\n pass\n", - Language::Python, - ".py", - "function_definition", - "def f(x):\n pass", - ), - ( - "items.forEach((item) => item);\n", - Language::TypeScript, - ".ts", - "expression_statement", - "items.forEach((item) => item);", - ), - ( - "function f(x)\n return x\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function f(x)\n return x\nend", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_block_parameters(Some(node)) - .as_ref() - .map(node_value) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_block_parameters", - kind, - text - ), - "normalize_block_parameters mismatch for {language:?} {kind} {text:?}" - ); - } - - let mut normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); - assert!(normalizer.normalize_block_parameters(None).is_none()); - } - - #[test] - fn normalize_parameters_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f(value = 1)\nend\n", - Language::Ruby, - ".rb", - "method_parameters", - "(value = 1)", - ), - ( - "def f(value)\nend\n", - Language::Ruby, - ".rb", - "method_parameters", - "(value)", - ), - ( - "def f(value=1):\n 
pass\n", - Language::Python, - ".py", - "parameters", - "(value=1)", - ), - ( - "function f(value = 1) {}\n", - Language::TypeScript, - ".ts", - "formal_parameters", - "(value = 1)", - ), - ( - "function f(value)\nend\n", - Language::Lua, - ".lua", - "parameters", - "(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_parameters(Some(node)) - .as_ref() - .map(node_value) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_parameters", - kind, - text - ), - "normalize_parameters mismatch for {language:?} {kind} {text:?}" - ); - } - - let mut normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); - assert!(normalizer.normalize_parameters(None).is_none()); - } - - #[test] - fn normalize_destructured_block_parameter_matches_ruby_private_method() { - for (source, kind, text) in [ - ( - "items.each { |(left, right)| left }\n", - "destructured_parameter", - "(left, right)", - ), - ( - "items.each do |(left, (middle, right))| left end\n", - "destructured_parameter", - "(left, (middle, right))", - ), - ("items.each { |item| item }\n", "identifier", "item"), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = normalizer - .normalize_destructured_block_parameter(node) - .as_ref() - .map(node_value) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - Language::Ruby, - ".rb", - "normalize_destructured_block_parameter", - kind, - text - ), - "normalize_destructured_block_parameter mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn scope_matches_ruby_private_method() { - for (source, 
language, suffix, kind, text, mode) in [ - ("1\n", Language::Ruby, ".rb", "integer", "1", "body"), - ( - "1\n", - Language::Python, - ".py", - "expression_statement", - "1", - "body", - ), - ( - "value;\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - "args", - ), - ( - "return value\n", - Language::Lua, - ".lua", - "expression_list", - "value", - "empty", - ), - ] { - let tree = raw_tree(source, language); - let root = tree.root_node(); - let node = first_raw_node(root, source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - normalizer.root_span = Some(super::span(root)); - let body = if mode == "body" { - Some(normalizer.wrap("BODY", Vec::new(), node)) - } else { - None - }; - let args = if mode == "args" { - Some(normalizer.wrap("ARGS", Vec::new(), node)) - } else { - None - }; - let rust = node_value(&normalizer.scope(body, args, node)); - - assert_eq!( - rust, - ruby_private_scope_value(source, language, suffix, kind, text, mode), - "scope mismatch for {language:?} {kind} {text:?} mode {mode}" - ); - } - } - - #[test] - fn list_matches_ruby_private_method() { - for (source, language, suffix, kind, text, mode) in [ - ( - "value\n", - Language::Ruby, - ".rb", - "identifier", - "value", - "one", - ), - ( - "value\n", - Language::Python, - ".py", - "expression_statement", - "value", - "empty", - ), - ( - "value;\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - "nil", - ), - ( - "return value\n", - Language::Lua, - ".lua", - "expression_list", - "value", - "one", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let item = normalizer.wrap("ITEM", Vec::new(), node); - let children = match mode { - "nil" => None, - "empty" => Some(Vec::new()), - "one" => Some(vec![item]), - _ => panic!("unknown list mode: {mode}"), - }; - let rust = normalizer - 
.list(children, node) - .as_ref() - .map(node_value) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_list_value(source, language, suffix, kind, text, mode), - "list mismatch for {language:?} {kind} {text:?} mode {mode}" - ); - } - } - - #[test] - fn unwrap_node_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def check\n (value)\n value\nend\n", - Language::Ruby, - ".rb", - "parenthesized_statements", - "(value)", - ), - ( - "value\n(value)\n", - Language::Python, - ".py", - "expression_statement", - "value", - ), - ( - "value\n(value)\n", - Language::Python, - ".py", - "expression_statement", - "(value)", - ), - ( - "const value = (other);\n", - Language::TypeScript, - ".ts", - "parenthesized_expression", - "(other)", - ), - ( - "local first = (other)\nlocal second = left + right\n", - Language::Lua, - ".lua", - "expression_list", - "(other)", - ), - ( - "local first = (other)\nlocal second = left + right\n", - Language::Lua, - ".lua", - "expression_list", - "left + right", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.unwrap_node(node), - ruby_private_predicate(source, language, suffix, "unwrap_node?", kind, text), - "unwrap_node? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn statement_node_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def check\n return value\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "return value", - ), - ( - "def check\n return value\nend\n", - Language::Ruby, - ".rb", - "identifier", - "check", - ), - ( - "value\n(value)\n", - Language::Python, - ".py", - "expression_statement", - "(value)", - ), - ( - "value\n(value)\n", - Language::Python, - ".py", - "identifier", - "value", - ), - ( - "function check() { return value + other; }\n", - Language::TypeScript, - ".ts", - "return_statement", - "return value + other;", - ), - ( - "function check() { return value + other; }\n", - Language::TypeScript, - ".ts", - "binary_expression", - "value + other", - ), - ( - "function check() { return value + other; }\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - ), - ( - "return value\n", - Language::Lua, - ".lua", - "return_statement", - "return value", - ), - ( - "return value\n", - Language::Lua, - ".lua", - "expression_list", - "value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.statement_node(node.kind()), - ruby_private_predicate(source, language, suffix, "statement_node?", kind, text), - "statement_node? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn local_identifier_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def check\nend\nclass Thing; end\n", - Language::Ruby, - ".rb", - "identifier", - "check", - ), - ( - "def check\nend\nclass Thing; end\n", - Language::Ruby, - ".rb", - "constant", - "Thing", - ), - ( - "def check(value):\n pass\n", - Language::Python, - ".py", - "identifier", - "value", - ), - ( - "def check(value):\n pass\n", - Language::Python, - ".py", - "parameters", - "(value)", - ), - ( - "const value = object.field;\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - ), - ( - "const value = object.field;\n", - Language::TypeScript, - ".ts", - "property_identifier", - "field", - ), - ( - "const value = object.field;\n", - Language::TypeScript, - ".ts", - "lexical_declaration", - "const value = object.field;", - ), - ( - "local value = other\nprint(value)\n", - Language::Lua, - ".lua", - "identifier", - "value", - ), - ( - "local value = other\n", - Language::Lua, - ".lua", - "expression_list", - "other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.identifier_kind(node.kind()), - ruby_private_predicate(source, language, suffix, "local_identifier?", kind, text), - "local_identifier? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn ruby_local_name_matches_scope_stack_lookup() { - let mut normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); - normalizer.local_stack = vec![ - BTreeSet::from(["outer".to_string(), "shared".to_string()]), - BTreeSet::from(["inner".to_string()]), - ]; - - assert!(normalizer.ruby_local_name("outer")); - assert!(normalizer.ruby_local_name("inner")); - assert!(normalizer.ruby_local_name("shared")); - assert!(!normalizer.ruby_local_name("missing")); - } - - #[test] - fn ruby_vcall_identifier_matches_ruby_private_predicate() { - let cases = vec![ - ( - "ruby_vcall", - "foo\n", - Language::Ruby, - ".rb", - "identifier", - "foo", - Vec::<&str>::new(), - ), - ( - "ruby_local", - "foo\n", - Language::Ruby, - ".rb", - "identifier", - "foo", - vec!["foo"], - ), - ( - "assignment_lhs", - "foo = 1\n", - Language::Ruby, - ".rb", - "identifier", - "foo", - Vec::<&str>::new(), - ), - ( - "method_name", - "def foo\nend\n", - Language::Ruby, - ".rb", - "identifier", - "foo", - Vec::<&str>::new(), - ), - ( - "parameter", - "def f(foo)\nend\n", - Language::Ruby, - ".rb", - "identifier", - "foo", - Vec::<&str>::new(), - ), - ( - "non_identifier", - "Thing\n", - Language::Ruby, - ".rb", - "constant", - "Thing", - Vec::<&str>::new(), - ), - ( - "non_ruby", - "foo\n", - Language::Python, - ".py", - "expression_statement", - "foo", - Vec::<&str>::new(), - ), - ]; - - for (label, source, language, suffix, kind, text, locals) in cases { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - if !locals.is_empty() { - normalizer - .local_stack - .push(locals.iter().map(|name| name.to_string()).collect()); - } - - assert_eq!( - normalizer.ruby_vcall_identifier(node, super::node_text(node, source)), - ruby_private_ruby_vcall_identifier_predicate( - source, language, suffix, kind, 
text, &locals, - ), - "ruby_vcall_identifier? mismatch for {label}" - ); - } - } - - #[test] - fn vcall_identifier_matches_ruby_private_predicate() { - let cases = vec![ - ( - "ruby_modifier_action", - "foo if cond\n", - Language::Ruby, - ".rb", - "identifier", - "foo", - Vec::<&str>::new(), - ), - ( - "ruby_local", - "foo if cond\n", - Language::Ruby, - ".rb", - "identifier", - "foo", - vec!["foo"], - ), - ( - "method_name", - "def foo\nend\n", - Language::Ruby, - ".rb", - "identifier", - "foo", - Vec::<&str>::new(), - ), - ( - "argument", - "call(foo)\n", - Language::Ruby, - ".rb", - "identifier", - "foo", - Vec::<&str>::new(), - ), - ( - "member_read", - "def f\n user.name\nend\n", - Language::Ruby, - ".rb", - "identifier", - "name", - Vec::<&str>::new(), - ), - ( - "assignment_lhs", - "foo = bar\n", - Language::Ruby, - ".rb", - "identifier", - "foo", - Vec::<&str>::new(), - ), - ( - "python_identifier", - "foo\n", - Language::Python, - ".py", - "expression_statement", - "foo", - Vec::<&str>::new(), - ), - ( - "typescript_identifier", - "foo;\n", - Language::TypeScript, - ".ts", - "identifier", - "foo", - Vec::<&str>::new(), - ), - ( - "lua_identifier", - "foo()\n", - Language::Lua, - ".lua", - "identifier", - "foo", - Vec::<&str>::new(), - ), - ]; - - for (label, source, language, suffix, kind, text, locals) in cases { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - if !locals.is_empty() { - normalizer - .local_stack - .push(locals.iter().map(|name| name.to_string()).collect()); - } - - assert_eq!( - normalizer.vcall_identifier(node, super::node_text(node, source)), - ruby_private_vcall_identifier_predicate( - source, language, suffix, kind, text, &locals, - ), - "vcall_identifier? 
mismatch for {label}" - ); - } - - let source = "def f\n Thing\nend\n"; - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, "constant", "Thing"); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - assert!( - !normalizer.vcall_identifier(node, super::node_text(node, source)), - "vcall_identifier? must reject non-local identifiers in statement wrappers" - ); - - let source = "foo\n"; - let tree = raw_tree(source, Language::Python); - let node = first_raw_node(tree.root_node(), source, "identifier", "foo"); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Python); - assert!( - !normalizer.vcall_identifier(node, super::node_text(node, source)), - "vcall_identifier? must reject Python bare identifiers" - ); - } - - #[test] - fn collect_ruby_parameter_locals_matches_ruby_private_method() { - for (source, kind, text) in [ - ( - "def f(a, b = 1, *rest, key:, **opts, &block)\nend\n", - "method_parameters", - "(a, b = 1, *rest, key:, **opts, &block)", - ), - ( - "[1].each { |item, (left, right)| item }\n", - "block_parameters", - "|item, (left, right)|", - ), - ("fn = ->(x, y:) { x }\n", "lambda_parameters", "(x, y:)"), - ("value = other\n", "assignment", "value = other"), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let mut locals = BTreeSet::new(); - normalizer.collect_ruby_parameter_locals(node, &mut locals); - - assert_eq!( - locals, - ruby_private_collected_names( - source, - Language::Ruby, - ".rb", - "collect_ruby_parameter_locals", - kind, - text - ), - "collect_ruby_parameter_locals mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn collect_ruby_assignment_locals_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "value = other\n", - Language::Ruby, - ".rb", - "assignment", 
- "value = other", - ), - ( - "left, *rest = values\n", - Language::Ruby, - ".rb", - "assignment", - "left, *rest = values", - ), - ( - "value += 1\n", - Language::Ruby, - ".rb", - "operator_assignment", - "value += 1", - ), - ( - "begin\n work\nrescue => error\n error\nend\n", - Language::Ruby, - ".rb", - "exception_variable", - "=> error", - ), - ( - "value = other\n", - Language::Python, - ".py", - "expression_statement", - "value = other", - ), - ( - "let value = other;\n", - Language::TypeScript, - ".ts", - "variable_declarator", - "value = other", - ), - ( - "local value = other\n", - Language::Lua, - ".lua", - "assignment_statement", - "value = other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let mut locals = BTreeSet::new(); - normalizer.collect_ruby_assignment_locals(node, &mut locals); - - assert_eq!( - locals, - ruby_private_collected_names( - source, - language, - suffix, - "collect_ruby_assignment_locals", - kind, - text - ), - "collect_ruby_assignment_locals mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn collect_ruby_scope_locals_matches_ruby_private_method() { - for (source, language, suffix, kind, text, root) in [ - ( - "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend\n", - Language::Ruby, - ".rb", - "method", - "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend", - true, - ), - ( - "def outer(a)\n local = 1\nend\n", - Language::Ruby, - ".rb", - "method", - "def outer(a)\n local = 1\nend", - false, - ), - ( - "[1].each { |item| local = item }\n", - Language::Ruby, - ".rb", - "block", - "{ |item| local = item }", - true, - ), - ( - "value = other\n", - Language::Python, - ".py", - "expression_statement", - "value = other", - true, - ), - ( - "let 
value = other;\n", - Language::TypeScript, - ".ts", - "variable_declarator", - "value = other", - true, - ), - ( - "local value = other\n", - Language::Lua, - ".lua", - "assignment_statement", - "value = other", - true, - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let mut locals = BTreeSet::new(); - normalizer.collect_ruby_scope_locals(node, &mut locals, root); - - assert_eq!( - locals, - ruby_private_scope_collected_names(source, language, suffix, kind, text, root), - "collect_ruby_scope_locals mismatch for {language:?} {kind} {text:?} root={root}" - ); - } - } - - #[test] - fn ruby_scope_locals_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend\n", - Language::Ruby, - ".rb", - "method", - "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend", - ), - ( - "[1].each { |item| local = item }\n", - Language::Ruby, - ".rb", - "block", - "{ |item| local = item }", - ), - ( - "value = other\n", - Language::Python, - ".py", - "expression_statement", - "value = other", - ), - ( - "let value = other;\n", - Language::TypeScript, - ".ts", - "variable_declarator", - "value = other", - ), - ( - "local value = other\n", - Language::Lua, - ".lua", - "assignment_statement", - "value = other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.ruby_scope_locals(node), - ruby_private_ruby_scope_locals(source, language, suffix, kind, text), - "ruby_scope_locals mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn 
with_ruby_scope_matches_ruby_private_method() { - for (source, language, suffix, kind, text, reset, initial_stack) in [ - ( - "def f(a)\n local = 1\nend\n", - Language::Ruby, - ".rb", - "method", - "def f(a)\n local = 1\nend", - false, - vec![vec!["outer"]], - ), - ( - "def f(a)\n local = 1\nend\n", - Language::Ruby, - ".rb", - "method", - "def f(a)\n local = 1\nend", - true, - vec![vec!["outer"]], - ), - ( - "[1].each { |item| local = item }\n", - Language::Ruby, - ".rb", - "block", - "{ |item| local = item }", - false, - vec![], - ), - ( - "def f(value):\n local = value\n", - Language::Python, - ".py", - "function_definition", - "def f(value):\n local = value", - true, - vec![vec!["outer"]], - ), - ( - "function f(value) { let local = value; }\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function f(value) { let local = value; }", - true, - vec![vec!["outer"]], - ), - ( - "function f(value)\n local local_value = value\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function f(value)\n local local_value = value\nend", - true, - vec![vec!["outer"]], - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - normalizer.local_stack = local_stack_from(&initial_stack); - let before = local_stack_value(&normalizer.local_stack); - let inside = normalizer.with_ruby_scope(node, reset, |normalizer| { - local_stack_value(&normalizer.local_stack) - }); - let after = local_stack_value(&normalizer.local_stack); - let rust = json!({ - "before": before, - "inside": inside, - "after": after, - "result": "block-result", - }); - - assert_eq!( - rust, - ruby_private_with_ruby_scope_trace( - source, - language, - suffix, - kind, - text, - reset, - &initial_stack, - ), - "with_ruby_scope mismatch for {language:?} {kind} {text:?} reset={reset}" - ); - } - } - - #[test] - fn 
ruby_scope_boundary_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n value\nend\n", - Language::Ruby, - ".rb", - "method", - "def f\n value\nend", - ), - ( - "class Box\nend\n", - Language::Ruby, - ".rb", - "class", - "class Box\nend", - ), - ( - "module Admin\nend\n", - Language::Ruby, - ".rb", - "module", - "module Admin\nend", - ), - ( - "items.each { |item| item }\n", - Language::Ruby, - ".rb", - "block", - "{ |item| item }", - ), - ( - "handler = -> { value }\n", - Language::Ruby, - ".rb", - "block", - "{ value }", - ), - ( - "def f():\n return value\n break\n continue\n", - Language::Python, - ".py", - "function_definition", - "def f():\n return value\n break\n continue", - ), - ( - "def f():\n return value\n", - Language::Python, - ".py", - "block", - "return value", - ), - ( - "class Box:\n pass\n", - Language::Python, - ".py", - "class_definition", - "class Box:\n pass", - ), - ( - "function f() { return value; }\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function f() { return value; }", - ), - ( - "class Box {}\n", - Language::TypeScript, - ".ts", - "class_declaration", - "class Box {}", - ), - ( - "function f()\n return value\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function f()\n return value\nend", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.ruby_scope_boundary(node), - ruby_private_predicate( - source, - language, - suffix, - "ruby_scope_boundary?", - kind, - text - ), - "ruby_scope_boundary? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn ruby_scope_child_boundary_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n value\nend\n", - Language::Ruby, - ".rb", - "method", - "def f\n value\nend", - ), - ( - "class Box\nend\n", - Language::Ruby, - ".rb", - "class", - "class Box\nend", - ), - ( - "module Admin\nend\n", - Language::Ruby, - ".rb", - "module", - "module Admin\nend", - ), - ( - "items.each { |item| item }\n", - Language::Ruby, - ".rb", - "block", - "{ |item| item }", - ), - ( - "handler = -> { value }\n", - Language::Ruby, - ".rb", - "block", - "{ value }", - ), - ( - "def f():\n return value\n", - Language::Python, - ".py", - "function_definition", - "def f():\n return value", - ), - ( - "def f():\n return value\n", - Language::Python, - ".py", - "block", - "return value", - ), - ( - "class Box:\n pass\n", - Language::Python, - ".py", - "class_definition", - "class Box:\n pass", - ), - ( - "function f() { return value; }\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function f() { return value; }", - ), - ( - "class Box {}\n", - Language::TypeScript, - ".ts", - "class_declaration", - "class Box {}", - ), - ( - "function f()\n return value\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function f()\n return value\nend", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.ruby_scope_child_boundary(node), - ruby_private_predicate( - source, - language, - suffix, - "ruby_scope_child_boundary?", - kind, - text - ), - "ruby_scope_child_boundary? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn ruby_predicate_uses_normalization_adapter() { - for (language, expected) in [ - (Language::Ruby, true), - (Language::Python, false), - (Language::Lua, false), - (Language::TypeScript, false), - ] { - let normalizer = super::TreeSitterNormalizer::new("", language); - - assert_eq!( - normalizer.ruby(), - expected, - "ruby? mismatch for {language:?}" - ); - } - } - - #[test] - fn interpolated_string_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "name = \"hi #{user}\"\nplain = \"hi\"\n", - Language::Ruby, - ".rb", - "string", - "\"hi #{user}\"", - ), - ( - "name = \"hi #{user}\"\nplain = \"hi\"\n", - Language::Ruby, - ".rb", - "string", - "\"hi\"", - ), - ( - "name = f\"hi {user}\"\nplain = \"hi\"\n", - Language::Python, - ".py", - "string", - "f\"hi {user}\"", - ), - ( - "name = f\"hi {user}\"\nplain = \"hi\"\n", - Language::Python, - ".py", - "string", - "\"hi\"", - ), - ( - "const name = `hi ${user}`;\nconst plain = `hi`;\n", - Language::TypeScript, - ".ts", - "template_string", - "`hi ${user}`", - ), - ( - "const name = `hi ${user}`;\nconst plain = `hi`;\n", - Language::TypeScript, - ".ts", - "template_string", - "`hi`", - ), - ( - "local name = \"hi\"\n", - Language::Lua, - ".lua", - "expression_list", - "\"hi\"", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.interpolated_string(node), - ruby_private_predicate( - source, - language, - suffix, - "interpolated_string?", - kind, - text - ), - "interpolated_string? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_interpolated_string_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "name = \"hi #{user}\"\n", - Language::Ruby, - ".rb", - "string", - "\"hi #{user}\"", - ), - ( - "name = f\"hi {user}\"\n", - Language::Python, - ".py", - "string", - "f\"hi {user}\"", - ), - ( - "const name = `hi ${user}`;\n", - Language::TypeScript, - ".ts", - "template_string", - "`hi ${user}`", - ), - ( - "local name = \"hi\"\n", - Language::Lua, - ".lua", - "expression_list", - "\"hi\"", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = node_value(&normalizer.normalize_interpolated_string(node)); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_interpolated_string", - kind, - text - ), - "normalize_interpolated_string mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_subshell_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "value = `echo hi`\n", - Language::Ruby, - ".rb", - "subshell", - "`echo hi`", - ), - ( - "value = `echo #{name}`\n", - Language::Ruby, - ".rb", - "subshell", - "`echo #{name}`", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = node_value(&normalizer.normalize_subshell(node)); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_subshell", - kind, - text - ), - "normalize_subshell mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn const_node_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "class 
Thing; end\ndef check; end\n", - Language::Ruby, - ".rb", - "constant", - "Thing", - ), - ( - "class Thing; end\ndef check; end\n", - Language::Ruby, - ".rb", - "identifier", - "check", - ), - ( - "class Thing:\n pass\n", - Language::Python, - ".py", - "identifier", - "Thing", - ), - ( - "type Thing = Other;\nconst value = Thing;\n", - Language::TypeScript, - ".ts", - "type_identifier", - "Thing", - ), - ( - "type Thing = Other;\nconst value = Thing;\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - ), - ( - "local Thing = {}\n", - Language::Lua, - ".lua", - "variable_list", - "Thing", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.const_kind(node.kind()), - ruby_private_predicate(source, language, suffix, "const_node?", kind, text), - "const_node? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn self_node_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ("self\nother\n", Language::Ruby, ".rb", "self", "self"), - ( - "self\nother\n", - Language::Ruby, - ".rb", - "identifier", - "other", - ), - ( - "self.value\nother.value\n", - Language::Python, - ".py", - "identifier", - "self", - ), - ( - "self.value\nother.value\n", - Language::Python, - ".py", - "identifier", - "other", - ), - ( - "this.value;\nother;\n", - Language::TypeScript, - ".ts", - "this", - "this", - ), - ( - "this.value;\nother;\n", - Language::TypeScript, - ".ts", - "identifier", - "other", - ), - ( - "print(self.value)\nprint(other.value)\n", - Language::Lua, - ".lua", - "identifier", - "self", - ), - ( - "print(self.value)\nprint(other.value)\n", - Language::Lua, - ".lua", - "identifier", - "other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = 
super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.self_node(node), - ruby_private_predicate(source, language, suffix, "self_node?", kind, text), - "self_node? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn instance_variable_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "@value\nname\n", - Language::Ruby, - ".rb", - "instance_variable", - "@value", - ), - ( - "@value\nname\n", - Language::Ruby, - ".rb", - "identifier", - "name", - ), - ( - "@decorator\ndef call():\n pass\n", - Language::Python, - ".py", - "decorator", - "@decorator", - ), - ( - "@sealed\nclass Thing {}\n", - Language::TypeScript, - ".ts", - "decorator", - "@sealed", - ), - ( - "print(value)\n", - Language::Lua, - ".lua", - "identifier", - "value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.instance_variable(node), - ruby_private_predicate(source, language, suffix, "instance_variable?", kind, text), - "instance_variable? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn global_variable_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "$value\nname\n", - Language::Ruby, - ".rb", - "global_variable", - "$value", - ), - ( - "$value\nname\n", - Language::Ruby, - ".rb", - "identifier", - "name", - ), - ( - "value = \"$name\"\n", - Language::Python, - ".py", - "string_content", - "$name", - ), - ( - "const $value = other;\n", - Language::TypeScript, - ".ts", - "identifier", - "$value", - ), - ( - "print(\"$name\")\n", - Language::Lua, - ".lua", - "string_content", - "$name", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.global_variable(node), - ruby_private_predicate(source, language, suffix, "global_variable?", kind, text), - "global_variable? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_global_variable_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "$value\n$1\n$12\n$0\n", - Language::Ruby, - ".rb", - "global_variable", - "$value", - ), - ( - "$value\n$1\n$12\n$0\n", - Language::Ruby, - ".rb", - "global_variable", - "$1", - ), - ( - "$value\n$1\n$12\n$0\n", - Language::Ruby, - ".rb", - "global_variable", - "$12", - ), - ( - "$value\n$1\n$12\n$0\n", - Language::Ruby, - ".rb", - "global_variable", - "$0", - ), - ( - "value = \"$name\"\n", - Language::Python, - ".py", - "string_content", - "$name", - ), - ( - "const $value = 1;\n", - Language::TypeScript, - ".ts", - "identifier", - "$value", - ), - ( - "print(\"$name\")\n", - Language::Lua, - ".lua", - "string_content", - "$name", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust 
= normalizer.normalize_global_variable(node); - - assert_eq!( - node_value(&rust), - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_global_variable", - kind, - text - ), - "normalize_global_variable mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn assignment_operator_matches_ruby_private_predicate() { - for (language, text) in [ - (Language::Ruby, "="), - (Language::Ruby, "**="), - (Language::Ruby, "??="), - (Language::Python, ":="), - (Language::Python, "//="), - (Language::Python, "&&="), - (Language::TypeScript, "??="), - (Language::TypeScript, ">>>="), - (Language::TypeScript, ":="), - (Language::Lua, "="), - (Language::Lua, "+="), - ] { - let normalizer = super::TreeSitterNormalizer::new("", language); - - assert_eq!( - normalizer.assignment_operator(text), - ruby_private_text_predicate(language, "assignment_operator?", text), - "assignment_operator? mismatch for {language:?} {text:?}" - ); - } - } - - #[test] - fn operator_assignment_operator_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "value **= other\nflag ||= fallback\n", - Language::Ruby, - ".rb", - "operator_assignment", - "value **= other", - ), - ( - "value **= other\nflag ||= fallback\n", - Language::Ruby, - ".rb", - "operator_assignment", - "flag ||= fallback", - ), - ( - "value //= other\n", - Language::Python, - ".py", - "expression_statement", - "value //= other", - ), - ( - "value ??= other;\ncount >>>= 1;\n", - Language::TypeScript, - ".ts", - "augmented_assignment_expression", - "value ??= other", - ), - ( - "value ??= other;\ncount >>>= 1;\n", - Language::TypeScript, - ".ts", - "augmented_assignment_expression", - "count >>>= 1", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.operator_assignment_operator(node), - 
ruby_private_string( - source, - language, - suffix, - "operator_assignment_operator", - kind, - text - ), - "operator_assignment_operator mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_logical_operator_assignment_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "value ||= fallback\n", - Language::Ruby, - ".rb", - "operator_assignment", - "value ||= fallback", - ), - ( - "value &&= fallback\n", - Language::Ruby, - ".rb", - "operator_assignment", - "value &&= fallback", - ), - ( - "value += fallback\n", - Language::Ruby, - ".rb", - "operator_assignment", - "value += fallback", - ), - ( - "@value ||= fallback\n", - Language::Ruby, - ".rb", - "operator_assignment", - "@value ||= fallback", - ), - ( - "value //= fallback\n", - Language::Python, - ".py", - "expression_statement", - "value //= fallback", - ), - ( - "value ||= fallback;\n", - Language::TypeScript, - ".ts", - "augmented_assignment_expression", - "value ||= fallback", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let left = normalizer - .assignment_left(node) - .expect("operator assignment should have left side"); - let right = normalizer - .assignment_right(node) - .and_then(|right| normalizer.normalize_node(right)); - let operator = normalizer.operator_assignment_operator(node); - let rust = normalizer - .normalize_logical_operator_assignment(left, &operator, right, node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_logical_operator_assignment_value( - source, language, suffix, kind, text - ), - "normalize_logical_operator_assignment mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_operator_assignment_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "value += 
other\n", - Language::Ruby, - ".rb", - "operator_assignment", - "value += other", - ), - ( - "$value += 1\n", - Language::Ruby, - ".rb", - "operator_assignment", - "$value += 1", - ), - ( - "items[index] += value\n", - Language::Ruby, - ".rb", - "operator_assignment", - "items[index] += value", - ), - ( - "object.value += 1\n", - Language::Ruby, - ".rb", - "operator_assignment", - "object.value += 1", - ), - ( - "flag ||= fallback\n", - Language::Ruby, - ".rb", - "operator_assignment", - "flag ||= fallback", - ), - ( - "flag &&= fallback\n", - Language::Ruby, - ".rb", - "operator_assignment", - "flag &&= fallback", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_operator_assignment(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_operator_assignment", - kind, - text - ), - "normalize_operator_assignment mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn first_named_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "class Thing; end\nname\n", - Language::Ruby, - ".rb", - "class", - "class Thing; end", - ), - ( - "class Thing; end\nname\n", - Language::Ruby, - ".rb", - "identifier", - "name", - ), - ( - "def check(value):\n return value\n", - Language::Python, - ".py", - "function_definition", - "def check(value):\n return value", - ), - ( - "function check(value) { return value; }\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function check(value) { return value; }", - ), - ( - "print(value)\n", - Language::Lua, - ".lua", - "function_call", - "print(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let 
normalizer = super::TreeSitterNormalizer::new(source, language); - let found = normalizer.first_named(node).map(|node| { - ( - node.kind().to_string(), - super::node_text(node, source).to_string(), - ) - }); - - assert_eq!( - found, - ruby_private_node_signature(source, language, suffix, "first_named", kind, text), - "first_named mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn block_child_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def check\n call\nend\n", - Language::Ruby, - ".rb", - "method", - "def check\n call\nend", - ), - ( - "items.each do\n call\nend\n", - Language::Ruby, - ".rb", - "call", - "items.each do\n call\nend", - ), - ( - "def check():\n call()\n", - Language::Python, - ".py", - "function_definition", - "def check():\n call()", - ), - ( - "function check() { call(); }\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function check() { call(); }", - ), - ( - "function check()\n call()\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function check()\n call()\nend", - ), - ("name\n", Language::Ruby, ".rb", "identifier", "name"), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let found = normalizer.block_child(node).map(|node| { - ( - node.kind().to_string(), - super::node_text(node, source).to_string(), - ) - }); - - assert_eq!( - found, - ruby_private_node_signature(source, language, suffix, "block_child", kind, text), - "block_child mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn branch_child_matches_ruby_private_method() { - for (source, language, suffix, kind, text, condition_kind, condition_text, index) in [ - ( - "if ready\n call\nelse\n stop\nend\n", - Language::Ruby, - ".rb", - "if", - "if ready\n call\nelse\n stop\nend", - "identifier", - "ready", - 0, - ), - ( - "if ready\n 
call\nelse\n stop\nend\n", - Language::Ruby, - ".rb", - "if", - "if ready\n call\nelse\n stop\nend", - "identifier", - "ready", - 1, - ), - ( - "if ready\n # note\n call\nend\n", - Language::Ruby, - ".rb", - "if", - "if ready\n # note\n call\nend", - "identifier", - "ready", - 0, - ), - ( - "if ready:\n call()\nelse:\n stop()\n", - Language::Python, - ".py", - "if_statement", - "if ready:\n call()\nelse:\n stop()", - "identifier", - "ready", - 1, - ), - ( - "if (ready) { call(); } else { stop(); }\n", - Language::TypeScript, - ".ts", - "if_statement", - "if (ready) { call(); } else { stop(); }", - "parenthesized_expression", - "(ready)", - 0, - ), - ( - "if ready then\n call()\nelse\n stop()\nend\n", - Language::Lua, - ".lua", - "if_statement", - "if ready then\n call()\nelse\n stop()\nend", - "identifier", - "ready", - 1, - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let condition = - first_raw_node(tree.root_node(), source, condition_kind, condition_text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let found = normalizer.branch_child(node, condition, index).map(|node| { - ( - node.kind().to_string(), - super::node_text(node, source).to_string(), - ) - }); - - assert_eq!( - found, - ruby_private_branch_child_signature( - source, - language, - suffix, - kind, - text, - condition_kind, - condition_text, - index - ), - "branch_child mismatch for {language:?} {kind} {text:?} index {index}" - ); - } - } - - #[test] - fn explicit_alternative_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "if ready\n call\nelsif other\n stop\nend\n", - Language::Ruby, - ".rb", - "if", - "if ready\n call\nelsif other\n stop\nend", - ), - ( - "if ready\n call\nend\n", - Language::Ruby, - ".rb", - "if", - "if ready\n call\nend", - ), - ( - "if ready:\n call()\nelif other:\n stop()\n", - Language::Python, - ".py", - "if_statement", - "if ready:\n 
call()\nelif other:\n stop()", - ), - ( - "if (ready) { call(); } else { stop(); }\n", - Language::TypeScript, - ".ts", - "if_statement", - "if (ready) { call(); } else { stop(); }", - ), - ( - "if ready then\n call()\nelseif other then\n stop()\nend\n", - Language::Lua, - ".lua", - "if_statement", - "if ready then\n call()\nelseif other then\n stop()\nend", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let found = normalizer.explicit_alternative(node).map(|node| { - ( - node.kind().to_string(), - super::node_text(node, source).to_string(), - ) - }); - - assert_eq!( - found, - ruby_private_node_signature( - source, - language, - suffix, - "explicit_alternative", - kind, - text - ), - "explicit_alternative mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn wrap_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "first\nsecond\n", - Language::Ruby, - ".rb", - "identifier", - "second", - ), - ( - "first\nsecond\n", - Language::Python, - ".py", - "expression_statement", - "second", - ), - ( - "first;\nsecond;\n", - Language::TypeScript, - ".ts", - "identifier", - "second", - ), - ( - "print(first)\nprint(second)\n", - Language::Lua, - ".lua", - "identifier", - "second", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - let raw_wrapped = - normalizer.wrap("OUTER", vec![Child::Symbol("child".to_string())], node); - assert_eq!( - node_value(&raw_wrapped), - ruby_private_wrap_value(source, language, suffix, kind, text, false), - "wrap raw-source mismatch for {language:?} {kind} {text:?}" - ); - - let inner = normalizer.wrap("INNER", Vec::new(), node); - let node_wrapped = normalizer.wrap_from_source_node( - "OUTER", - 
vec![Child::Symbol("child".to_string())], - &inner, - ); - assert_eq!( - node_value(&node_wrapped), - ruby_private_wrap_value(source, language, suffix, kind, text, true), - "wrap normalized-source mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn source_before_child_matches_ruby_private_method() { - for (source, language, suffix, kind, text, child_kind, child_text) in [ - ( - "if ready\n call\nend\n", - Language::Ruby, - ".rb", - "if", - "if ready\n call\nend", - "then", - "\n call", - ), - ( - "if ready:\n call()\n", - Language::Python, - ".py", - "if_statement", - "if ready:\n call()", - "block", - "call()", - ), - ( - "if (ready) { call(); }\n", - Language::TypeScript, - ".ts", - "if_statement", - "if (ready) { call(); }", - "statement_block", - "{ call(); }", - ), - ( - "if ready then\n call()\nend\n", - Language::Lua, - ".lua", - "if_statement", - "if ready then\n call()\nend", - "block", - "call()", - ), - ( - "puts value\n", - Language::Ruby, - ".rb", - "call", - "puts value", - "identifier", - "puts", - ), - ( - "call()\n", - Language::Python, - ".py", - "expression_statement", - "call()", - "identifier", - "call", - ), - ( - "call();\n", - Language::TypeScript, - ".ts", - "expression_statement", - "call();", - "identifier", - "call", - ), - ( - "call()\n", - Language::Lua, - ".lua", - "function_call", - "call()", - "identifier", - "call", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let child = first_raw_node(tree.root_node(), source, child_kind, child_text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let source_node = normalizer.source_before_child(node, child); - let wrapped = normalizer.wrap_from_source_node("OUTER", Vec::new(), &source_node); - - assert_eq!( - node_value(&wrapped), - ruby_private_source_before_child_wrap_value( - source, language, suffix, kind, text, child_kind, child_text - ), - "source_before_child mismatch 
for {language:?} {kind} {text:?} before {child_kind} {child_text:?}" - ); - } - } - - #[test] - fn source_from_nodes_matches_ruby_private_method() { - for (source, language, suffix, first_kind, first_text, last_kind, last_text) in [ - ( - "left + right\n", - Language::Ruby, - ".rb", - "identifier", - "left", - "identifier", - "right", - ), - ( - "left = one\nright = two\n", - Language::Python, - ".py", - "identifier", - "one", - "identifier", - "two", - ), - ( - "const left = one;\nconst right = two;\n", - Language::TypeScript, - ".ts", - "identifier", - "one", - "identifier", - "two", - ), - ( - "local left = one\nlocal right = two\n", - Language::Lua, - ".lua", - "expression_list", - "one", - "expression_list", - "two", - ), - ] { - let tree = raw_tree(source, language); - let first_raw = first_raw_node(tree.root_node(), source, first_kind, first_text); - let last_raw = first_raw_node(tree.root_node(), source, last_kind, last_text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let source_node = normalizer.source_from_nodes(first_raw, last_raw); - - assert_eq!( - node_value(&source_node), - ruby_private_source_from_nodes_value( - source, language, suffix, first_kind, first_text, last_kind, last_text - ), - "source_from_nodes mismatch for {language:?} {first_kind} {first_text:?} through {last_kind} {last_text:?}" - ); - } - } - - #[test] - fn source_from_normalized_nodes_matches_ruby_private_method() { - for (source, language, suffix, first_kind, first_text, last_kind, last_text) in [ - ( - "first\nsecond\n", - Language::Ruby, - ".rb", - "identifier", - "first", - "identifier", - "second", - ), - ( - "first\nsecond\n", - Language::Python, - ".py", - "expression_statement", - "first", - "expression_statement", - "second", - ), - ( - "first;\nsecond;\n", - Language::TypeScript, - ".ts", - "expression_statement", - "first;", - "expression_statement", - "second;", - ), - ( - "print(first)\nprint(second)\n", - Language::Lua, - ".lua", - 
"function_call", - "print(first)", - "function_call", - "print(second)", - ), - ( - "first + second\n", - Language::Ruby, - ".rb", - "identifier", - "first", - "identifier", - "second", - ), - ] { - let tree = raw_tree(source, language); - let first_raw = first_raw_node(tree.root_node(), source, first_kind, first_text); - let last_raw = first_raw_node(tree.root_node(), source, last_kind, last_text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let first_node = normalizer.wrap("FIRST", Vec::new(), first_raw); - let last_node = normalizer.wrap("LAST", Vec::new(), last_raw); - let source_node = normalizer.source_from_normalized_nodes(&first_node, &last_node); - - assert_eq!( - node_value(&source_node), - ruby_private_source_from_normalized_nodes_value( - source, language, suffix, first_kind, first_text, last_kind, last_text - ), - "source_from_normalized_nodes mismatch for {language:?} {first_kind} {first_text:?} through {last_kind} {last_text:?}" - ); - } - } - - #[test] - fn named_field_matches_ruby_private_method() { - for (source, language, suffix, kind, text, field) in [ - ( - "def check(value)\n value\nend\n", - Language::Ruby, - ".rb", - "method", - "def check(value)\n value\nend", - "name", - ), - ( - "def check(value)\n value\nend\n", - Language::Ruby, - ".rb", - "method", - "def check(value)\n value\nend", - "missing", - ), - ( - "if ready:\n call()\n", - Language::Python, - ".py", - "if_statement", - "if ready:\n call()", - "body", - ), - ( - "if ready:\n call()\n", - Language::Python, - ".py", - "if_statement", - "if ready:\n call()", - "condition", - ), - ( - "function check(value) { return value; }\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function check(value) { return value; }", - "body", - ), - ( - "function check(value)\n return value\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function check(value)\n return value\nend", - "body", - ), - ] { - let tree = raw_tree(source, 
language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let found = normalizer.named_field(node, field).map(|node| { - ( - node.kind().to_string(), - super::node_text(node, source).to_string(), - ) - }); - - assert_eq!( - found, - ruby_private_named_field_signature(source, language, suffix, kind, text, field), - "named_field mismatch for {language:?} {kind} {text:?} field {field}" - ); - } - } - - #[test] - fn parent_node_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def check\nend\n", - Language::Ruby, - ".rb", - "identifier", - "check", - ), - ("value\n", Language::Ruby, ".rb", "program", "value\n"), - ( - "if ready:\n call()\n", - Language::Python, - ".py", - "identifier", - "ready", - ), - ( - "call(value);\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - ), - ( - "call(value)\n", - Language::Lua, - ".lua", - "identifier", - "value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let found = normalizer.parent_node(node).map(|node| { - ( - node.kind().to_string(), - super::node_text(node, source).to_string(), - ) - }); - - assert_eq!( - found, - ruby_private_node_signature(source, language, suffix, "parent_node", kind, text), - "parent_node mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn next_sibling_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ("a + b\n", Language::Ruby, ".rb", "identifier", "a"), - ("a + b\n", Language::Python, ".py", "identifier", "a"), - ("a + b;\n", Language::TypeScript, ".ts", "identifier", "a"), - ("print(a, b)\n", Language::Lua, ".lua", "identifier", "a"), - ("a\n", Language::Ruby, ".rb", "identifier", "a"), - ] { - let tree = raw_tree(source, language); - let node = 
first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let found = normalizer.next_sibling(node).map(|node| { - ( - node.kind().to_string(), - super::node_text(node, source).to_string(), - ) - }); - - assert_eq!( - found, - ruby_private_node_signature(source, language, suffix, "next_sibling", kind, text), - "next_sibling mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn prev_sibling_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ("a + b\n", Language::Ruby, ".rb", "identifier", "b"), - ("a + b\n", Language::Python, ".py", "identifier", "b"), - ("a + b;\n", Language::TypeScript, ".ts", "identifier", "b"), - ("print(a, b)\n", Language::Lua, ".lua", "identifier", "b"), - ("a\n", Language::Ruby, ".rb", "identifier", "a"), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let found = normalizer.prev_sibling(node).map(|node| { - ( - node.kind().to_string(), - super::node_text(node, source).to_string(), - ) - }); - - assert_eq!( - found, - ruby_private_node_signature(source, language, suffix, "prev_sibling", kind, text), - "prev_sibling mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn next_named_sibling_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ("a + b\n", Language::Ruby, ".rb", "identifier", "a"), - ("a + b\n", Language::Python, ".py", "identifier", "a"), - ("a + b;\n", Language::TypeScript, ".ts", "identifier", "a"), - ("print(a, b)\n", Language::Lua, ".lua", "identifier", "a"), - ("a\n", Language::Ruby, ".rb", "identifier", "a"), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let found = 
normalizer.next_named_sibling(node).map(|node| { - ( - node.kind().to_string(), - super::node_text(node, source).to_string(), - ) - }); - - assert_eq!( - found, - ruby_private_node_signature( - source, - language, - suffix, - "next_named_sibling", - kind, - text - ), - "next_named_sibling mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn ternary_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f(cond, a, b)\n cond ? a : b\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "cond ? a : b", - ), - ( - "value = a if cond else b\n", - Language::Python, - ".py", - "conditional_expression", - "a if cond else b", - ), - ( - "const value = cond ? a : b;\n", - Language::TypeScript, - ".ts", - "ternary_expression", - "cond ? a : b", - ), - ( - "local value = cond and a or b\n", - Language::Lua, - ".lua", - "expression_list", - "cond and a or b", - ), - ( - "def f(cond)\n cond\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "cond", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.ternary_statement(node), - ruby_private_predicate(source, language, suffix, "ternary_statement?", kind, text), - "ternary_statement? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_ternary_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f(cond, a, b)\n cond ? a : b\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "cond ? a : b", - ), - ( - "value = a if cond else b\n", - Language::Python, - ".py", - "conditional_expression", - "a if cond else b", - ), - ( - "const value = cond ? a : b;\n", - Language::TypeScript, - ".ts", - "ternary_expression", - "cond ? 
a : b", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_ternary_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_ternary_statement", - kind, - text - ), - "normalize_ternary_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn ternary_statement_normalization_matches_ruby() { - for (source, language, suffix, if_text) in [ - ( - "def f(cond, a, b)\n cond ? a : b\nend\n", - Language::Ruby, - ".rb", - "cond ? a : b", - ), - ( - "def f(cond, a, b):\n return a if cond else b\n", - Language::Python, - ".py", - "a if cond else b", - ), - ( - "function f(cond: boolean, a: number, b: number) { return cond ? a : b; }\n", - Language::TypeScript, - ".ts", - "cond ? 
a : b", - ), - ] { - let root = parse_language_source(source, language, suffix); - let if_node = first_node(&root, "IF", if_text); - assert_eq!(child_node(if_node, 0).text, "cond"); - assert_eq!(child_node(if_node, 1).text, "a"); - assert_eq!(child_node(if_node, 2).text, "b"); - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn case_argument_list_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f(x)\n return case x\n when 1 then :one\n else :other\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "case x\n when 1 then :one\n else :other\n end", - ), - ( - "case x\nwhen 1 then :one\nelse :other\nend\n", - Language::Ruby, - ".rb", - "case", - "case x\nwhen 1 then :one\nelse :other\nend", - ), - ( - "match value:\n case 1:\n one()\n", - Language::Python, - ".py", - "case_clause", - "case 1:\n one()", - ), - ( - "switch (value) { case 1: one(); break; }\n", - Language::TypeScript, - ".ts", - "switch_case", - "case 1: one(); break;", - ), - ( - "if value == 1 then one() end\n", - Language::Lua, - ".lua", - "if_statement", - "if value == 1 then one() end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.case_argument_list(node), - ruby_private_predicate(source, language, suffix, "case_argument_list?", kind, text), - "case_argument_list? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn leading_function_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def outer\n def inner\n x\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "def inner\n x\n end", - ), - ( - "def outer():\n def inner():\n x\n", - Language::Python, - ".py", - "block", - "def inner():\n x", - ), - ( - "function outer()\n function inner()\n x()\n end\nend\n", - Language::Lua, - ".lua", - "block", - "function inner()\n x()\n end", - ), - ( - "function outer() { function inner() { x; } }\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function inner() { x; }", - ), - ( - "def outer\n x\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "x", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.leading_function_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "leading_function_statement?", - kind, - text - ), - "leading_function_statement? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_leading_function_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def outer\n def inner\n x\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "def inner\n x\n end", - ), - ( - "def outer():\n def inner():\n x\n", - Language::Python, - ".py", - "block", - "def inner():\n x", - ), - ( - "function outer()\n function inner()\n x()\n end\nend\n", - Language::Lua, - ".lua", - "block", - "function inner()\n x()\n end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_leading_function_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_leading_function_statement", - kind, - text - ), - "normalize_leading_function_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn leading_function_statement_normalization_matches_ruby() { - for (source, language, suffix) in [ - ( - "def outer\n def inner\n x\n end\nend\n", - Language::Ruby, - ".rb", - ), - ( - "def outer():\n def inner():\n x\n", - Language::Python, - ".py", - ), - ( - "function outer()\n function inner()\n x()\n end\nend\n", - Language::Lua, - ".lua", - ), - ] { - let root = parse_language_source(source, language, suffix); - let mut defns = Vec::new(); - nodes_of_type(&root, "DEFN", &mut defns); - assert!( - defns - .iter() - .any(|node| matches!(node.children.first(), Some(Child::Symbol(name)) if name == "inner")), - "expected nested DEFN inner for {language:?} in {root:#?}" - ); - let mut iters = Vec::new(); - nodes_of_type(&root, "ITER", &mut iters); - assert!( - iters.iter().all(|node| !node.text.contains("inner")), - "nested function 
must not normalize as ITER for {language:?}: {iters:#?}" - ); - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn leading_owner_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def outer\n class Inner\n value\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "class Inner\n value\n end", - ), - ( - "def outer\n module Inner\n value\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "module Inner\n value\n end", - ), - ( - "def outer():\n class Inner:\n pass\n", - Language::Python, - ".py", - "block", - "class Inner:\n pass", - ), - ( - "function outer() { class Inner {} }\n", - Language::TypeScript, - ".ts", - "class_declaration", - "class Inner {}", - ), - ( - "function outer()\n Inner = {}\nend\n", - Language::Lua, - ".lua", - "block", - "Inner = {}", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.leading_owner_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "leading_owner_statement?", - kind, - text - ), - "leading_owner_statement? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_leading_owner_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def outer\n class Inner\n value\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "class Inner\n value\n end", - ), - ( - "def outer\n module Inner\n value\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "module Inner\n value\n end", - ), - ( - "def outer():\n class Inner:\n pass\n", - Language::Python, - ".py", - "block", - "class Inner:\n pass", - ), - ( - "function outer() { class Inner {} }\n", - Language::TypeScript, - ".ts", - "class_declaration", - "class Inner {}", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_leading_owner_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_leading_owner_statement", - kind, - text - ), - "normalize_leading_owner_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn modifier_keyword_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n value if cond\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value if cond", - ), - ( - "def f\n value unless cond\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value unless cond", - ), - ( - "def f\n value while cond\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value while cond", - ), - ( - "def f\n value until cond\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value until cond", - ), - ( - "def f\n if cond\n value\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "if cond\n value\n end", - ), - ( - "def f():\n if cond:\n 
value()\n", - Language::Python, - ".py", - "block", - "if cond:\n value()", - ), - ( - "function f() { if (cond) { value(); } }\n", - Language::TypeScript, - ".ts", - "if_statement", - "if (cond) { value(); }", - ), - ( - "function f()\n if cond then\n value()\n end\nend\n", - Language::Lua, - ".lua", - "block", - "if cond then\n value()\n end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.modifier_keyword(node).unwrap_or_default(); - - assert_eq!( - rust, - ruby_private_string(source, language, suffix, "modifier_keyword", kind, text), - "modifier_keyword mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn modifier_parts_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n value if cond\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value if cond", - ), - ( - "def f\n value unless cond\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value unless cond", - ), - ( - "def f\n if cond\n value\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "if cond\n value\n end", - ), - ( - "def f():\n if cond:\n value()\n", - Language::Python, - ".py", - "block", - "if cond:\n value()", - ), - ( - "function f() { if (cond) { value(); } }\n", - Language::TypeScript, - ".ts", - "if_statement", - "if (cond) { value(); }", - ), - ( - "function f()\n if cond then\n value()\n end\nend\n", - Language::Lua, - ".lua", - "block", - "if cond then\n value()\n end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.modifier_parts(node).map(|(action, condition)| { - ( - ( - action.kind().to_string(), - super::node_text(action, source).to_string(), - ), - ( - 
condition.kind().to_string(), - super::node_text(condition, source).to_string(), - ), - ) - }); - - assert_eq!( - rust, - ruby_private_modifier_parts_signature(source, language, suffix, kind, text), - "modifier_parts mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn modifier_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n value if cond\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value if cond", - ), - ( - "def f\n return value if cond\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "return value if cond", - ), - ( - "def f\n if cond\n value\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "if cond\n value\n end", - ), - ( - "def f():\n if cond:\n value()\n", - Language::Python, - ".py", - "block", - "if cond:\n value()", - ), - ( - "function f() { if (cond) { value(); } }\n", - Language::TypeScript, - ".ts", - "if_statement", - "if (cond) { value(); }", - ), - ( - "function f()\n if cond then\n value()\n end\nend\n", - Language::Lua, - ".lua", - "block", - "if cond then\n value()\n end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.modifier_statement(node), - ruby_private_predicate(source, language, suffix, "modifier_statement?", kind, text), - "modifier_statement? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_modifier_action_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "return value if cond\n", - Language::Ruby, - ".rb", - "return", - "return value", - ), - ("break if done\n", Language::Ruby, ".rb", "break", "break"), - ( - "value if cond\n", - Language::Ruby, - ".rb", - "identifier", - "value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_modifier_action(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_modifier_action", - kind, - text - ), - "normalize_modifier_action mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_modifier_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n value if cond\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value if cond", - ), - ( - "def f\n value unless cond\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value unless cond", - ), - ( - "def f\n value while cond\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value while cond", - ), - ( - "def f\n value until cond\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value until cond", - ), - ( - "def f\n return value if cond\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "return value if cond", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_modifier_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - 
rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_modifier_statement", - kind, - text - ), - "normalize_modifier_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn modifier_return_action_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "return value if ready\n", - Language::Ruby, - ".rb", - "return", - "return value", - ), - ("break if done\n", Language::Ruby, ".rb", "break", "break"), - ("next if skip\n", Language::Ruby, ".rb", "next", "next"), - ( - "return value if ready\n", - Language::Ruby, - ".rb", - "identifier", - "ready", - ), - ( - "def f():\n return value\n break\n continue\n", - Language::Python, - ".py", - "return_statement", - "return value", - ), - ( - "def f():\n return value\n break\n continue\n", - Language::Python, - ".py", - "break_statement", - "break", - ), - ( - "def f():\n return value\n break\n continue\n", - Language::Python, - ".py", - "continue_statement", - "continue", - ), - ( - "def f():\n return value\n", - Language::Python, - ".py", - "identifier", - "value", - ), - ( - "function f() { return value; break; continue; }\n", - Language::TypeScript, - ".ts", - "return_statement", - "return value;", - ), - ( - "function f() { return value; break; continue; }\n", - Language::TypeScript, - ".ts", - "break_statement", - "break;", - ), - ( - "function f() { return value; break; continue; }\n", - Language::TypeScript, - ".ts", - "continue_statement", - "continue;", - ), - ( - "function f() { return value; }\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - ), - ( - "return value\n", - Language::Lua, - ".lua", - "return_statement", - "return value", - ), - ( - "return value\n", - Language::Lua, - ".lua", - "expression_list", - "value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, 
language); - - assert_eq!( - normalizer.modifier_return_action(node), - ruby_private_predicate( - source, - language, - suffix, - "modifier_return_action?", - kind, - text - ), - "modifier_return_action? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn call_block_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "items.each do |item|\n item\nend\n", - Language::Ruby, - ".rb", - "call", - "items.each do |item|\n item\nend", - ), - ( - "items.map { |item| item }\n", - Language::Ruby, - ".rb", - "call", - "items.map { |item| item }", - ), - ( - "def f\n items.map { |item| item }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "items.map { |item| item }", - ), - ("items.each\n", Language::Ruby, ".rb", "call", "items.each"), - ( - "def f():\n value()\n", - Language::Python, - ".py", - "function_definition", - "def f():\n value()", - ), - ( - "function f()\n value()\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function f()\n value()\nend", - ), - ( - "function f() { value(); }\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function f() { value(); }", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let found = normalizer.call_block(node).map(|node| { - ( - node.kind().to_string(), - super::node_text(node, source).to_string(), - ) - }); - - assert_eq!( - found, - ruby_private_node_signature(source, language, suffix, "call_block", kind, text), - "call_block mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn statement_block_call_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n items.map { |item| item }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "items.map { |item| item }", - ), - ( - "items.map { |item| item }\n", - Language::Ruby, - ".rb", 
- "call", - "items.map { |item| item }", - ), - ( - "def f\n foo(bar) { baz }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "foo(bar) { baz }", - ), - ( - "user.name()\n", - Language::Python, - ".py", - "attribute", - "user.name", - ), - ( - "def f():\n value()\n", - Language::Python, - ".py", - "function_definition", - "def f():\n value()", - ), - ( - "user.name();\n", - Language::TypeScript, - ".ts", - "member_expression", - "user.name", - ), - ( - "function f() { value(); }\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function f() { value(); }", - ), - ( - "user.name()\n", - Language::Lua, - ".lua", - "dot_index_expression", - "user.name", - ), - ( - "function f()\n value()\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function f()\n value()\nend", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let found = normalizer.statement_block_call(node).map(|node| { - ( - node.kind().to_string(), - super::node_text(node, source).to_string(), - ) - }); - - assert_eq!( - found, - ruby_private_node_signature( - source, - language, - suffix, - "statement_block_call", - kind, - text - ), - "statement_block_call mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn statement_call_with_block_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n items.map { |item| item }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "items.map { |item| item }", - ), - ( - "items.map { |item| item }\n", - Language::Ruby, - ".rb", - "call", - "items.map { |item| item }", - ), - ( - "def f\n foo(bar) { baz }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "foo(bar) { baz }", - ), - ( - "def f\n items.map\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "items.map", - ), - ( - "def f():\n value(lambda item: 
item)\n", - Language::Python, - ".py", - "function_definition", - "def f():\n value(lambda item: item)", - ), - ( - "items.map(item => item);\n", - Language::TypeScript, - ".ts", - "expression_statement", - "items.map(item => item);", - ), - ( - "items:map(function(item) return item end)\n", - Language::Lua, - ".lua", - "function_call", - "items:map(function(item) return item end)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.statement_call_with_block(node), - ruby_private_predicate( - source, - language, - suffix, - "statement_call_with_block?", - kind, - text - ), - "statement_call_with_block? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_statement_call_with_block_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [( - "def f\n items.map { |item| item }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "items.map { |item| item }", - )] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_statement_call_with_block(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_statement_call_with_block", - kind, - text - ), - "normalize_statement_call_with_block mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn visibility_inline_def_call_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "private def hidden; value; end\n", - Language::Ruby, - ".rb", - "call", - "private def hidden; value; end", - ), - ( - "public def visible\n value\nend\n", - Language::Ruby, - ".rb", - "call", - 
"public def visible\n value\nend", - ), - ( - "private :hidden\n", - Language::Ruby, - ".rb", - "call", - "private :hidden", - ), - ( - "private(value)\n", - Language::Python, - ".py", - "expression_statement", - "private(value)", - ), - ( - "private(value);\n", - Language::TypeScript, - ".ts", - "call_expression", - "private(value)", - ), - ( - "private(value)\n", - Language::Lua, - ".lua", - "function_call", - "private(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.visibility_inline_def_call(node), - ruby_private_predicate( - source, - language, - suffix, - "visibility_inline_def_call?", - kind, - text - ), - "visibility_inline_def_call? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn visibility_inline_def_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "class C\n private def hidden\n value\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "private def hidden\n value\n end", - ), - ( - "class C\n module_function def helper\n value\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "module_function def helper\n value\n end", - ), - ( - "class C\n private :hidden\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "private :hidden", - ), - ( - "private(value)\n", - Language::Python, - ".py", - "expression_statement", - "private(value)", - ), - ( - "private(value);\n", - Language::TypeScript, - ".ts", - "expression_statement", - "private(value);", - ), - ( - "private(value)\n", - Language::Lua, - ".lua", - "function_call", - "private(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let function = 
normalizer.named_children(node).into_iter().next().expect( - "visibility_inline_def_statement test target should have a first named child", - ); - - assert_eq!( - normalizer.visibility_inline_def_statement(node, function), - ruby_private_visibility_inline_def_statement_predicate( - source, language, suffix, kind, text - ), - "visibility_inline_def_statement? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_visibility_inline_def_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "private def hidden\n value\nend\n", - Language::Ruby, - ".rb", - "call", - "private def hidden\n value\nend", - ), - ( - "public def visible\n value\nend\n", - Language::Ruby, - ".rb", - "call", - "public def visible\n value\nend", - ), - ( - "module_function def self.helper\n value\nend\n", - Language::Ruby, - ".rb", - "call", - "module_function def self.helper\n value\nend", - ), - ( - "private(value)\n", - Language::Python, - ".py", - "expression_statement", - "private(value)", - ), - ( - "private(value);\n", - Language::TypeScript, - ".ts", - "call_expression", - "private(value)", - ), - ( - "private(value)\n", - Language::Lua, - ".lua", - "function_call", - "private(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_visibility_inline_def(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_visibility_inline_def", - kind, - text - ), - "normalize_visibility_inline_def mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn inline_def_from_argument_list_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "class C\n private def hidden\n value\n end\nend\n", - 
Language::Ruby, - ".rb", - "argument_list", - "def hidden\n value\n end", - ), - ( - "class C\n private def self.hidden\n value\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "def self.hidden\n value\n end", - ), - ( - "class C\n private :hidden\nend\n", - Language::Ruby, - ".rb", - "argument_list", - ":hidden", - ), - ( - "private(value)\n", - Language::Python, - ".py", - "argument_list", - "(value)", - ), - ( - "private(value);\n", - Language::TypeScript, - ".ts", - "arguments", - "(value)", - ), - ( - "private(value)\n", - Language::Lua, - ".lua", - "arguments", - "(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .inline_def_from_argument_list(Some(node)) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "inline_def_from_argument_list", - kind, - text - ), - "inline_def_from_argument_list mismatch for {language:?} {kind} {text:?}" - ); - } - - for (source, language, suffix) in [ - ("private def hidden\n value\nend\n", Language::Ruby, ".rb"), - ("private(value)\n", Language::Python, ".py"), - ("private(value);\n", Language::TypeScript, ".ts"), - ("private(value)\n", Language::Lua, ".lua"), - ] { - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .inline_def_from_argument_list(None) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_inline_def_from_argument_list_nil_value(source, language, suffix), - "inline_def_from_argument_list nil mismatch for {language:?}" - ); - } - } - - #[test] - fn inline_def_from_source_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "class C\n private def hidden\n value\n end\nend\n", - Language::Ruby, 
- ".rb", - "argument_list", - "def hidden\n value\n end", - ), - ( - "class C\n private def self.hidden\n value\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "def self.hidden\n value\n end", - ), - ( - "def hidden\n value\nend\n", - Language::Ruby, - ".rb", - "method", - "def hidden\n value\nend", - ), - ( - "def self.hidden\n value\nend\n", - Language::Ruby, - ".rb", - "singleton_method", - "def self.hidden\n value\nend", - ), - ( - "class C\n private :hidden\nend\n", - Language::Ruby, - ".rb", - "argument_list", - ":hidden", - ), - ( - "def hidden():\n value\n", - Language::Python, - ".py", - "function_definition", - "def hidden():\n value", - ), - ( - "function hidden() {\n value;\n}\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function hidden() {\n value;\n}", - ), - ( - "function hidden()\n value()\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function hidden()\n value()\nend", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .inline_def_from_source(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "inline_def_from_source", - kind, - text - ), - "inline_def_from_source mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn inline_def_from_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "class C\n private def hidden\n value\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "private def hidden\n value\n end", - ), - ( - "class C\n module_function def self.hidden\n value\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "module_function def self.hidden\n value\n end", - ), - ( - "private def hidden\n value\nend\n", - Language::Ruby, - 
".rb", - "call", - "private def hidden\n value\nend", - ), - ( - "class C\n private :hidden\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "private :hidden", - ), - ( - "private(value)\n", - Language::Python, - ".py", - "expression_statement", - "private(value)", - ), - ( - "private(value);\n", - Language::TypeScript, - ".ts", - "expression_statement", - "private(value);", - ), - ( - "private(value)\n", - Language::Lua, - ".lua", - "function_call", - "private(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .inline_def_from_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "inline_def_from_statement", - kind, - text - ), - "inline_def_from_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn inline_def_body_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "class C\n private def hidden\n value\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "def hidden\n value\n end", - ), - ( - "class C\n private def self.hidden\n value\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "def self.hidden\n value\n end", - ), - ( - "class C\n private def empty\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "def empty\n end", - ), - ( - "def hidden():\n value\n", - Language::Python, - ".py", - "function_definition", - "def hidden():\n value", - ), - ( - "function hidden() {\n value;\n}\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function hidden() {\n value;\n}", - ), - ( - "function hidden()\n value()\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function hidden()\n value()\nend", - ), - ] { - let tree = raw_tree(source, 
language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.inline_def_body(node).map(|body| { - ( - body.kind().to_string(), - super::node_text(body, source).to_string(), - ) - }); - - assert_eq!( - rust, - ruby_private_node_signature( - source, - language, - suffix, - "inline_def_body", - kind, - text - ), - "inline_def_body mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn inline_def_receiver_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "class C\n private def hidden\n value\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "def hidden\n value\n end", - ), - ( - "class C\n private def self.hidden\n value\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "def self.hidden\n value\n end", - ), - ( - "class C\n private def Owner.hidden\n value\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "def Owner.hidden\n value\n end", - ), - ( - "class C\n private def Owner::Nested.hidden\n value\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "def Owner::Nested.hidden\n value\n end", - ), - ( - "def hidden():\n value\n", - Language::Python, - ".py", - "function_definition", - "def hidden():\n value", - ), - ( - "function hidden() {\n value;\n}\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function hidden() {\n value;\n}", - ), - ( - "function hidden()\n value()\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function hidden()\n value()\nend", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.inline_def_receiver(node).map(|receiver| { - ( - receiver.kind().to_string(), - super::node_text(receiver, source).to_string(), - ) - }); - - assert_eq!( - 
rust, - ruby_private_node_signature( - source, - language, - suffix, - "inline_def_receiver", - kind, - text - ), - "inline_def_receiver mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn inline_def_name_after_receiver_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "class C\n private def self.hidden\n value\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "def self.hidden\n value\n end", - ), - ( - "class C\n private def Owner.hidden\n value\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "def Owner.hidden\n value\n end", - ), - ( - "class C\n private def Owner::Nested.hidden\n value\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "def Owner::Nested.hidden\n value\n end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let receiver = normalizer - .inline_def_receiver(node) - .expect("inline def receiver should exist for name-after-receiver case"); - let rust = normalizer - .inline_def_name_after_receiver(node, receiver) - .unwrap_or_default(); - - assert_eq!( - rust, - ruby_private_inline_def_name_after_receiver(source, language, suffix, kind, text), - "inline_def_name_after_receiver mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn inline_parameter_begin_marker_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f(a); a; end\n", - Language::Ruby, - ".rb", - "method", - "def f(a); a; end", - ), - ( - "def f a; a; end\n", - Language::Ruby, - ".rb", - "method", - "def f a; a; end", - ), - ( - "def f(a)\n a\nend\n", - Language::Ruby, - ".rb", - "method", - "def f(a)\n a\nend", - ), - ( - "def f(a):\n return a\n", - Language::Python, - ".py", - "function_definition", - "def f(a):\n return a", - ), - ( - "function f(a) { return a; }\n", - Language::TypeScript, 
- ".ts", - "function_declaration", - "function f(a) { return a; }", - ), - ( - "function f(a)\n return a\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function f(a)\n return a\nend", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .inline_parameter_begin_marker(node) - .as_ref() - .map(node_value) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_inline_parameter_begin_marker_value( - source, language, suffix, kind, text - ), - "inline_parameter_begin_marker mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn prepend_inline_parameter_begin_matches_ruby_private_method() { - let scalar = test_node("VCALL", Vec::new()); - let block = test_node( - "BLOCK", - vec![Child::Node(Box::new(scalar.clone())), Child::Nil], - ); - let empty_block = test_node("BLOCK", vec![Child::Nil]); - - let cases = vec![ - ( - "no_marker", - "def f(a)\n a\nend\n", - Language::Ruby, - ".rb", - "method", - "def f(a)\n a\nend", - Some(scalar.clone()), - ), - ( - "marker_nil_body", - "def f(a); a; end\n", - Language::Ruby, - ".rb", - "method", - "def f(a); a; end", - None, - ), - ( - "marker_scalar_body", - "def f(a); a; end\n", - Language::Ruby, - ".rb", - "method", - "def f(a); a; end", - Some(scalar.clone()), - ), - ( - "marker_block_body", - "def f(a); a; end\n", - Language::Ruby, - ".rb", - "method", - "def f(a); a; end", - Some(block), - ), - ( - "marker_empty_block", - "def f(a); a; end\n", - Language::Ruby, - ".rb", - "method", - "def f(a); a; end", - Some(empty_block), - ), - ( - "non_ruby", - "def f(a):\n return a\n", - Language::Python, - ".py", - "function_definition", - "def f(a):\n return a", - Some(scalar), - ), - ]; - - for (label, source, language, suffix, kind, text, body) in cases { - let tree = raw_tree(source, language); - let node = 
first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .prepend_inline_parameter_begin(node, body.clone()) - .as_ref() - .map(node_value) - .unwrap_or(Value::Null); - let body_value = body.as_ref().map(node_value).unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_prepend_inline_parameter_begin_value( - source, - language, - suffix, - kind, - text, - &body_value, - ), - "prepend_inline_parameter_begin mismatch for {label}" - ); - } - } - - #[test] - fn scalar_argument_list_value_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n return yield\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "yield", - ), - ( - "def f\n return nil\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "nil", - ), - ( - "def f\n return true\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "true", - ), - ( - "def f\n return false\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "false", - ), - ( - "def f\n return :ok?\nend\n", - Language::Ruby, - ".rb", - "argument_list", - ":ok?", - ), - ( - "def f\n return 12\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "12", - ), - ( - "def f\n return -12\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "-12", - ), - ( - "def f\n return name\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "name", - ), - ( - "def f():\n return value\n", - Language::Python, - ".py", - "identifier", - "value", - ), - ( - "function f() { return value; }\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - ), - ( - "function f()\n return value\nend\n", - Language::Lua, - ".lua", - "expression_list", - "value", - ), - ( - "function f() { return yield; }\n", - Language::TypeScript, - ".ts", - "yield_expression", - "yield", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut 
normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .scalar_argument_list_value(node) - .as_ref() - .map(node_value) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "scalar_argument_list_value", - kind, - text, - ), - "scalar_argument_list_value mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn local_or_call_for_name_matches_ruby_private_method() { - for (source, language, suffix, kind, text, name, local) in [ - ( - "def f\n {name:}\nend\n", - Language::Ruby, - ".rb", - "hash_key_symbol", - "name", - "name", - false, - ), - ( - "def f\n {name:}\nend\n", - Language::Ruby, - ".rb", - "hash_key_symbol", - "name", - "name", - true, - ), - ( - "def f():\n value\n", - Language::Python, - ".py", - "identifier", - "f", - "f", - false, - ), - ( - "function f() { value; }\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - "value", - false, - ), - ( - "function f()\n value()\nend\n", - Language::Lua, - ".lua", - "identifier", - "value", - "value", - false, - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - if local { - normalizer - .local_stack - .push(BTreeSet::from([name.to_string()])); - } - let rust = node_value(&normalizer.local_or_call_for_name(name, node)); - - assert_eq!( - rust, - ruby_private_local_or_call_for_name_value( - source, language, suffix, kind, text, name, local - ), - "local_or_call_for_name mismatch for {language:?} {name:?} local={local}" - ); - } - } - - #[test] - fn literal_arguments_from_text_normalization_matches_ruby() { - let symbol_source = "puts :ok\n"; - let root = parse_language_source(symbol_source, Language::Ruby, ".rb"); - let fcall = first_node(&root, "FCALL", "puts :ok"); - assert_eq!( - fcall.children.first(), - 
Some(&Child::Symbol("puts".to_string())) - ); - let args = child_node(fcall, 1); - assert_eq!(args.r#type, "LIST"); - let lit = child_node(args, 0); - assert_eq!(lit.r#type, "LIT"); - assert_eq!(lit.children.first(), Some(&Child::Symbol("ok".to_string()))); - assert_ruby_parity(symbol_source, Language::Ruby, ".rb"); - - let heredoc_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; - let root = parse_language_source(heredoc_source, Language::Ruby, ".rb"); - let fcall = first_node(&root, "FCALL", "puts <<~TXT"); - let args = child_node(fcall, 1); - assert_eq!(args.r#type, "LIST"); - let dstr = child_node(args, 0); - assert_eq!(dstr.r#type, "DSTR"); - assert_eq!(child_types(dstr), vec!["STR"]); - let body = child_node(dstr, 0); - assert_eq!( - body.children.first(), - Some(&Child::String("\n hi\n ".to_string())) - ); - assert_ruby_parity(heredoc_source, Language::Ruby, ".rb"); - } - - #[test] - fn literal_symbol_arguments_matches_ruby_scan_contract() { - assert_eq!( - super::literal_symbol_arguments(":one, :two?, :three!, :four=, :1, ::Name"), - vec![ - "one".to_string(), - "two?".to_string(), - "three!".to_string(), - "four=".to_string(), - "Name".to_string(), - ] - ); - } - - #[test] - fn elide_tail_returns_matches_ruby_private_method() { - let leaf = |node_type: &str| test_node(node_type, vec![Child::String("value".to_string())]); - let return_leaf = || test_node("RETURN", vec![Child::Node(Box::new(leaf("LVAR")))]); - let protected_def = test_node( - "DEFN", - vec![ - Child::Symbol("kept".to_string()), - Child::Node(Box::new(test_node( - "SCOPE", - vec![Child::Nil, Child::Nil, Child::Node(Box::new(return_leaf()))], - ))), - ], - ); - let cases = vec![ - None, - Some(return_leaf()), - Some(test_node( - "BLOCK", - vec![ - Child::Node(Box::new(leaf("LVAR"))), - Child::Node(Box::new(return_leaf())), - ], - )), - Some(test_node( - "SCOPE", - vec![Child::Nil, Child::Nil, Child::Node(Box::new(return_leaf()))], - )), - Some(test_node( - "IF", - vec![ - 
Child::Node(Box::new(leaf("COND"))), - Child::Node(Box::new(return_leaf())), - Child::Node(Box::new(return_leaf())), - ], - )), - Some(test_node( - "UNLESS", - vec![ - Child::Node(Box::new(leaf("COND"))), - Child::Node(Box::new(return_leaf())), - Child::Node(Box::new(return_leaf())), - ], - )), - Some(test_node( - "CASE", - vec![ - Child::Node(Box::new(leaf("LVAR"))), - Child::Node(Box::new(return_leaf())), - ], - )), - Some(test_node( - "CASE2", - vec![Child::Node(Box::new(return_leaf()))], - )), - Some(test_node( - "WHEN", - vec![ - Child::Node(Box::new(leaf("LIST"))), - Child::Node(Box::new(return_leaf())), - Child::Node(Box::new(return_leaf())), - ], - )), - Some(test_node( - "RESCUE", - vec![ - Child::Node(Box::new(return_leaf())), - Child::Node(Box::new(return_leaf())), - ], - )), - Some(test_node( - "RESBODY", - vec![ - Child::Node(Box::new(leaf("LIST"))), - Child::Node(Box::new(return_leaf())), - Child::Node(Box::new(return_leaf())), - ], - )), - Some(protected_def), - ]; - let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); - - for node in cases { - let input = node.as_ref().map(node_value).unwrap_or(Value::Null); - let rust = normalizer - .elide_tail_returns(node) - .as_ref() - .map(node_value) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_elide_tail_returns_value(&input, true), - "elide_tail_returns mismatch for input {input}" - ); - } - - let non_ruby = Some(return_leaf()); - let input = non_ruby.as_ref().map(node_value).unwrap_or(Value::Null); - let normalizer = super::TreeSitterNormalizer::new("", Language::Python); - let rust = normalizer - .elide_tail_returns(non_ruby) - .as_ref() - .map(node_value) - .unwrap_or(Value::Null); - - assert_eq!(rust, input); - assert_eq!(ruby_private_elide_tail_returns_value(&input, false), input); - } - - #[test] - fn elide_implicit_nil_body_matches_ruby_private_method() { - let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); - let leaf = || test_node("LVAR", 
vec![Child::String("value".to_string())]); - let nil_node = || test_node("NIL", Vec::new()); - let cases = vec![ - None, - Some(nil_node()), - Some(leaf()), - Some(test_node( - "BLOCK", - vec![ - Child::Node(Box::new(leaf())), - Child::Node(Box::new(nil_node())), - Child::Node(Box::new(nil_node())), - ], - )), - Some(test_node( - "BLOCK", - vec![Child::Nil, Child::Node(Box::new(nil_node()))], - )), - Some(test_node( - "BLOCK", - vec![ - Child::Node(Box::new(leaf())), - Child::Node(Box::new(leaf())), - Child::Node(Box::new(nil_node())), - ], - )), - ]; - - for node in cases { - let input = node.as_ref().map(node_value).unwrap_or(Value::Null); - let rust = normalizer - .elide_implicit_nil_body(node) - .as_ref() - .map(node_value) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_elide_implicit_nil_body_value(&input, true), - "elide_implicit_nil_body mismatch for input {input}" - ); - } - - let non_ruby = Some(nil_node()); - let input = non_ruby.as_ref().map(node_value).unwrap_or(Value::Null); - let normalizer = super::TreeSitterNormalizer::new("", Language::Python); - let rust = normalizer - .elide_implicit_nil_body(non_ruby) - .as_ref() - .map(node_value) - .unwrap_or(Value::Null); - - assert_eq!(rust, input); - assert_eq!( - ruby_private_elide_implicit_nil_body_value(&input, false), - input - ); - } - - #[test] - fn drop_trailing_nil_statement_matches_ruby_private_method() { - let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); - let leaf = |node_type: &str| test_node(node_type, vec![Child::Symbol("value".to_string())]); - let nil_node = || test_node("NIL", Vec::new()); - let block = |children| test_node("BLOCK", children); - - for node in [ - None, - Some(nil_node()), - Some(block(vec![ - Child::Node(Box::new(leaf("LASGN"))), - Child::Node(Box::new(nil_node())), - ])), - Some(block(vec![ - Child::Node(Box::new(leaf("LASGN"))), - Child::Node(Box::new(nil_node())), - Child::Node(Box::new(nil_node())), - ])), - Some(block(vec![ - 
Child::Node(Box::new(leaf("LASGN"))), - Child::Nil, - Child::Node(Box::new(nil_node())), - ])), - Some(block(vec![Child::Nil, Child::Node(Box::new(nil_node()))])), - Some(block(vec![ - Child::Node(Box::new(leaf("LASGN"))), - Child::Nil, - Child::Node(Box::new(leaf("VCALL"))), - ])), - Some(block(vec![ - Child::Node(Box::new(leaf("LASGN"))), - Child::Nil, - Child::Node(Box::new(leaf("VCALL"))), - Child::Node(Box::new(nil_node())), - ])), - ] { - let input = node.as_ref().map(node_value).unwrap_or(Value::Null); - let rust = normalizer - .drop_trailing_nil_statement(node) - .as_ref() - .map(node_value) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_drop_trailing_nil_statement_value(&input), - "drop_trailing_nil_statement mismatch for input {input}" - ); - } - } - - #[test] - fn symbol_literal_node_matches_ruby_private_predicate() { - let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); - for (node, node_type, child_kind) in [ - (None, None, None), - ( - Some(test_node("LIT", vec![Child::Symbol("value".to_string())])), - Some("LIT"), - Some("symbol"), - ), - ( - Some(test_node("LIT", vec![Child::String("value".to_string())])), - Some("LIT"), - Some("string"), - ), - (Some(test_node("LIT", Vec::new())), Some("LIT"), None), - ( - Some(test_node("STR", vec![Child::Symbol("value".to_string())])), - Some("STR"), - Some("symbol"), - ), - ( - Some(test_node( - "LIT", - vec![Child::Node(Box::new(test_node("NIL", Vec::new())))], - )), - Some("LIT"), - Some("node"), - ), - ( - Some(test_node("LIT", vec![Child::Nil])), - Some("LIT"), - Some("nil"), - ), - ] { - assert_eq!( - normalizer.symbol_literal_node(node.as_ref()), - ruby_private_symbol_literal_node_predicate(node_type, child_kind), - "symbol_literal_node? 
mismatch for node_type={node_type:?} child_kind={child_kind:?}" - ); - } - } - - #[test] - fn same_ts_node_matches_ruby_private_predicate() { - for ( - source, - language, - suffix, - left_kind, - left_text, - left_index, - right_kind, - right_text, - right_index, - ) in [ - ( - "value\nvalue\n", - Language::Ruby, - ".rb", - "identifier", - "value", - 0, - "identifier", - "value", - 0, - ), - ( - "value\nvalue\n", - Language::Ruby, - ".rb", - "identifier", - "value", - 0, - "identifier", - "value", - 1, - ), - ( - "value\nvalue\n", - Language::Python, - ".py", - "expression_statement", - "value", - 0, - "expression_statement", - "value", - 0, - ), - ( - "value\nvalue\n", - Language::Python, - ".py", - "expression_statement", - "value", - 0, - "expression_statement", - "value", - 1, - ), - ( - "value;\nvalue;\n", - Language::TypeScript, - ".ts", - "expression_statement", - "value;", - 0, - "expression_statement", - "value;", - 1, - ), - ( - "value()\nvalue()\n", - Language::Lua, - ".lua", - "function_call", - "value()", - 0, - "function_call", - "value()", - 0, - ), - ( - "value()\nvalue()\n", - Language::Lua, - ".lua", - "function_call", - "value()", - 0, - "function_call", - "value()", - 1, - ), - ] { - let tree = raw_tree(source, language); - let left = nth_raw_node(tree.root_node(), source, left_kind, left_text, left_index); - let right = nth_raw_node( - tree.root_node(), - source, - right_kind, - right_text, - right_index, - ); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.same_ts_node(left, right), - ruby_private_same_ts_node_predicate( - source, - language, - suffix, - left_kind, - left_text, - left_index, - right_kind, - right_text, - right_index - ), - "same_ts_node? 
mismatch for {language:?} {left_kind}:{left_text:?}[{left_index}] vs {right_kind}:{right_text:?}[{right_index}]" - ); - } - } - - #[test] - fn parent_named_child_matches_ruby_private_predicate() { - for ( - source, - language, - suffix, - parent_kind, - parent_text, - parent_index, - child_kind, - child_text, - child_index, - ) in [ - ( - "def f\n {name:}\nend\n", - Language::Ruby, - ".rb", - "pair", - "name:", - 0, - "hash_key_symbol", - "name", - 0, - ), - ( - "def f\n {name:}\nend\n", - Language::Ruby, - ".rb", - "pair", - "name:", - 0, - "identifier", - "f", - 0, - ), - ( - "def f():\n value\n", - Language::Python, - ".py", - "function_definition", - "def f():\n value", - 0, - "identifier", - "f", - 0, - ), - ( - "def f():\n value\n", - Language::Python, - ".py", - "block", - "value", - 0, - "identifier", - "f", - 0, - ), - ( - "function f() { value; }\n", - Language::TypeScript, - ".ts", - "function_declaration", - "function f() { value; }", - 0, - "identifier", - "f", - 0, - ), - ( - "function f() { value; }\n", - Language::TypeScript, - ".ts", - "statement_block", - "{ value; }", - 0, - "identifier", - "f", - 0, - ), - ( - "function f()\n value()\nend\n", - Language::Lua, - ".lua", - "function_declaration", - "function f()\n value()\nend", - 0, - "identifier", - "f", - 0, - ), - ( - "function f()\n value()\nend\n", - Language::Lua, - ".lua", - "block", - "value()", - 0, - "identifier", - "f", - 0, - ), - ] { - let tree = raw_tree(source, language); - let parent = nth_raw_node( - tree.root_node(), - source, - parent_kind, - parent_text, - parent_index, - ); - let child = nth_raw_node( - tree.root_node(), - source, - child_kind, - child_text, - child_index, - ); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.parent_named_child(parent, child), - ruby_private_parent_named_child_predicate( - source, - language, - suffix, - parent_kind, - parent_text, - parent_index, - child_kind, - child_text, - child_index - 
), - "parent_named_child? mismatch for {language:?} {parent_kind}:{parent_text:?}[{parent_index}] -> {child_kind}:{child_text:?}[{child_index}]" - ); - } - } - - #[test] - fn node_key_matches_ruby_private_method() { - for (source, language, suffix, kind, text, index) in [ - ( - "value\nvalue\n", - Language::Ruby, - ".rb", - "identifier", - "value", - 0, - ), - ( - "value\nvalue\n", - Language::Ruby, - ".rb", - "identifier", - "value", - 1, - ), - ( - "value\nvalue\n", - Language::Python, - ".py", - "expression_statement", - "value", - 1, - ), - ( - "value;\nvalue;\n", - Language::TypeScript, - ".ts", - "expression_statement", - "value;", - 0, - ), - ( - "value()\nvalue()\n", - Language::Lua, - ".lua", - "function_call", - "value()", - 1, - ), - ] { - let tree = raw_tree(source, language); - let node = nth_raw_node(tree.root_node(), source, kind, text, index); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.node_key(node), - ruby_private_node_key_signature(source, language, suffix, kind, text, index), - "node_key mismatch for {language:?} {kind}:{text:?}[{index}]" - ); - } - } - - #[test] - fn bare_identifier_text_matches_ruby_private_predicate() { - for text in [ - "value", - "_value", - "value1", - "value?", - "value!", - "value=", - " value? ", - "", - "1value", - "value-name", - "value?name", - "value??", - "value!=", - "value =", - ] { - assert_eq!( - super::bare_identifier_text(text), - ruby_private_text_predicate(Language::Ruby, "bare_identifier_text?", text), - "bare_identifier_text? 
mismatch for {text:?}" - ); - } - } - - #[test] - fn hidden_match_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "match(value)\n", - Language::Ruby, - ".rb", - "call", - "match(value)", - ), - ( - "match value:\n case 1:\n result\n", - Language::Python, - ".py", - "match_statement", - "match value:\n case 1:\n result", - ), - ( - "match(value)\n", - Language::Python, - ".py", - "expression_statement", - "match(value)", - ), - ( - "match(value);\n", - Language::TypeScript, - ".ts", - "expression_statement", - "match(value);", - ), - ( - "match(value)\n", - Language::Lua, - ".lua", - "function_call", - "match(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.hidden_match(node), - ruby_private_predicate(source, language, suffix, "hidden_match?", kind, text), - "hidden_match? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn kind_type_matches_ruby_private_method() { - for kind in [ - "", - "body_statement", - "block_body", - "block", - "statements", - "expression_statement", - "alreadyCAPS", - "argument-list??", - "foo__bar", - "123kind", - "é_node", - ] { - assert_eq!( - super::kind_type(kind), - ruby_private_text_string(Language::Ruby, "kind_type", kind), - "kind_type mismatch for {kind:?}" - ); - } - } - - #[test] - fn ts_node_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ("ready?\n", Language::Ruby, ".rb", "call", "ready?"), - ( - "value\n", - Language::Python, - ".py", - "expression_statement", - "value", - ), - ( - "let value = 1;\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - ), - ( - "value = 1\n", - Language::Lua, - ".lua", - "variable_list", - "value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - - assert_eq!( - super::ts_node(Some(node)), - ruby_private_predicate(source, language, suffix, "ts_node?", kind, text), - "ts_node? 
raw-node mismatch for {language:?} {kind}:{text:?}" - ); - } - - assert_eq!(super::ts_node(None), ruby_private_ts_node_value("nil")); - assert!(!ruby_private_ts_node_value("string")); - assert!(!ruby_private_ts_node_value("normalized_node")); - } - - #[test] - fn command_call_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n puts value\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "puts value", - ), - ( - "def f\n foo { value }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "foo { value }", - ), - ( - "def f\n foo\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "foo", - ), - ( - "def f\n user.name value\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "user.name value", - ), - ( - "print(value)\n", - Language::Python, - ".py", - "expression_statement", - "print(value)", - ), - ( - "console.log(value);\n", - Language::TypeScript, - ".ts", - "expression_statement", - "console.log(value);", - ), - ( - "print(value)\n", - Language::Lua, - ".lua", - "function_call", - "print(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.command_call_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "command_call_statement?", - kind, - text - ), - "command_call_statement? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_command_call_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n puts value\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "puts value", - ), - ( - "def f\n foo { value }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "foo { value }", - ), - ( - "print(value)\n", - Language::Python, - ".py", - "expression_statement", - "print(value)", - ), - ( - "console.log(value);\n", - Language::TypeScript, - ".ts", - "expression_statement", - "console.log(value);", - ), - ( - "print(value)\n", - Language::Lua, - ".lua", - "function_call", - "print(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_command_call_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_command_call_statement", - kind, - text - ), - "normalize_command_call_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn zero_child_identifier_call_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ("foo?\n", Language::Ruby, ".rb", "call", "foo?"), - ("foo!\n", Language::Ruby, ".rb", "call", "foo!"), - ("foo()\n", Language::Ruby, ".rb", "call", "foo()"), - ( - "foo()\n", - Language::Python, - ".py", - "expression_statement", - "foo()", - ), - ( - "foo();\n", - Language::TypeScript, - ".ts", - "call_expression", - "foo()", - ), - ("foo()\n", Language::Lua, ".lua", "function_call", "foo()"), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - 
assert_eq!( - normalizer.zero_child_identifier_call(node), - ruby_private_predicate( - source, - language, - suffix, - "zero_child_identifier_call?", - kind, - text - ), - "zero_child_identifier_call? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn zero_child_identifier_call_normalization_matches_ruby() { - for source in ["foo?\n", "foo!\n"] { - let root = parse_language_source(source, Language::Ruby, ".rb"); - let text = source.trim(); - let vcall = first_node(&root, "VCALL", text); - assert_eq!( - vcall.children.first(), - Some(&Child::Symbol(text.to_string())) - ); - assert_ruby_parity(source, Language::Ruby, ".rb"); - } - } - - #[test] - fn normalize_zero_child_call_matches_ruby_private_method() { - for source in ["foo?\n", "foo!\n", "foo()\n"] { - let text = source.trim(); - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, "call", text); - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = normalizer.normalize_zero_child_call(node); - - assert_eq!( - node_value(&rust), - ruby_private_normalize_method_value( - source, - Language::Ruby, - ".rb", - "normalize_zero_child_call", - "call", - text - ), - "normalize_zero_child_call mismatch for {text:?}" - ); - } - } - - #[test] - fn normalize_const_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ("Foo\n", Language::Ruby, ".rb", "constant", "Foo"), - ( - "Foo::Bar\n", - Language::Ruby, - ".rb", - "scope_resolution", - "Foo::Bar", - ), - ( - "class Foo::Bar::Baz\nend\n", - Language::Ruby, - ".rb", - "scope_resolution", - "Foo::Bar::Baz", - ), - ( - "type Alias = Foo;\n", - Language::TypeScript, - ".ts", - "type_identifier", - "Foo", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.normalize_const(node); - - 
assert_eq!( - node_value(&rust), - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_const", - kind, - text - ), - "normalize_const mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn assignment_receiver_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ("value += 1\n", Language::Ruby, ".rb", "identifier", "value"), - ( - "@value += 1\n", - Language::Ruby, - ".rb", - "instance_variable", - "@value", - ), - ( - "$value += 1\n", - Language::Ruby, - ".rb", - "global_variable", - "$value", - ), - ("VALUE += 1\n", Language::Ruby, ".rb", "constant", "VALUE"), - ( - "user.value += 1\n", - Language::Ruby, - ".rb", - "call", - "user.value", - ), - ( - "value += 1\n", - Language::Python, - ".py", - "identifier", - "value", - ), - ( - "user.value += 1\n", - Language::Python, - ".py", - "attribute", - "user.value", - ), - ( - "value += 1;\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - ), - ( - "user.value += 1;\n", - Language::TypeScript, - ".ts", - "member_expression", - "user.value", - ), - ( - "value = 1\n", - Language::Lua, - ".lua", - "variable_list", - "value", - ), - ( - "user.value = 1\n", - Language::Lua, - ".lua", - "variable_list", - "user.value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .assignment_receiver(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "assignment_receiver", - kind, - text - ), - "assignment_receiver mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn assignment_target_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "@value = 1\n", - Language::Ruby, - ".rb", - "instance_variable", - "@value", - 
), - ( - "$value = 1\n", - Language::Ruby, - ".rb", - "global_variable", - "$value", - ), - ( - "items[index] = value\n", - Language::Ruby, - ".rb", - "element_reference", - "items[index]", - ), - ( - "user.value = 1\n", - Language::Ruby, - ".rb", - "call", - "user.value", - ), - ( - "user.value = 1\n", - Language::Python, - ".py", - "attribute", - "user.value", - ), - ( - "user.value = 1;\n", - Language::TypeScript, - ".ts", - "member_expression", - "user.value", - ), - ( - "user.value = 1\n", - Language::Lua, - ".lua", - "variable_list", - "user.value", - ), - ( - "value = 1\n", - Language::Lua, - ".lua", - "variable_list", - "value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let source_node = normalizer.parent_node(node).unwrap_or(node); - let right = normalizer - .assignment_right(source_node) - .and_then(|right| normalizer.normalize_node(right)); - let rust = normalizer - .assignment_target(node, right, source_node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_assignment_target_value(source, language, suffix, kind, text), - "assignment_target mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn augmented_assignment_value_matches_ruby_private_method() { - for (source, language, suffix, kind, text, operator) in [ - ( - "value += 1\n", - Language::Ruby, - ".rb", - "identifier", - "value", - "+", - ), - ( - "@value *= 2\n", - Language::Ruby, - ".rb", - "instance_variable", - "@value", - "*", - ), - ( - "$value += 1\n", - Language::Ruby, - ".rb", - "global_variable", - "$value", - "+", - ), - ( - "VALUE -= 1\n", - Language::Ruby, - ".rb", - "constant", - "VALUE", - "-", - ), - ( - "user.value += 1\n", - Language::Ruby, - ".rb", - "call", - "user.value", - "+", - ), - ( - "value += 1\n", - Language::Python, - ".py", - "identifier", - "value", 
- "+", - ), - ( - "user.value += 1\n", - Language::Python, - ".py", - "attribute", - "user.value", - "+", - ), - ( - "value += 1;\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - "+", - ), - ( - "user.value += 1;\n", - Language::TypeScript, - ".ts", - "member_expression", - "user.value", - "+", - ), - ( - "value = 1\n", - Language::Lua, - ".lua", - "variable_list", - "value", - "+", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let source_node = normalizer.parent_node(node).unwrap_or(node); - let right_raw = normalizer.assignment_right(source_node); - let rust = - normalizer.augmented_assignment_value(node, operator, right_raw, source_node); - - assert_eq!( - node_value(&rust), - ruby_private_augmented_assignment_value( - source, language, suffix, kind, text, operator - ), - "augmented_assignment_value mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn target_name_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "value = other\n", - Language::Ruby, - ".rb", - "identifier", - "value", - ), - ( - "$value = other\n", - Language::Ruby, - ".rb", - "global_variable", - "$value", - ), - ( - "VALUE = other\n", - Language::Ruby, - ".rb", - "constant", - "VALUE", - ), - ( - "a, *rest = values\n", - Language::Ruby, - ".rb", - "rest_assignment", - "*rest", - ), - ( - "value = other\n", - Language::Python, - ".py", - "identifier", - "value", - ), - ( - "let value = other;\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - ), - ( - "value = other\n", - Language::Lua, - ".lua", - "variable_list", - "value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - 
Value::String(normalizer.target_name(node)), - ruby_private_normalize_method_value( - source, - language, - suffix, - "target_name", - kind, - text - ), - "target_name mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_multiple_assignment_matches_ruby_private_method() { - for (source, kind, text) in [ - ("a, b = values\n", "assignment", "a, b = values"), - ("$a, b = values\n", "assignment", "$a, b = values"), - ("a, *rest = values\n", "assignment", "a, *rest = values"), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let left = normalizer - .assignment_left(node) - .expect("multiple assignment should have left side"); - let right = normalizer - .assignment_right(node) - .and_then(|right| normalizer.normalize_node(right)); - let rust = normalizer.normalize_multiple_assignment(left, right, node); - - assert_eq!( - node_value(&rust), - ruby_private_normalize_multiple_assignment_value( - source, - Language::Ruby, - ".rb", - kind, - text - ), - "normalize_multiple_assignment mismatch for {text:?}" - ); - } - } - - #[test] - fn normalize_assignment_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "value = other\n", - Language::Ruby, - ".rb", - "assignment", - "value = other", - ), - ( - "@value = other\n", - Language::Ruby, - ".rb", - "assignment", - "@value = other", - ), - ( - "$value = other\n", - Language::Ruby, - ".rb", - "assignment", - "$value = other", - ), - ( - "items[index] = value\n", - Language::Ruby, - ".rb", - "assignment", - "items[index] = value", - ), - ( - "user.value = other\n", - Language::Ruby, - ".rb", - "assignment", - "user.value = other", - ), - ( - "a, b = values\n", - Language::Ruby, - ".rb", - "assignment", - "a, b = values", - ), - ( - "value = other\n", - Language::Python, - ".py", - "expression_statement", - "value = 
other", - ), - ( - "user.value = other\n", - Language::Python, - ".py", - "expression_statement", - "user.value = other", - ), - ( - "value = other;\n", - Language::TypeScript, - ".ts", - "expression_statement", - "value = other;", - ), - ( - "user.value = other;\n", - Language::TypeScript, - ".ts", - "expression_statement", - "user.value = other;", - ), - ( - "value = other\n", - Language::Lua, - ".lua", - "assignment_statement", - "value = other", - ), - ( - "user.value = other\n", - Language::Lua, - ".lua", - "assignment_statement", - "user.value = other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_assignment(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_assignment", - kind, - text - ), - "normalize_assignment mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_assignment_lhs_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "value = other\n", - Language::Ruby, - ".rb", - "identifier", - "value", - ), - ( - "@value = other\n", - Language::Ruby, - ".rb", - "instance_variable", - "@value", - ), - ( - "$value = other\n", - Language::Ruby, - ".rb", - "global_variable", - "$value", - ), - ( - "items[index] = value\n", - Language::Ruby, - ".rb", - "element_reference", - "items[index]", - ), - ( - "user.value = other\n", - Language::Ruby, - ".rb", - "call", - "user.value", - ), - ( - "value = other\n", - Language::Python, - ".py", - "identifier", - "value", - ), - ( - "user.value = other\n", - Language::Python, - ".py", - "attribute", - "user.value", - ), - ( - "value = other;\n", - Language::TypeScript, - ".ts", - "identifier", - "value", - ), - ( - "user.value = other;\n", - 
Language::TypeScript, - ".ts", - "member_expression", - "user.value", - ), - ( - "value = other\n", - Language::Lua, - ".lua", - "variable_list", - "value", - ), - ( - "user.value = other\n", - Language::Lua, - ".lua", - "variable_list", - "user.value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_assignment_lhs(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_assignment_lhs", - kind, - text - ), - "normalize_assignment_lhs mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_begin_matches_ruby_private_method() { - for (source, text) in [ - ("begin\n work\n done\nend\n", "begin\n work\n done\nend"), - ( - "begin\n work\nensure\n cleanup\nend\n", - "begin\n work\nensure\n cleanup\nend", - ), - ( - "begin\n work\nrescue Error => e\n handle\nend\n", - "begin\n work\nrescue Error => e\n handle\nend", - ), - ( - "begin\n work\nrescue Error => e\n handle\nensure\n cleanup\nend\n", - "begin\n work\nrescue Error => e\n handle\nensure\n cleanup\nend", - ), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, "begin", text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = normalizer - .normalize_begin(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - Language::Ruby, - ".rb", - "normalize_begin", - "begin", - text - ), - "normalize_begin mismatch for {text:?}" - ); - } - } - - #[test] - fn normalize_block_argument_matches_ruby_private_method() { - for (source, text) in [ - ("foo(&block)\n", "&block"), - ("foo(&:to_s)\n", "&:to_s"), - 
("foo(&method(:bar))\n", "&method(:bar)"), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, "block_argument", text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = normalizer - .normalize_block_argument(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - Language::Ruby, - ".rb", - "normalize_block_argument", - "block_argument", - text - ), - "normalize_block_argument mismatch for {text:?}" - ); - } - } - - #[test] - fn normalize_body_nodes_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ("\n", Language::Ruby, ".rb", "__root__", ""), - ("value\n", Language::Ruby, ".rb", "__root__", ""), - ("first\nsecond\n", Language::Ruby, ".rb", "__root__", ""), - ( - "first()\nsecond()\n", - Language::Python, - ".py", - "__root__", - "", - ), - ( - "first();\nsecond();\n", - Language::TypeScript, - ".ts", - "__root__", - "", - ), - ("first()\nsecond()\n", Language::Lua, ".lua", "__root__", ""), - ] { - let tree = raw_tree(source, language); - let target = if kind == "__root__" { - tree.root_node() - } else { - first_raw_node(tree.root_node(), source, kind, text) - }; - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let nodes = normalizer.named_children(target); - let rust = normalizer - .normalize_body_nodes(nodes, target) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_body_nodes_value(source, language, suffix, kind, text), - "normalize_body_nodes mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_children_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n one\n two\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "one\n two", - ), - ( - "def f\n value = other\nend\n", - 
Language::Ruby, - ".rb", - "body_statement", - "value = other", - ), - ( - "def f\n x = <<~TXT\n hi\n TXT\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "x = <<~TXT\n hi\n TXT", - ), - ( - "def f():\n one()\n two()\n", - Language::Python, - ".py", - "block", - "one()\n two()", - ), - ( - "def f():\n value = other\n", - Language::Python, - ".py", - "block", - "value = other", - ), - ( - "function f(){ one(); two(); }\n", - Language::TypeScript, - ".ts", - "statement_block", - "{ one(); two(); }", - ), - ( - "function f(){ value = other; }\n", - Language::TypeScript, - ".ts", - "assignment_expression", - "value = other", - ), - ( - "function f()\n one()\n two()\nend\n", - Language::Lua, - ".lua", - "block", - "one()\n two()", - ), - ( - "function f()\n value = other\nend\n", - Language::Lua, - ".lua", - "block", - "value = other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = children_value(&normalizer.normalize_children(node)); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_children", - kind, - text - ), - "normalize_children mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_class_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "class Thing; end\n", - Language::Ruby, - ".rb", - "class", - "class Thing; end", - ), - ( - "class Thing:\n pass\n", - Language::Python, - ".py", - "class_definition", - "class Thing:\n pass", - ), - ( - "class Thing {}\n", - Language::TypeScript, - ".ts", - "class_declaration", - "class Thing {}", - ), - ( - "local Thing = {}\n", - Language::Lua, - ".lua", - "variable_declaration", - "local Thing = {}", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer 
= super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_class(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_class", - kind, - text - ), - "normalize_class mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_impl_matches_ruby_private_method() { - for (source, kind, text) in [( - "impl Thing {\n fn call(&self) {\n work();\n }\n}\n", - "impl_item", - "impl Thing {\n fn call(&self) {\n work();\n }\n}", - )] { - let tree = raw_tree(source, Language::Rust); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Rust); - let rust = normalizer - .normalize_impl(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - Language::Rust, - ".rs", - "normalize_impl", - kind, - text - ), - "normalize_impl mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn rust_impl_normalization_matches_ruby() { - let source = "impl Thing {\n fn call(&self) {\n work();\n }\n}\n"; - let root = parse_language_source(source, Language::Rust, ".rs"); - let class_node = first_node(&root, "CLASS", source.trim_end()); - - assert_eq!(child_node(class_node, 0).r#type, "CONST"); - assert_ruby_parity(source, Language::Rust, ".rs"); - } - - #[test] - fn normalize_body_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n value\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value", - ), - ( - "def f\n return value\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "return value", - ), - ( - "def f\n items[index]\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "items[index]", - ), - ( - "def f\n [first, second]\nend\n", - Language::Ruby, - ".rb", - "body_statement", - 
"[first, second]", - ), - ( - "def f\n value if ready?\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "value if ready?", - ), - ( - "def f\n left && right\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "left && right", - ), - ( - "def f():\n return value\n", - Language::Python, - ".py", - "block", - "return value", - ), - ( - "def f():\n value = other\n", - Language::Python, - ".py", - "block", - "value = other", - ), - ( - "function f() {\n return value;\n}\n", - Language::TypeScript, - ".ts", - "return_statement", - "return value;", - ), - ( - "function f() {\n value = other;\n}\n", - Language::TypeScript, - ".ts", - "expression_statement", - "value = other;", - ), - ( - "function f()\n return value\nend\n", - Language::Lua, - ".lua", - "block", - "return value", - ), - ( - "function f()\n value = other\nend\n", - Language::Lua, - ".lua", - "block", - "value = other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_body(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_body", - kind, - text - ), - "normalize_body mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_return_value_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n return nil\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "nil", - ), - ( - "def f\n return items[index]\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "items[index]", - ), - ( - "def f\n return left && right\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "left && right", - ), - ( - "def f\n return condition ? yes : no\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "condition ? 
yes : no", - ), - ( - "def f\n return foo { value }\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "foo { value }", - ), - ( - "def f\n return user.name\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "user.name", - ), - ( - "def f\n return !value\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "!value", - ), - ( - "def f\n return left + right\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "left + right", - ), - ( - "def f\n return foo(bar)\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "foo(bar)", - ), - ( - "def f():\n return value + other\n", - Language::Python, - ".py", - "binary_operator", - "value + other", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_return_value(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_return_value", - kind, - text - ), - "normalize_return_value mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_return_node_matches_ruby_private_method() { - for (source, language, suffix, kind, text, elide_symbol) in [ - ( - "return :ok if cond\n", - Language::Ruby, - ".rb", - "return", - "return :ok", - false, - ), - ( - "return :ok if cond\n", - Language::Ruby, - ".rb", - "return", - "return :ok", - true, - ), - ( - "return value if cond\n", - Language::Ruby, - ".rb", - "return", - "return value", - true, - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_return_node_with_elide_symbol(node, elide_symbol) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - 
ruby_private_normalize_return_node_value( - source, - language, - suffix, - kind, - text, - elide_symbol - ), - "normalize_return_node mismatch for {language:?} {kind} {text:?} elide_symbol={elide_symbol}" - ); - } - } - - #[test] - fn normalize_return_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "return :ok if cond\n", - Language::Ruby, - ".rb", - "return", - "return :ok", - ), - ("break if done\n", Language::Ruby, ".rb", "break", "break"), - ( - "next value if done\n", - Language::Ruby, - ".rb", - "next", - "next value", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_return(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_return", - kind, - text - ), - "normalize_return mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn call_arguments_matches_ruby_private_method() { - for (source, language, suffix, kind, text, function_mode) in [ - ( - "foo(value)\n", - Language::Ruby, - ".rb", - "call", - "foo(value)", - "auto", - ), - ( - "foo(left + right)\n", - Language::Ruby, - ".rb", - "call", - "foo(left + right)", - "auto", - ), - ( - "foo(user.name)\n", - Language::Ruby, - ".rb", - "call", - "foo(user.name)", - "auto", - ), - ( - "user.name(value)\n", - Language::Ruby, - ".rb", - "call", - "user.name(value)", - "none", - ), - ( - "foo(value)\n", - Language::Python, - ".py", - "call", - "foo(value)", - "auto", - ), - ( - "foo(value);\n", - Language::TypeScript, - ".ts", - "call_expression", - "foo(value)", - "auto", - ), - ( - "foo(value)\n", - Language::Lua, - ".lua", - "function_call", - "foo(value)", - "auto", - ), - ( - "user.name(value)\n", - Language::Lua, - ".lua", - "function_call", - 
"user.name(value)", - "none", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let function = match function_mode { - "auto" => normalizer - .named_field(node, "function") - .or_else(|| normalizer.named_field(node, "call")) - .or_else(|| normalizer.named_children(node).into_iter().next()), - "none" => None, - other => panic!("unknown function mode {other:?}"), - }; - let rust = Value::Array( - normalizer - .call_arguments(node, function) - .iter() - .map(node_value) - .collect(), - ); - - assert_eq!( - rust, - ruby_private_call_arguments_value( - source, - language, - suffix, - kind, - text, - function_mode - ), - "call_arguments mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_call_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ("ready?\n", Language::Ruby, ".rb", "call", "ready?"), - ("foo(value)\n", Language::Ruby, ".rb", "call", "foo(value)"), - ( - "user.name(value)\n", - Language::Ruby, - ".rb", - "call", - "user.name(value)", - ), - ( - "def f\n foo { bar }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "foo { bar }", - ), - ( - "foo(value)\n", - Language::Python, - ".py", - "expression_statement", - "foo(value)", - ), - ( - "foo(value);\n", - Language::TypeScript, - ".ts", - "call_expression", - "foo(value)", - ), - ( - "foo(value)\n", - Language::Lua, - ".lua", - "function_call", - "foo(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_call(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_call", - kind, - text - ), - 
"normalize_call mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_call_with_block_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "items.map { |item| item }\n", - Language::Ruby, - ".rb", - "call", - "items.map { |item| item }", - ), - ( - "items.each do |item|\n item\nend\n", - Language::Ruby, - ".rb", - "call", - "items.each do |item|\n item\nend", - ), - ( - "foo(1) { bar }\n", - Language::Ruby, - ".rb", - "call", - "foo(1) { bar }", - ), - ( - "def f\n foo { bar }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "foo { bar }", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_call_with_block(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_call_with_block", - kind, - text - ), - "normalize_call_with_block mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_call_without_block_matches_ruby_private_method() { - for (source, language, suffix, kind, text, block_mode) in [ - ( - "foo(value)\n", - Language::Ruby, - ".rb", - "call", - "foo(value)", - "none", - ), - ( - "user.name(value)\n", - Language::Ruby, - ".rb", - "call", - "user.name(value)", - "none", - ), - ( - "foo(1) { bar }\n", - Language::Ruby, - ".rb", - "call", - "foo(1) { bar }", - "auto", - ), - ( - "items.map(1) { |item| item }\n", - Language::Ruby, - ".rb", - "call", - "items.map(1) { |item| item }", - "auto", - ), - ( - "Foo { bar }\n", - Language::Ruby, - ".rb", - "call", - "Foo { bar }", - "auto", - ), - ( - "foo(value)\n", - Language::Python, - ".py", - "expression_statement", - "foo(value)", - "none", - ), - ( - "foo(value);\n", - Language::TypeScript, - ".ts", - "call_expression", - 
"foo(value)", - "none", - ), - ( - "foo(value)\n", - Language::Lua, - ".lua", - "function_call", - "foo(value)", - "none", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let block = match block_mode { - "auto" => normalizer.call_block(node), - "none" => None, - other => panic!("unknown block mode {other:?}"), - }; - let rust = normalizer - .normalize_call_without_block(node, block) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_call_without_block_value( - source, language, suffix, kind, text, block_mode - ), - "normalize_call_without_block mismatch for {language:?} {kind} {text:?} with block mode {block_mode:?}" - ); - } - } - - #[test] - fn command_arguments_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "foo value\n", - Language::Ruby, - ".rb", - "argument_list", - "value", - ), - ( - "foo :name\n", - Language::Ruby, - ".rb", - "argument_list", - ":name", - ), - ( - "foo left + right\n", - Language::Ruby, - ".rb", - "argument_list", - "left + right", - ), - ( - "foo user.name\n", - Language::Ruby, - ".rb", - "argument_list", - "user.name", - ), - ( - "foo(value)\n", - Language::Python, - ".py", - "argument_list", - "(value)", - ), - ( - "foo(left + right)\n", - Language::Python, - ".py", - "argument_list", - "(left + right)", - ), - ( - "foo(value);\n", - Language::TypeScript, - ".ts", - "arguments", - "(value)", - ), - ( - "foo(value)\n", - Language::Lua, - ".lua", - "arguments", - "(value)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = Value::Array( - normalizer - .command_arguments(node) - .iter() - .map(node_value) - .collect(), - ); - - assert_eq!( - 
rust, - ruby_private_command_arguments_value(source, language, suffix, kind, text), - "command_arguments mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn const_for_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ("Foo\n", Language::Ruby, ".rb", "constant", "Foo"), - ("foo\n", Language::Ruby, ".rb", "identifier", "foo"), - ( - "class Foo:\n pass\n", - Language::Python, - ".py", - "identifier", - "Foo", - ), - ( - "type Alias = Foo;\n", - Language::TypeScript, - ".ts", - "type_identifier", - "Foo", - ), - ( - "local Foo = {}\n", - Language::Lua, - ".lua", - "variable_list", - "Foo", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.const_for(Some(node), node); - - assert_eq!( - node_value(&rust), - ruby_private_normalize_method_value( - source, - language, - suffix, - "const_for", - kind, - text - ), - "const_for mismatch for {language:?} {kind} {text:?}" - ); - } - - for (source, language, suffix) in [ - ("class Foo\nend\n", Language::Ruby, ".rb"), - ("class Foo:\n pass\n", Language::Python, ".py"), - ("class Foo {}\n", Language::TypeScript, ".ts"), - ("local Foo = {}\n", Language::Lua, ".lua"), - ] { - let tree = raw_tree(source, language); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.const_for(None, tree.root_node()); - - assert_eq!( - node_value(&rust), - ruby_private_const_for_nil_value(source, language, suffix), - "const_for nil mismatch for {language:?}" - ); - } - } - - #[test] - fn normalize_patterns_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "case value\nwhen 1\n one\nend\n", - Language::Ruby, - ".rb", - "when", - "when 1\n one", - ), - ( - "case\nwhen ready\n one\nend\n", - Language::Ruby, - ".rb", - "when", - "when ready\n one", - ), 
- ( - "case value\nwhen Foo::Bar\n one\nend\n", - Language::Ruby, - ".rb", - "when", - "when Foo::Bar\n one", - ), - ( - "case value\nwhen Foo\n one\nend\n", - Language::Ruby, - ".rb", - "when", - "when Foo\n one", - ), - ( - "match value:\n case 1:\n one()\n", - Language::Python, - ".py", - "case_clause", - "case 1:\n one()", - ), - ( - "switch (value) { case 1: one(); default: other(); }\n", - Language::TypeScript, - ".ts", - "switch_case", - "case 1: one();", - ), - ("return 1\n", Language::Lua, ".lua", "expression_list", "1"), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = Value::Array( - normalizer - .normalize_patterns(node) - .iter() - .map(node_value) - .collect(), - ); - - assert_eq!( - rust, - ruby_private_normalize_patterns_value(source, language, suffix, kind, text), - "normalize_patterns mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn case_value_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "case value\nwhen 1\n one\nend\n", - Language::Ruby, - ".rb", - "case", - "case value\nwhen 1\n one\nend", - ), - ( - "case\nwhen ready\n one\nend\n", - Language::Ruby, - ".rb", - "case", - "case\nwhen ready\n one\nend", - ), - ( - "match value:\n case 1:\n one()\n", - Language::Python, - ".py", - "match_statement", - "match value:\n case 1:\n one()", - ), - ( - "switch (value) { case 1: one(); }\n", - Language::TypeScript, - ".ts", - "switch_statement", - "switch (value) { case 1: one(); }", - ), - ( - "if value == 1 then one() end\n", - Language::Lua, - ".lua", - "if_statement", - "if value == 1 then one() end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = 
normalizer.case_value(node).map(|value| { - ( - value.kind().to_string(), - super::node_text(value, source).to_string(), - ) - }); - - assert_eq!( - rust, - ruby_private_node_signature(source, language, suffix, "case_value", kind, text), - "case_value mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn case_arms_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "case value\nwhen 1\n one\nwhen 2\n two\nelse\n other\nend\n", - Language::Ruby, - ".rb", - "case", - "case value\nwhen 1\n one\nwhen 2\n two\nelse\n other\nend", - ), - ( - "match value:\n case 1:\n one()\n case _:\n other()\n", - Language::Python, - ".py", - "match_statement", - "match value:\n case 1:\n one()\n case _:\n other()", - ), - ( - "switch (value) { case 1: one(); default: other(); }\n", - Language::TypeScript, - ".ts", - "switch_statement", - "switch (value) { case 1: one(); default: other(); }", - ), - ( - "if value == 1 then one() end\n", - Language::Lua, - ".lua", - "if_statement", - "if value == 1 then one() end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .case_arms(node) - .into_iter() - .map(|arm| { - ( - arm.kind().to_string(), - super::node_text(arm, source).to_string(), - ) - }) - .collect::>(); - - assert_eq!( - rust, - ruby_private_node_list_signature(source, language, suffix, "case_arms", kind, text), - "case_arms mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn when_body_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "case value\nwhen 1\n one\nend\n", - Language::Ruby, - ".rb", - "when", - "when 1\n one", - ), - ( - "match value:\n case 1:\n one()\n", - Language::Python, - ".py", - "case_clause", - "case 1:\n one()", - ), - ( - "switch (value) { case 1: one(); default: other(); }\n", - 
Language::TypeScript, - ".ts", - "switch_case", - "case 1: one();", - ), - ( - "switch (value) { case 1: one(); default: other(); }\n", - Language::TypeScript, - ".ts", - "switch_default", - "default: other();", - ), - ( - "if value == 1 then one() end\n", - Language::Lua, - ".lua", - "if_statement", - "if value == 1 then one() end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.when_body(node).map(|body| { - ( - body.kind().to_string(), - super::node_text(body, source).to_string(), - ) - }); - - assert_eq!( - rust, - ruby_private_node_signature(source, language, suffix, "when_body", kind, text), - "when_body mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_when_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "case value\nwhen 1\n one\nend\n", - Language::Ruby, - ".rb", - "when", - "when 1\n one", - ), - ( - "case value\nwhen Foo::Bar\n one\nend\n", - Language::Ruby, - ".rb", - "when", - "when Foo::Bar\n one", - ), - ( - "match value:\n case 1:\n one()\n", - Language::Python, - ".py", - "case_clause", - "case 1:\n one()", - ), - ( - "switch (value) { case 1: one(); break; default: other(); }\n", - Language::TypeScript, - ".ts", - "switch_case", - "case 1: one(); break;", - ), - ( - "if value == 1 then one() else other() end\n", - Language::Lua, - ".lua", - "if_statement", - "if value == 1 then one() else other() end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_when(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_when", 
- kind, - text - ), - "normalize_when mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn case_else_body_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "case value\nwhen 1\n one\nelse\n other\nend\n", - Language::Ruby, - ".rb", - "case", - "case value\nwhen 1\n one\nelse\n other\nend", - ), - ( - "case value\nwhen 1\n one\nend\n", - Language::Ruby, - ".rb", - "case", - "case value\nwhen 1\n one\nend", - ), - ( - "match value:\n case 1:\n one()\n case _:\n other()\n", - Language::Python, - ".py", - "match_statement", - "match value:\n case 1:\n one()\n case _:\n other()", - ), - ( - "match value:\n case 1:\n one()\n", - Language::Python, - ".py", - "match_statement", - "match value:\n case 1:\n one()", - ), - ( - "switch (value) { case 1: one(); break; default: other(); }\n", - Language::TypeScript, - ".ts", - "switch_statement", - "switch (value) { case 1: one(); break; default: other(); }", - ), - ( - "switch (value) { case 1: one(); break; }\n", - Language::TypeScript, - ".ts", - "switch_statement", - "switch (value) { case 1: one(); break; }", - ), - ( - "if value == 1 then one() else other() end\n", - Language::Lua, - ".lua", - "if_statement", - "if value == 1 then one() else other() end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .case_else_body(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "case_else_body", - kind, - text - ), - "case_else_body mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_case_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "case value\nwhen 1\n one\nwhen 2\n two\nelse\n other\nend\n", - Language::Ruby, - ".rb", - 
"case", - "case value\nwhen 1\n one\nwhen 2\n two\nelse\n other\nend", - ), - ( - "case\nwhen ready\n one\nelse\n other\nend\n", - Language::Ruby, - ".rb", - "case", - "case\nwhen ready\n one\nelse\n other\nend", - ), - ( - "match value:\n case 1:\n one()\n case _:\n other()\n", - Language::Python, - ".py", - "match_statement", - "match value:\n case 1:\n one()\n case _:\n other()", - ), - ( - "switch (value) { case 1: one(); break; default: other(); }\n", - Language::TypeScript, - ".ts", - "switch_statement", - "switch (value) { case 1: one(); break; default: other(); }", - ), - ( - "if value == 1 then one() else other() end\n", - Language::Lua, - ".lua", - "if_statement", - "if value == 1 then one() else other() end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_case(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_case", - kind, - text - ), - "normalize_case mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn dotted_call_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), - ("user&.name\n", Language::Ruby, ".rb", "call", "user&.name"), - ("user\n", Language::Ruby, ".rb", "identifier", "user"), - ( - "user.name()\n", - Language::Python, - ".py", - "attribute", - "user.name", - ), - ( - "user\n", - Language::Python, - ".py", - "expression_statement", - "user", - ), - ( - "user.name();\n", - Language::TypeScript, - ".ts", - "member_expression", - "user.name", - ), - ("user;\n", Language::TypeScript, ".ts", "identifier", "user"), - ( - "user.name()\n", - Language::Lua, - ".lua", - "dot_index_expression", - "user.name", - ), - ("user()\n", 
Language::Lua, ".lua", "function_call", "user()"), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.dotted_call(node), - ruby_private_predicate(source, language, suffix, "dotted_call?", kind, text), - "dotted_call? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn dotted_expression_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n user.name\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "user.name", - ), - ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), - ( - "user.name\n", - Language::Python, - ".py", - "expression_statement", - "user.name", - ), - ( - "user.name()\n", - Language::Python, - ".py", - "attribute", - "user.name", - ), - ( - "user.name;\n", - Language::TypeScript, - ".ts", - "expression_statement", - "user.name;", - ), - ( - "user.name;\n", - Language::TypeScript, - ".ts", - "member_expression", - "user.name", - ), - ( - "user.name()\n", - Language::Lua, - ".lua", - "dot_index_expression", - "user.name", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.dotted_expression(node), - ruby_private_predicate(source, language, suffix, "dotted_expression?", kind, text), - "dotted_expression? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn dotted_expression_normalization_matches_ruby() { - for (source, language, suffix) in [ - ("def f\n user.name\nend\n", Language::Ruby, ".rb"), - ("user.name\n", Language::Python, ".py"), - ] { - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn normalize_else_or_branch_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "if ready\n call\nelse\n stop\nend\n", - Language::Ruby, - ".rb", - "else", - "else\n stop", - ), - ( - "if ready\n call\nelse\n user.name\nend\n", - Language::Ruby, - ".rb", - "else", - "else\n user.name", - ), - ( - "if ready:\n call()\nelse:\n stop()\n", - Language::Python, - ".py", - "else_clause", - "else:\n stop()", - ), - ( - "if ready:\n call()\nelse:\n if backup:\n stop()\n", - Language::Python, - ".py", - "else_clause", - "else:\n if backup:\n stop()", - ), - ( - "if (ready) { call(); } else { stop(); }\n", - Language::TypeScript, - ".ts", - "else_clause", - "else { stop(); }", - ), - ( - "if ready then\n call()\nelse\n stop()\nend\n", - Language::Lua, - ".lua", - "else_statement", - "else\n stop()", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_else_or_branch(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_else_or_branch", - kind, - text - ), - "normalize_else_or_branch mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_if_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "if ready\n call\nelse\n stop\nend\n", - Language::Ruby, - ".rb", - "if", - "if ready\n call\nelse\n stop\nend", - ), - ( - "call if ready\n", - Language::Ruby, - ".rb", 
- "if_modifier", - "call if ready", - ), - ( - "unless ready\n call\nend\n", - Language::Ruby, - ".rb", - "unless", - "unless ready\n call\nend", - ), - ( - "if ready:\n call()\nelse:\n stop()\n", - Language::Python, - ".py", - "if_statement", - "if ready:\n call()\nelse:\n stop()", - ), - ( - "if ready:\n call()\nelif other:\n stop()\n", - Language::Python, - ".py", - "if_statement", - "if ready:\n call()\nelif other:\n stop()", - ), - ( - "if (ready) { call(); } else { stop(); }\n", - Language::TypeScript, - ".ts", - "if_statement", - "if (ready) { call(); } else { stop(); }", - ), - ( - "if ready then\n call()\nelseif other then\n stop()\nend\n", - Language::Lua, - ".lua", - "if_statement", - "if ready then\n call()\nelseif other then\n stop()\nend", - ), - ( - "if ready then\n call()\nelse\n stop()\nend\n", - Language::Lua, - ".lua", - "if_statement", - "if ready then\n call()\nelse\n stop()\nend", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_if(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_if", - kind, - text - ), - "normalize_if mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_elsif_matches_ruby_private_method() { - for (source, kind, text) in [ - ( - "if ready\n call\nelsif other\n stop\nend\n", - "elsif", - "elsif other\n stop", - ), - ( - "if ready\n call\nelsif other\n stop\nelse\n done\nend\n", - "elsif", - "elsif other\n stop\nelse\n done", - ), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = node_value(&normalizer.normalize_elsif(node)); - - 
assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - Language::Ruby, - ".rb", - "normalize_elsif", - kind, - text - ), - "normalize_elsif mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_loop_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "while ready\n work\nend\n", - Language::Ruby, - ".rb", - "while", - "while ready\n work\nend", - ), - ( - "work while ready\n", - Language::Ruby, - ".rb", - "while_modifier", - "work while ready", - ), - ( - "work until ready\n", - Language::Ruby, - ".rb", - "until_modifier", - "work until ready", - ), - ( - "for item in items\n work\nend\n", - Language::Ruby, - ".rb", - "for", - "for item in items\n work\nend", - ), - ( - "while ready:\n work()\n", - Language::Python, - ".py", - "while_statement", - "while ready:\n work()", - ), - ( - "for item in items:\n work()\n", - Language::Python, - ".py", - "for_statement", - "for item in items:\n work()", - ), - ( - "while ready do\n work()\nend\n", - Language::Lua, - ".lua", - "while_statement", - "while ready do\n work()\nend", - ), - ( - "while (ready) { work(); }\n", - Language::TypeScript, - ".ts", - "while_statement", - "while (ready) { work(); }", - ), - ( - "for (let i = 0; i < n; i++) { work(i); }\n", - Language::TypeScript, - ".ts", - "for_statement", - "for (let i = 0; i < n; i++) { work(i); }", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let node_type = super::loop_kind(node.kind()).expect("test node should be a loop kind"); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_loop(node, node_type) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_loop", - kind, - text - ), - "normalize_loop mismatch for {language:?} {kind} {text:?}" - ); - } - } - 
- #[test] - fn ruby_elsif_normalization_matches_ruby() { - for source in [ - "if ready\n call\nelsif other\n stop\nend\n", - "if ready\n call\nelsif other\n stop\nelse\n done\nend\n", - ] { - let root = parse_language_source(source, Language::Ruby, ".rb"); - let if_node = first_node(&root, "IF", source.trim_end()); - - assert_eq!( - child_node(if_node, 2).r#type, - "IF", - "expected Ruby elsif alternative to normalize as nested IF: {if_node:#?}" - ); - assert_ruby_parity(source, Language::Ruby, ".rb"); - } - } - - #[test] - fn normalize_dotted_expression_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n user.name\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "user.name", - ), - ( - "def f\n user.name { value }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "user.name { value }", - ), - ( - "user.name\n", - Language::Python, - ".py", - "expression_statement", - "user.name", - ), - ( - "user.name;\n", - Language::TypeScript, - ".ts", - "expression_statement", - "user.name;", - ), - ( - "user.name()\n", - Language::Lua, - ".lua", - "dot_index_expression", - "user.name", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_dotted_expression(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_dotted_expression", - kind, - text - ), - "normalize_dotted_expression mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_dotted_call_expression_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n user.name\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "user.name", - ), - ( - "def f\n user.name(1)\nend\n", - Language::Ruby, - 
".rb", - "body_statement", - "user.name(1)", - ), - ( - "def f\n user&.name\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "user&.name", - ), - ( - "def f\n user.name { value }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "user.name { value }", - ), - ( - "user.name\n", - Language::Python, - ".py", - "expression_statement", - "user.name", - ), - ( - "user.name;\n", - Language::TypeScript, - ".ts", - "member_expression", - "user.name", - ), - ( - "user.name()\n", - Language::Lua, - ".lua", - "dot_index_expression", - "user.name", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_dotted_call_expression(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_dotted_call_expression", - kind, - text - ), - "normalize_dotted_call_expression mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn argument_list_call_with_block_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n return foo { bar }\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "foo { bar }", - ), - ( - "def f\n return foo do\n bar\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "foo do\n bar\n end", - ), - ( - "def f\n return foo(1) { bar }\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "foo(1) { bar }", - ), - ( - "def f\n foo { bar }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "foo { bar }", - ), - ( - "def f\n return foo.bar { baz }\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "foo.bar { baz }", - ), - ( - "def f\n return Foo { bar }\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "Foo { bar }", - ), - ( - "def f():\n return foo(lambda: bar)\n", - 
Language::Python, - ".py", - "argument_list", - "(lambda: bar)", - ), - ( - "function f(){ return foo(() => bar); }\n", - Language::TypeScript, - ".ts", - "arguments", - "(() => bar)", - ), - ( - "function f() return foo(function() return bar end) end\n", - Language::Lua, - ".lua", - "arguments", - "(function() return bar end)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.argument_list_call_with_block(node), - ruby_private_predicate( - source, - language, - suffix, - "argument_list_call_with_block?", - kind, - text - ), - "argument_list_call_with_block? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_argument_list_call_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n return foo { bar }\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "foo { bar }", - ), - ( - "def f\n return foo do\n bar\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "foo do\n bar\n end", - ), - ( - "def f\n return foo(1) { bar }\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "foo(1) { bar }", - ), - ( - "def f\n foo { bar }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "foo { bar }", - ), - ( - "def f():\n return foo(lambda: bar)\n", - Language::Python, - ".py", - "argument_list", - "(lambda: bar)", - ), - ( - "function f(){ return foo(() => bar); }\n", - Language::TypeScript, - ".ts", - "arguments", - "(() => bar)", - ), - ( - "function f() return foo(function() return bar end) end\n", - Language::Lua, - ".lua", - "arguments", - "(function() return bar end)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - 
.normalize_argument_list_call(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_argument_list_call", - kind, - text - ), - "normalize_argument_list_call mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_argument_list_call_with_block_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n return foo { bar }\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "foo { bar }", - ), - ( - "def f\n return foo do\n bar\n end\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "foo do\n bar\n end", - ), - ( - "def f\n return foo(1) { bar }\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "foo(1) { bar }", - ), - ( - "def f\n foo { bar }\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "foo { bar }", - ), - ( - "def f():\n return foo(lambda: bar)\n", - Language::Python, - ".py", - "argument_list", - "(lambda: bar)", - ), - ( - "function f(){ return foo(() => bar); }\n", - Language::TypeScript, - ".ts", - "arguments", - "(() => bar)", - ), - ( - "function f() return foo(function() return bar end) end\n", - Language::Lua, - ".lua", - "arguments", - "(function() return bar end)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_argument_list_call_with_block(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_argument_list_call_with_block", - kind, - text - ), - "normalize_argument_list_call_with_block mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn safe_navigation_call_matches_ruby_private_predicate() { - for (source, language, 
suffix, kind, text) in [ - ("user&.name\n", Language::Ruby, ".rb", "call", "user&.name"), - ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), - ( - "user.name()\n", - Language::Python, - ".py", - "attribute", - "user.name", - ), - ( - "user?.name;\n", - Language::TypeScript, - ".ts", - "member_expression", - "user?.name", - ), - ( - "user?.name();\n", - Language::TypeScript, - ".ts", - "call_expression", - "user?.name()", - ), - ( - "user.name;\n", - Language::TypeScript, - ".ts", - "member_expression", - "user.name", - ), - ( - "user.name()\n", - Language::Lua, - ".lua", - "dot_index_expression", - "user.name", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.safe_navigation_call(node), - ruby_private_predicate( - source, - language, - suffix, - "safe_navigation_call?", - kind, - text - ), - "safe_navigation_call? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn rescue_source_end_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "begin\n work\nrescue Error => e\n handle\nend\n", - Language::Ruby, - ".rb", - "rescue", - "rescue Error => e\n handle", - ), - ( - "try:\n work()\nexcept Error as e:\n handle()\n", - Language::Python, - ".py", - "except_clause", - "except Error as e:\n handle()", - ), - ( - "try { work(); } catch (e) { handle(); }\n", - Language::TypeScript, - ".ts", - "catch_clause", - "catch (e) { handle(); }", - ), - ("work()\n", Language::Lua, ".lua", "function_call", "work()"), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.rescue_source_end(node).map(|source_end| { - ( - source_end.kind().to_string(), - super::node_text(source_end, source).to_string(), - ) - }); - - assert_eq!( - rust, - ruby_private_node_signature( - source, - language, - suffix, - "rescue_source_end", - kind, - text - ), - "rescue_source_end mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn rescue_exception_variable_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "begin\n work\nrescue Error => e\n handle\nend\n", - Language::Ruby, - ".rb", - "rescue", - "rescue Error => e\n handle", - ), - ( - "begin\n work\nrescue Error\n handle\nend\n", - Language::Ruby, - ".rb", - "rescue", - "rescue Error\n handle", - ), - ( - "try:\n work()\nexcept Error as e:\n handle()\n", - Language::Python, - ".py", - "except_clause", - "except Error as e:\n handle()", - ), - ( - "try:\n work()\nexcept Error:\n handle()\n", - Language::Python, - ".py", - "except_clause", - "except Error:\n handle()", - ), - ( - "try { work(); } catch (e) { handle(); }\n", - Language::TypeScript, - ".ts", - "catch_clause", - "catch (e) { handle(); }", 
- ), - ("work()\n", Language::Lua, ".lua", "function_call", "work()"), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .rescue_exception_variable(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "rescue_exception_variable", - kind, - text - ), - "rescue_exception_variable mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_rescue_clause_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "begin\n work\nrescue Error => e\n handle\nend\n", - Language::Ruby, - ".rb", - "rescue", - "rescue Error => e\n handle", - ), - ( - "begin\n work\nrescue Net::Error\n handle\nend\n", - Language::Ruby, - ".rb", - "rescue", - "rescue Net::Error\n handle", - ), - ( - "try:\n work()\nexcept Error as e:\n handle(e)\n", - Language::Python, - ".py", - "except_clause", - "except Error as e:\n handle(e)", - ), - ( - "try { work(); } catch (e) { handle(e); }\n", - Language::TypeScript, - ".ts", - "catch_clause", - "catch (e) { handle(e); }", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_rescue_clause(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_rescue_clause", - kind, - text - ), - "normalize_rescue_clause mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_rescue_modifier_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [( - "value rescue fallback\n", - Language::Ruby, - ".rb", - 
"rescue_modifier", - "value rescue fallback", - )] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_rescue_modifier(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_rescue_modifier", - kind, - text - ), - "normalize_rescue_modifier mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn prepend_rescue_exception_assignment_matches_ruby_private_method() { - fn synthetic_node( - node_type: &str, - text: &str, - first_lineno: usize, - first_column: usize, - last_lineno: usize, - last_column: usize, - children: Vec, - ) -> Node { - Node { - r#type: node_type.to_string(), - children, - first_lineno, - first_column, - last_lineno, - last_column, - text: text.to_string(), - } - } - - let source = "assign\nbody\n"; - let assignment = synthetic_node("LASGN", "assign", 1, 0, 1, 6, Vec::new()); - let body = synthetic_node("VCALL", "body", 2, 0, 2, 4, Vec::new()); - let block = synthetic_node( - "BLOCK", - "body", - 2, - 0, - 2, - 4, - vec![Child::Node(Box::new(body.clone())), Child::Nil], - ); - - for (label, body_node, assignment_node) in [ - ("no_assignment", Some(body.clone()), None), - ("no_body", None, Some(assignment.clone())), - ("block_body", Some(block), Some(assignment.clone())), - ("scalar_body", Some(body), Some(assignment)), - ] { - let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = normalizer - .prepend_rescue_exception_assignment(body_node.clone(), assignment_node.clone()) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - let body_value = body_node.as_ref().map(node_value).unwrap_or(Value::Null); - let assignment_value = assignment_node - .as_ref() - .map(node_value) - .unwrap_or(Value::Null); - - 
assert_eq!( - rust, - ruby_private_prepend_rescue_exception_assignment_value( - source, - &body_value, - &assignment_value - ), - "prepend_rescue_exception_assignment mismatch for {label}" - ); - } - } - - #[test] - fn dotted_call_parts_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), - ("user&.name\n", Language::Ruby, ".rb", "call", "user&.name"), - ( - "user.name()\n", - Language::Python, - ".py", - "attribute", - "user.name", - ), - ( - "user.name();\n", - Language::TypeScript, - ".ts", - "member_expression", - "user.name", - ), - ( - "user.name()\n", - Language::Lua, - ".lua", - "dot_index_expression", - "user.name", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .dotted_call_parts(node, None) - .map(|(receiver, method)| { - ( - receiver.kind().to_string(), - super::node_text(receiver, source).to_string(), - method, - ) - }); - - assert_eq!( - rust, - ruby_private_dotted_call_parts(source, language, suffix, kind, text), - "dotted_call_parts mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn dotted_call_parts_normalization_matches_ruby() { - for (source, language, suffix) in [ - ("user.name\n", Language::Ruby, ".rb"), - ("user&.name\n", Language::Ruby, ".rb"), - ("user.name()\n", Language::Python, ".py"), - ("user.name();\n", Language::TypeScript, ".ts"), - ("user.name()\n", Language::Lua, ".lua"), - ] { - let root = parse_language_source(source, language, suffix); - if language != Language::Lua { - let mut calls = Vec::new(); - nodes_of_type(&root, "CALL", &mut calls); - let mut qcalls = Vec::new(); - nodes_of_type(&root, "QCALL", &mut qcalls); - assert!( - calls - .iter() - .chain(qcalls.iter()) - .any(|node| matches!(node.children.get(1), Some(Child::Symbol(method)) if method 
== "name")), - "expected dotted call method name for {language:?} in {root:#?}" - ); - } - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn leading_if_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n if x\n y\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "if x\n y\n end", - ), - ( - "def f():\n if x:\n y()\n", - Language::Python, - ".py", - "block", - "if x:\n y()", - ), - ( - "function f()\n if x then\n y()\n end\nend\n", - Language::Lua, - ".lua", - "block", - "if x then\n y()\n end", - ), - ( - "function f() { if (x) { y(); } }\n", - Language::TypeScript, - ".ts", - "if_statement", - "if (x) { y(); }", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.leading_if_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "leading_if_statement?", - kind, - text - ), - "leading_if_statement? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_leading_if_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n if x\n y\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "if x\n y\n end", - ), - ( - "def f():\n if x:\n y()\n", - Language::Python, - ".py", - "block", - "if x:\n y()", - ), - ( - "function f()\n if x then\n y()\n end\nend\n", - Language::Lua, - ".lua", - "block", - "if x then\n y()\n end", - ), - ( - "function f() { if (x) { y(); } }\n", - Language::TypeScript, - ".ts", - "if_statement", - "if (x) { y(); }", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_leading_if_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_leading_if_statement", - kind, - text - ), - "normalize_leading_if_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn leading_if_statement_normalization_matches_ruby() { - for (source, language, suffix) in [ - ("def f\n if x\n y\n end\nend\n", Language::Ruby, ".rb"), - ( - "def f():\n if x:\n y()\n", - Language::Python, - ".py", - ), - ( - "function f()\n if x then\n y()\n end\nend\n", - Language::Lua, - ".lua", - ), - ( - "function f() { if (x) { y(); } }\n", - Language::TypeScript, - ".ts", - ), - ] { - let root = parse_language_source(source, language, suffix); - let mut if_nodes = Vec::new(); - nodes_of_type(&root, "IF", &mut if_nodes); - assert!( - !if_nodes.is_empty(), - "expected IF node for {language:?} in {root:#?}" - ); - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn leading_case_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, 
text) in [ - ( - "def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "case x\n when 1 then y\n else z\n end", - ), - ( - "def f(x):\n match x:\n case 1:\n y()\n", - Language::Python, - ".py", - "block", - "match x:\n case 1:\n y()", - ), - ( - "function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", - Language::TypeScript, - ".ts", - "switch_statement", - "switch (x) { case 1: y(); break; default: z(); }", - ), - ( - "function f(x)\n if x == 1 then y() end\nend\n", - Language::Lua, - ".lua", - "block", - "if x == 1 then y() end", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.leading_case_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "leading_case_statement?", - kind, - text - ), - "leading_case_statement? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_leading_case_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "case x\n when 1 then y\n else z\n end", - ), - ( - "def f(x):\n match x:\n case 1:\n y()\n", - Language::Python, - ".py", - "block", - "match x:\n case 1:\n y()", - ), - ( - "function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", - Language::TypeScript, - ".ts", - "switch_statement", - "switch (x) { case 1: y(); break; default: z(); }", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_leading_case_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - 
ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_leading_case_statement", - kind, - text - ), - "normalize_leading_case_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn leading_case_statement_normalization_matches_ruby() { - for (source, language, suffix) in [ - ( - "def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", - Language::Ruby, - ".rb", - ), - ( - "def f(x):\n match x:\n case 1:\n y()\n", - Language::Python, - ".py", - ), - ( - "function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", - Language::TypeScript, - ".ts", - ), - ] { - let root = parse_language_source(source, language, suffix); - let mut case_nodes = Vec::new(); - nodes_of_type(&root, "CASE", &mut case_nodes); - assert!( - !case_nodes.is_empty(), - "expected CASE node for {language:?} in {root:#?}" - ); - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn leading_loop_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f(x)\n while x\n y\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "while x\n y\n end", - ), - ( - "def f(x):\n while x:\n y()\n", - Language::Python, - ".py", - "block", - "while x:\n y()", - ), - ( - "function f(x)\n while x do\n y()\n end\nend\n", - Language::Lua, - ".lua", - "block", - "while x do\n y()\n end", - ), - ( - "function f(x) { while (x) { y(); } }\n", - Language::TypeScript, - ".ts", - "while_statement", - "while (x) { y(); }", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.leading_loop_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "leading_loop_statement?", - kind, - text - ), - "leading_loop_statement? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_leading_loop_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f(x)\n while x\n y\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "while x\n y\n end", - ), - ( - "def f(x)\n until x\n y\n end\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "until x\n y\n end", - ), - ( - "def f(x):\n while x:\n y()\n", - Language::Python, - ".py", - "block", - "while x:\n y()", - ), - ( - "function f(x)\n while x do\n y()\n end\nend\n", - Language::Lua, - ".lua", - "block", - "while x do\n y()\n end", - ), - ( - "function f(x) { while (x) { y(); } }\n", - Language::TypeScript, - ".ts", - "while_statement", - "while (x) { y(); }", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_leading_loop_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_leading_loop_statement", - kind, - text - ), - "normalize_leading_loop_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn leading_loop_statement_normalization_matches_ruby() { - for (source, language, suffix) in [ - ( - "def f(x)\n while x\n y\n end\nend\n", - Language::Ruby, - ".rb", - ), - ( - "def f(x):\n while x:\n y()\n", - Language::Python, - ".py", - ), - ( - "function f(x)\n while x do\n y()\n end\nend\n", - Language::Lua, - ".lua", - ), - ( - "function f(x) { while (x) { y(); } }\n", - Language::TypeScript, - ".ts", - ), - ] { - let root = parse_language_source(source, language, suffix); - let mut while_nodes = Vec::new(); - nodes_of_type(&root, "WHILE", &mut while_nodes); - assert!( - !while_nodes.is_empty(), - "expected WHILE node for 
{language:?} in {root:#?}" - ); - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn rescue_body_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n work\nrescue Error => e\n handle\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "work\nrescue Error => e\n handle", - ), - ( - "try:\n work()\nexcept Error as e:\n handle(e)\n", - Language::Python, - ".py", - "try_statement", - "try:\n work()\nexcept Error as e:\n handle(e)", - ), - ( - "try { work(); } catch (e) { handle(e); }\n", - Language::TypeScript, - ".ts", - "try_statement", - "try { work(); } catch (e) { handle(e); }", - ), - ( - "local ok, err = pcall(work)\n", - Language::Lua, - ".lua", - "variable_declaration", - "local ok, err = pcall(work)", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.rescue_body_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "rescue_body_statement?", - kind, - text - ), - "rescue_body_statement? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_rescue_body_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n work\nrescue Error => e\n handle\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "work\nrescue Error => e\n handle", - ), - ( - "try:\n work()\nexcept Error as e:\n handle(e)\n", - Language::Python, - ".py", - "try_statement", - "try:\n work()\nexcept Error as e:\n handle(e)", - ), - ( - "try { work(); } catch (e) { handle(e); }\n", - Language::TypeScript, - ".ts", - "try_statement", - "try { work(); } catch (e) { handle(e); }", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_rescue_body_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_rescue_body_statement", - kind, - text - ), - "normalize_rescue_body_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn rescue_body_statement_normalization_matches_ruby() { - for (source, language, suffix) in [ - ( - "def f\n work\nrescue Error => e\n handle\nend\n", - Language::Ruby, - ".rb", - ), - ( - "try:\n work()\nexcept Error as e:\n handle(e)\n", - Language::Python, - ".py", - ), - ( - "try { work(); } catch (e) { handle(e); }\n", - Language::TypeScript, - ".ts", - ), - ] { - let root = parse_language_source(source, language, suffix); - let mut rescue_nodes = Vec::new(); - nodes_of_type(&root, "RESCUE", &mut rescue_nodes); - assert!( - !rescue_nodes.is_empty(), - "expected RESCUE node for {language:?} in {root:#?}" - ); - let mut resbody_nodes = Vec::new(); - nodes_of_type(&root, "RESBODY", &mut resbody_nodes); - assert!( - !resbody_nodes.is_empty(), - "expected RESBODY node 
for {language:?} in {root:#?}" - ); - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn ensure_body_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n work\nensure\n cleanup\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "work\nensure\n cleanup", - ), - ( - "try:\n work()\nfinally:\n cleanup()\n", - Language::Python, - ".py", - "try_statement", - "try:\n work()\nfinally:\n cleanup()", - ), - ( - "try { work(); } finally { cleanup(); }\n", - Language::TypeScript, - ".ts", - "try_statement", - "try { work(); } finally { cleanup(); }", - ), - ( - "work()\ncleanup()\n", - Language::Lua, - ".lua", - "function_call", - "work()", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.ensure_body_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "ensure_body_statement?", - kind, - text - ), - "ensure_body_statement? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn ensure_body_statement_normalization_matches_ruby() { - for (source, language, suffix) in [ - ( - "def f\n work\nensure\n cleanup\nend\n", - Language::Ruby, - ".rb", - ), - ( - "try:\n work()\nfinally:\n cleanup()\n", - Language::Python, - ".py", - ), - ( - "try { work(); } finally { cleanup(); }\n", - Language::TypeScript, - ".ts", - ), - ( - "try:\n work()\nexcept Error as e:\n handle(e)\nfinally:\n cleanup()\n", - Language::Python, - ".py", - ), - ] { - let root = parse_language_source(source, language, suffix); - let mut ensure_nodes = Vec::new(); - nodes_of_type(&root, "ENSURE", &mut ensure_nodes); - assert!( - !ensure_nodes.is_empty(), - "expected ENSURE node for {language:?} in {root:#?}" - ); - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn normalize_ensure_body_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n work\nensure\n cleanup\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "work\nensure\n cleanup", - ), - ( - "try:\n work()\nfinally:\n cleanup()\n", - Language::Python, - ".py", - "try_statement", - "try:\n work()\nfinally:\n cleanup()", - ), - ( - "try:\n work()\nexcept Error as e:\n handle(e)\nfinally:\n cleanup()\n", - Language::Python, - ".py", - "try_statement", - "try:\n work()\nexcept Error as e:\n handle(e)\nfinally:\n cleanup()", - ), - ( - "try { work(); } finally { cleanup(); }\n", - Language::TypeScript, - ".ts", - "try_statement", - "try { work(); } finally { cleanup(); }", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_ensure_body_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - 
suffix, - "normalize_ensure_body_statement", - kind, - text - ), - "normalize_ensure_body_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_ensure_clause_matches_ruby_private_method() { - for (source, kind, text) in [ - ( - "begin\n work\nensure\n cleanup\nend\n", - "ensure", - "ensure\n cleanup", - ), - ( - "begin\n work\nensure\n user.name\nend\n", - "ensure", - "ensure\n user.name", - ), - ( - "begin\n work\nensure\n user.name\n cleanup\nend\n", - "ensure", - "ensure\n user.name\n cleanup", - ), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = normalizer - .normalize_ensure_clause(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - Language::Ruby, - ".rb", - "normalize_ensure_clause", - kind, - text - ), - "normalize_ensure_clause mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn ruby_begin_ensure_clause_keeps_all_body_statements() { - let source = "begin\n work\nensure\n user.name\n cleanup\nend\n"; - let root = parse_language_source(source, Language::Ruby, ".rb"); - let ensure = first_node(&root, "ENSURE", "work\nensure\n user.name\n cleanup"); - let ensure_body = child_node(ensure, 1); - - assert_eq!( - child_types(ensure_body), - vec!["CALL", "VCALL"], - "Ruby ensure clause body must retain all statements: {ensure:#?}" - ); - assert_ruby_parity(source, Language::Ruby, ".rb"); - } - - #[test] - fn array_literal_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n [a, b]\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "[a, b]", - ), - ( - "def f():\n [a, b]\n", - Language::Python, - ".py", - "block", - "[a, b]", - ), - ( - "function f() { [a, b]; }\n", - Language::TypeScript, - ".ts", - 
"expression_statement", - "[a, b];", - ), - ( - "function f()\n {a, b}\nend\n", - Language::Lua, - ".lua", - "block", - "\n {a, b}", - ), - ( - "function f()\n {x = a, y = b}\nend\n", - Language::Lua, - ".lua", - "block", - "\n {x = a, y = b}", - ), - ( - "local rocks_path = table.concat({rocks_tree, \"a_rock\"})\n", - Language::Lua, - ".lua", - "arguments", - "({rocks_tree, \"a_rock\"})", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.array_literal_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "array_literal_statement?", - kind, - text - ), - "array_literal_statement? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn array_literal_statement_normalization_matches_ruby() { - for (source, language, suffix) in [ - ("def f\n [a, b]\nend\n", Language::Ruby, ".rb"), - ("def f():\n [a, b]\n", Language::Python, ".py"), - ("function f() { [a, b]; }\n", Language::TypeScript, ".ts"), - ("function f()\n {a, b}\nend\n", Language::Lua, ".lua"), - ] { - let root = parse_language_source(source, language, suffix); - let mut lists = Vec::new(); - nodes_of_type(&root, "LIST", &mut lists); - assert!( - lists - .iter() - .any(|node| node.text.contains('a') && node.text.contains('b')), - "expected LIST for {language:?} in {root:#?}" - ); - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn normalize_array_literal_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n [a, b]\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "[a, b]", - ), - ( - "def f\n []\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "[]", - ), - ( - "def f():\n [a, b]\n", - Language::Python, - ".py", - "block", - "[a, b]", - ), - ("def f():\n []\n", Language::Python, ".py", "block", "[]"), - ( - "function 
f() { [a, b]; }\n", - Language::TypeScript, - ".ts", - "expression_statement", - "[a, b];", - ), - ( - "function f() { []; }\n", - Language::TypeScript, - ".ts", - "expression_statement", - "[];", - ), - ( - "function f()\n {a, b}\nend\n", - Language::Lua, - ".lua", - "block", - "\n {a, b}", - ), - ( - "assert.same(install, { bin = { P\"bin/binfile\" } })\n", - Language::Lua, - ".lua", - "arguments", - "(install, { bin = { P\"bin/binfile\" } })", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_array_literal_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_array_literal_statement", - kind, - text - ), - "normalize_array_literal_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn element_reference_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n items[0]\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "items[0]", - ), - ( - "def f\n [0]\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "[0]", - ), - ( - "def f():\n items[0]\n", - Language::Python, - ".py", - "block", - "items[0]", - ), - ( - "return items[0]\n", - Language::Python, - ".py", - "subscript", - "items[0]", - ), - ( - "function f() { items[0]; }\n", - Language::TypeScript, - ".ts", - "expression_statement", - "items[0];", - ), - ( - "return items[0];\n", - Language::TypeScript, - ".ts", - "subscript_expression", - "items[0]", - ), - ( - "return items[1]\n", - Language::Lua, - ".lua", - "expression_list", - "items[1]", - ), - ( - "print(items[1])\n", - Language::Lua, - ".lua", - "bracket_index_expression", - "items[1]", - ), - ] { - let tree = raw_tree(source, language); - let node = 
first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.element_reference_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "element_reference_statement?", - kind, - text - ), - "element_reference_statement? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_element_reference_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n items[0]\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "items[0]", - ), - ( - "def f\n self[0]\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "self[0]", - ), - ( - "return items[0]\n", - Language::Python, - ".py", - "subscript", - "items[0]", - ), - ( - "return items[0];\n", - Language::TypeScript, - ".ts", - "subscript_expression", - "items[0]", - ), - ( - "print(items[1])\n", - Language::Lua, - ".lua", - "bracket_index_expression", - "items[1]", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_element_reference(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_element_reference", - kind, - text - ), - "normalize_element_reference mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_element_reference_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n items[0]\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "items[0]", - ), - ( - "def f\n self[0]\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "self[0]", - ), - ( - "def f():\n items[0]\n", - Language::Python, - ".py", - "block", - "items[0]", - ), - ( - "return 
items[0]\n", - Language::Python, - ".py", - "subscript", - "items[0]", - ), - ( - "function f() { items[0]; }\n", - Language::TypeScript, - ".ts", - "expression_statement", - "items[0];", - ), - ( - "return items[0];\n", - Language::TypeScript, - ".ts", - "subscript_expression", - "items[0]", - ), - ( - "return items[1]\n", - Language::Lua, - ".lua", - "expression_list", - "items[1]", - ), - ( - "print(items[1])\n", - Language::Lua, - ".lua", - "bracket_index_expression", - "items[1]", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_element_reference_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_element_reference_statement", - kind, - text - ), - "normalize_element_reference_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn element_reference_statement_normalization_matches_ruby() { - for (source, language, suffix) in [ - ("def f\n items[0]\nend\n", Language::Ruby, ".rb"), - ("def f():\n items[0]\n", Language::Python, ".py"), - ("function f() { items[0]; }\n", Language::TypeScript, ".ts"), - ("return items[1]\n", Language::Lua, ".lua"), - ] { - let root = parse_language_source(source, language, suffix); - let mut calls = Vec::new(); - nodes_of_type(&root, "CALL", &mut calls); - assert!( - calls.iter().any(|node| { - matches!(node.children.get(1), Some(Child::Symbol(message)) if message == "[]") - && node.text.contains("items") - }), - "expected element reference CALL for {language:?} in {root:#?}" - ); - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn hash_literal_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n {a: b}\nend\n", - 
Language::Ruby, - ".rb", - "body_statement", - "{a: b}", - ), - ( - "def f():\n {\"a\": b}\n", - Language::Python, - ".py", - "block", - "{\"a\": b}", - ), - ( - "function f() { ({a: b}); }\n", - Language::TypeScript, - ".ts", - "expression_statement", - "({a: b});", - ), - ( - "return {a: b};\n", - Language::TypeScript, - ".ts", - "object", - "{a: b}", - ), - ( - "function f()\n {a = b}\nend\n", - Language::Lua, - ".lua", - "block", - "\n {a = b}", - ), - ( - "function f()\n {a, b}\nend\n", - Language::Lua, - ".lua", - "block", - "\n {a, b}", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.hash_literal_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "hash_literal_statement?", - kind, - text - ), - "hash_literal_statement? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_hash_literal_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n {a: b}\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "{a: b}", - ), - ( - "def f():\n {\"a\": b}\n", - Language::Python, - ".py", - "block", - "{\"a\": b}", - ), - ( - "function f() { ({a: b}); }\n", - Language::TypeScript, - ".ts", - "expression_statement", - "({a: b});", - ), - ( - "return {a: b};\n", - Language::TypeScript, - ".ts", - "object", - "{a: b}", - ), - ( - "function f()\n {a = b}\nend\n", - Language::Lua, - ".lua", - "block", - "\n {a = b}", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_hash_literal_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - 
source, - language, - suffix, - "normalize_hash_literal_statement", - kind, - text - ), - "normalize_hash_literal_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_pair_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n {a: b}\nend\n", - Language::Ruby, - ".rb", - "pair", - "a: b", - ), - ( - "def f\n {name:}\nend\n", - Language::Ruby, - ".rb", - "pair", - "name:", - ), - ( - "def f\n {\"a\" => b}\nend\n", - Language::Ruby, - ".rb", - "pair", - "\"a\" => b", - ), - ( - "def f():\n {\"a\": b}\n", - Language::Python, - ".py", - "pair", - "\"a\": b", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_pair(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_pair", - kind, - text - ), - "normalize_pair mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn hash_literal_statement_normalization_matches_ruby() { - for (source, language, suffix) in [ - ("def f\n {a: b}\nend\n", Language::Ruby, ".rb"), - ("def f():\n {\"a\": b}\n", Language::Python, ".py"), - ("function f() { ({a: b}); }\n", Language::TypeScript, ".ts"), - ("function f()\n {a = b}\nend\n", Language::Lua, ".lua"), - ] { - let root = parse_language_source(source, language, suffix); - let mut hashes = Vec::new(); - nodes_of_type(&root, "HASH", &mut hashes); - assert!( - hashes - .iter() - .any(|node| node.text.contains('a') && node.text.contains('b')), - "expected hash literal HASH for {language:?} in {root:#?}" - ); - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn empty_body_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f():\n 
pass\n", - Language::Python, - ".py", - "block", - "pass", - ), - ( - "function f() {}\n", - Language::TypeScript, - ".ts", - "statement_block", - "{}", - ), - ( - "function f() { work(); }\n", - Language::TypeScript, - ".ts", - "statement_block", - "{ work(); }", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.empty_body_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "empty_body_statement?", - kind, - text - ), - "empty_body_statement? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn empty_body_statement_normalization_matches_ruby() { - for (source, language, suffix) in [ - ("def f():\n pass\n", Language::Python, ".py"), - ("function f() {}\n", Language::TypeScript, ".ts"), - ] { - let root = parse_language_source(source, language, suffix); - let mut defns = Vec::new(); - nodes_of_type(&root, "DEFN", &mut defns); - let scope = child_node(defns[0], 1); - assert!( - matches!(scope.children.get(2), Some(Child::Nil)), - "expected empty body for {language:?} in {root:#?}" - ); - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn heredoc_body_statement_matches_ruby_private_predicate() { - let ruby_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; - for (source, language, suffix, kind, text) in [ - ( - ruby_source, - Language::Ruby, - ".rb", - "body_statement", - "puts <<~TXT\n hi\n TXT", - ), - (ruby_source, Language::Ruby, ".rb", "call", "puts <<~TXT"), - ( - "def f():\n value = 1\n", - Language::Python, - ".py", - "block", - "value = 1", - ), - ( - "function f() { value = 1; }\n", - Language::TypeScript, - ".ts", - "statement_block", - "{ value = 1; }", - ), - ( - "function f()\n value = 1\nend\n", - Language::Lua, - ".lua", - "block", - "value = 1", - ), - ] { - let tree = raw_tree(source, language); - let node = 
first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.heredoc_body_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "heredoc_body_statement?", - kind, - text - ), - "heredoc_body_statement? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn heredoc_call_for_body_matches_ruby_private_predicate() { - let ruby_arg_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; - let ruby_receiver_source = "def emit\n <<~ZIG.chomp\n hi\n ZIG\nend\n"; - for (source, language, suffix, kind, text) in [ - ( - ruby_arg_source, - Language::Ruby, - ".rb", - "body_statement", - "puts <<~TXT\n hi\n TXT", - ), - ( - ruby_arg_source, - Language::Ruby, - ".rb", - "call", - "puts <<~TXT", - ), - ( - ruby_arg_source, - Language::Ruby, - ".rb", - "argument_list", - "<<~TXT", - ), - ( - ruby_arg_source, - Language::Ruby, - ".rb", - "method", - "def f\n puts <<~TXT\n hi\n TXT\nend", - ), - ( - ruby_receiver_source, - Language::Ruby, - ".rb", - "call", - "<<~ZIG.chomp", - ), - ( - ruby_receiver_source, - Language::Ruby, - ".rb", - "heredoc_beginning", - "<<~ZIG", - ), - ( - "def f():\n value = 1\n", - Language::Python, - ".py", - "block", - "value = 1", - ), - ( - "function f() { value = 1; }\n", - Language::TypeScript, - ".ts", - "statement_block", - "{ value = 1; }", - ), - ( - "function f()\n value = 1\nend\n", - Language::Lua, - ".lua", - "block", - "value = 1", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.heredoc_call_for_body(node), - ruby_private_predicate( - source, - language, - suffix, - "heredoc_call_for_body?", - kind, - text - ), - "heredoc_call_for_body? 
mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn with_current_heredoc_body_restores_previous_body() { - let source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; - let tree = raw_tree(source, Language::Ruby); - let body = first_raw_node(tree.root_node(), source, "heredoc_body", "\n hi\n TXT"); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - normalizer.current_heredoc_body_span = Some([9, 2, 9, 7]); - - let result = normalizer.with_current_heredoc_body(Some(body), |normalizer| { - assert_eq!( - normalizer.current_heredoc_body_span, - Some(super::span(body)) - ); - "result" - }); - - assert_eq!(result, "result"); - assert_eq!(normalizer.current_heredoc_body_span, Some([9, 2, 9, 7])); - } - - #[test] - fn normalize_interpolation_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "name = \"#{user}\"\n", - Language::Ruby, - ".rb", - "interpolation", - "#{user}", - ), - ( - "name = \"#{a; b}\"\n", - Language::Ruby, - ".rb", - "interpolation", - "#{a; b}", - ), - ( - "name = f\"hi {user}\"\n", - Language::Python, - ".py", - "interpolation", - "{user}", - ), - ( - "const name = `hi ${user}`;\n", - Language::TypeScript, - ".ts", - "template_substitution", - "${user}", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_interpolation(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_interpolation", - kind, - text - ), - "normalize_interpolation mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_heredoc_children_matches_ruby_private_method() { - for (source, kind, text) in [ - ( - "def f\n puts <<~TXT\n hi\n TXT\nend\n", - "heredoc_body", - "\n hi\n TXT", - 
), - ( - "def f\n puts <<~TXT\n hi #{name}\n TXT\nend\n", - "heredoc_body", - "\n hi #{name}\n TXT", - ), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = children_value(&normalizer.normalize_heredoc_children(node)); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - Language::Ruby, - ".rb", - "normalize_heredoc_children", - kind, - text - ), - "normalize_heredoc_children mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_heredoc_beginning_matches_ruby_private_method() { - for (source, kind, text) in [( - "def emit\n <<~ZIG.chomp\n hi\n ZIG\nend\n", - "heredoc_beginning", - "<<~ZIG", - )] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = node_value(&normalizer.normalize_heredoc_beginning(node)); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - Language::Ruby, - ".rb", - "normalize_heredoc_beginning", - kind, - text - ), - "normalize_heredoc_beginning mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_heredoc_beginning_uses_current_body_for_multiple_heredocs() { - let source = "def f\n puts <<~A, <<~B\n one\n A\n two\n B\nend\n"; - let tree = raw_tree(source, Language::Ruby); - let beginning = first_raw_node(tree.root_node(), source, "heredoc_beginning", "<<~B"); - let body = first_raw_node(tree.root_node(), source, "heredoc_body", "\n two\n B"); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - - let dstr = normalizer.with_current_heredoc_body(Some(body), |normalizer| { - normalizer.normalize_heredoc_beginning(beginning) - }); - - let content = child_node(&dstr, 0); - assert_eq!(content.r#type, "STR"); - assert_eq!( - 
content.children, - vec![Child::String("\n two\n ".to_string())] - ); - } - - #[test] - fn normalize_heredoc_body_statement_matches_ruby_private_method() { - for (source, kind, text) in [ - ( - "def f\n puts <<~TXT\n hi\n TXT\nend\n", - "body_statement", - "puts <<~TXT\n hi\n TXT", - ), - ( - "def emit\n <<~ZIG.chomp\n hi\n ZIG\nend\n", - "body_statement", - "<<~ZIG.chomp\n hi\n ZIG", - ), - ( - "def f\n puts <<~A, <<~B\n one\n A\n two\n B\nend\n", - "body_statement", - "puts <<~A, <<~B\n one\n A\n two\n B", - ), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = normalizer - .normalize_heredoc_body_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - Language::Ruby, - ".rb", - "normalize_heredoc_body_statement", - kind, - text - ), - "normalize_heredoc_body_statement mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn interpolated_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n \"hi #{name}\"\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "\"hi #{name}\"", - ), - ( - "def f():\n f\"hi {name}\"\n", - Language::Python, - ".py", - "block", - "f\"hi {name}\"", - ), - ( - "function f() { `hi ${name}`; }\n", - Language::TypeScript, - ".ts", - "expression_statement", - "`hi ${name}`;", - ), - ( - "function f()\n \"hi\"\nend\n", - Language::Lua, - ".lua", - "block", - "\n \"hi\"", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.interpolated_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "interpolated_statement?", - kind, - text - ), - 
"interpolated_statement? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn interpolated_statement_normalization_matches_ruby() { - let source = "def f\n \"hi #{name}\"\nend\n"; - let root = parse_language_source(source, Language::Ruby, ".rb"); - let dstr = first_node(&root, "DSTR", "\"hi #{name}\""); - - assert_eq!(child_types(dstr), vec!["STR", "EVSTR"]); - assert_ruby_parity(source, Language::Ruby, ".rb"); - } - - #[test] - fn normalize_interpolated_statement_matches_ruby_private_method() { - for (source, kind, text) in [ - ( - "def f\n \"hi #{name}\"\nend\n", - "body_statement", - "\"hi #{name}\"", - ), - ( - "def f\n \"#{first} #{last}\"\nend\n", - "body_statement", - "\"#{first} #{last}\"", - ), - ] { - let tree = raw_tree(source, Language::Ruby); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); - let rust = node_value(&normalizer.normalize_interpolated_statement(node)); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - Language::Ruby, - ".rb", - "normalize_interpolated_statement", - kind, - text - ), - "normalize_interpolated_statement mismatch for {kind} {text:?}" - ); - } - } - - #[test] - fn concatenated_string_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n \"a\" \"b\"\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "\"a\" \"b\"", - ), - ( - "def f():\n \"a\" \"b\"\n", - Language::Python, - ".py", - "block", - "\"a\" \"b\"", - ), - ( - "function f() { \"a\"; }\n", - Language::TypeScript, - ".ts", - "expression_statement", - "\"a\";", - ), - ( - "function f()\n \"a\"\nend\n", - Language::Lua, - ".lua", - "block", - "\n \"a\"", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - 
normalizer.concatenated_string_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "concatenated_string_statement?", - kind, - text - ), - "concatenated_string_statement? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn concatenated_string_statement_normalization_matches_ruby() { - for (source, language, suffix, expected_text, expected_types) in [ - ( - "def f\n \"a\" \"b\"\nend\n", - Language::Ruby, - ".rb", - "\"a\"", - vec!["STR", "STR"], - ), - ( - "def f\n \"a\" \"b #{name}\"\nend\n", - Language::Ruby, - ".rb", - "\"b #{name}\"", - vec!["STR", "STR", "EVSTR"], - ), - ( - "def f():\n \"a\" \"b\"\n", - Language::Python, - ".py", - "\"a\"", - vec!["STR", "STR"], - ), - ( - "def f():\n \"a\" f\"b {name}\"\n", - Language::Python, - ".py", - "f\"b {name}\"", - vec!["STR", "STRING_START", "STR", "EVSTR", "STRING_END"], - ), - ] { - let root = parse_language_source(source, language, suffix); - let dstr = first_node(&root, "DSTR", expected_text); - - assert_eq!(child_types(dstr), expected_types); - assert_ruby_parity(source, language, suffix); - } - } - - #[test] - fn normalize_concatenated_string_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n \"a\" \"b\"\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "\"a\" \"b\"", - ), - ( - "def f\n \"a\" \"b #{name}\"\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "\"a\" \"b #{name}\"", - ), - ( - "def f():\n \"a\" \"b\"\n", - Language::Python, - ".py", - "block", - "\"a\" \"b\"", - ), - ( - "def f():\n \"a\" f\"b {name}\"\n", - Language::Python, - ".py", - "block", - "\"a\" f\"b {name}\"", - ), - ( - "function f() { \"a\"; }\n", - Language::TypeScript, - ".ts", - "expression_statement", - "\"a\";", - ), - ( - "function f()\n \"a\"\nend\n", - Language::Lua, - ".lua", - "block", - "\n \"a\"", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, 
text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.normalize_concatenated_string_statement(node); - - assert_eq!( - node_value(&rust), - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_concatenated_string_statement", - kind, - text - ), - "normalize_concatenated_string_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_chained_string_matches_ruby_private_method() { - for (source, language, suffix, ruby_kind, ruby_text, rust_kind, rust_text) in [ - ( - "def f\n \"a\" \"b\"\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "\"a\" \"b\"", - "chained_string", - "\"a\" \"b\"", - ), - ( - "def f\n \"a\" \"b #{name}\"\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "\"a\" \"b #{name}\"", - "chained_string", - "\"a\" \"b #{name}\"", - ), - ( - "def f():\n \"a\" \"b\"\n", - Language::Python, - ".py", - "block", - "\"a\" \"b\"", - "concatenated_string", - "\"a\" \"b\"", - ), - ( - "def f():\n \"a\" f\"b {name}\"\n", - Language::Python, - ".py", - "block", - "\"a\" f\"b {name}\"", - "concatenated_string", - "\"a\" f\"b {name}\"", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, rust_kind, rust_text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.normalize_chained_string(node); - - assert_eq!( - node_value(&rust), - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_chained_string", - ruby_kind, - ruby_text - ), - "normalize_chained_string mismatch for {language:?} {rust_kind} {rust_text:?}" - ); - } - } - - #[test] - fn dynamic_string_source_matches_ruby_private_method() { - for (source, language, suffix, ruby_kind, ruby_text, rust_kind, rust_text) in [ - ( - "def f\n \"a\" \"b #{name}\"\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "\"a\" \"b #{name}\"", - "chained_string", - 
"\"a\" \"b #{name}\"", - ), - ( - "def f\n \"a\" \"b\"\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "\"a\" \"b\"", - "chained_string", - "\"a\" \"b\"", - ), - ( - "def f():\n \"a\" f\"b {name}\"\n", - Language::Python, - ".py", - "block", - "\"a\" f\"b {name}\"", - "concatenated_string", - "\"a\" f\"b {name}\"", - ), - ( - "def f():\n \"a\" \"b\"\n", - Language::Python, - ".py", - "block", - "\"a\" \"b\"", - "concatenated_string", - "\"a\" \"b\"", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, rust_kind, rust_text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let mut normalized_children = Vec::new(); - for child in normalizer.named_children(node) { - let normalized = normalizer.normalize_node(child); - normalized_children.push((child, normalized)); - } - let rust = normalizer - .dynamic_string_source(&normalized_children) - .map(|node| { - ( - node.kind().to_string(), - super::node_text(node, source).to_string(), - ) - }); - let ruby = ruby_private_dynamic_string_source_signature( - source, language, suffix, ruby_kind, ruby_text, - ); - - assert_eq!( - rust, ruby, - "dynamic_string_source mismatch for {language:?} {rust_kind} {rust_text:?}" - ); - } - } - - #[test] - fn terminal_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n foo()\nend\n", - Language::Ruby, - ".rb", - "argument_list", - "()", - ), - ( - "def f\n foo\n foo()\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "foo\n foo()", - ), - ( - "def f():\n foo()\n", - Language::Python, - ".py", - "argument_list", - "()", - ), - ( - "def f():\n foo\n", - Language::Python, - ".py", - "block", - "foo", - ), - ( - "function f() { foo(); }\n", - Language::TypeScript, - ".ts", - "arguments", - "()", - ), - ( - "function f()\n foo()\nend\n", - Language::Lua, - ".lua", - "arguments", - "()", - ), - ] { - let tree = raw_tree(source, language); - 
let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - normalizer.terminal_statement(node), - ruby_private_predicate(source, language, suffix, "terminal_statement?", kind, text), - "terminal_statement? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_terminal_statement_matches_ruby_private_method() { - let cases = vec![ - ( - "yield\n", - Language::Ruby, - ".rb", - "yield", - "yield", - "yield", - Vec::<&str>::new(), - ), - ( - "@name\n", - Language::Ruby, - ".rb", - "instance_variable", - "instance_variable", - "@name", - Vec::<&str>::new(), - ), - ( - "$1\n$value\n", - Language::Ruby, - ".rb", - "global_variable", - "global_variable", - "$1", - Vec::<&str>::new(), - ), - ( - "$1\n$value\n", - Language::Ruby, - ".rb", - "global_variable", - "global_variable", - "$value", - Vec::<&str>::new(), - ), - ( - "nil\ntrue\nfalse\n", - Language::Ruby, - ".rb", - "nil", - "nil", - "nil", - Vec::<&str>::new(), - ), - ( - "nil\ntrue\nfalse\n", - Language::Ruby, - ".rb", - "true", - "true", - "true", - Vec::<&str>::new(), - ), - ( - "nil\ntrue\nfalse\n", - Language::Ruby, - ".rb", - "false", - "false", - "false", - Vec::<&str>::new(), - ), - ( - ":ready\n", - Language::Ruby, - ".rb", - "simple_symbol", - "simple_symbol", - ":ready", - Vec::<&str>::new(), - ), - ( - "-123\n", - Language::Ruby, - ".rb", - "unary", - "unary", - "-123", - Vec::<&str>::new(), - ), - ( - "[]\n", - Language::Ruby, - ".rb", - "array", - "array", - "[]", - Vec::<&str>::new(), - ), - ( - "foo\n", - Language::Ruby, - ".rb", - "identifier", - "identifier", - "foo", - Vec::<&str>::new(), - ), - ( - "foo\n", - Language::Ruby, - ".rb", - "identifier", - "identifier", - "foo", - vec!["foo"], - ), - ( - "foo\n", - Language::Python, - ".py", - "expression_statement", - "identifier", - "foo", - Vec::<&str>::new(), - ), - ( - "foo;\n", - Language::TypeScript, - ".ts", - 
"identifier", - "identifier", - "foo", - Vec::<&str>::new(), - ), - ( - "foo()\n", - Language::Lua, - ".lua", - "identifier", - "identifier", - "foo", - Vec::<&str>::new(), - ), - ( - "foo()\n", - Language::Ruby, - ".rb", - "argument_list", - "argument_list", - "()", - Vec::<&str>::new(), - ), - ]; - - for (source, language, suffix, ruby_kind, rust_kind, text, locals) in cases { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, rust_kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - if !locals.is_empty() { - normalizer - .local_stack - .push(locals.iter().map(|name| name.to_string()).collect()); - } - let rust = node_value(&normalizer.normalize_terminal_statement(node)); - - assert_eq!( - rust, - ruby_private_normalize_terminal_statement_value( - source, - language, - suffix, - ruby_kind, - text, - &locals, - ), - "normalize_terminal_statement mismatch for {language:?} ruby={ruby_kind} rust={rust_kind} {text:?} locals={locals:?}" - ); - } - } - - #[test] - fn operator_assignment_statement_parts_matches_ruby_private_method() { - for (source, language, suffix, ruby_kind, ruby_text, rust_kind, rust_text) in [ - ( - "def f\n x += 1\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "x += 1", - "operator_assignment", - "x += 1", - ), - ( - "def f\n x ||= y\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "x ||= y", - "operator_assignment", - "x ||= y", - ), - ( - "def f\n x += 1\n y += 2\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "x += 1\n y += 2", - "body_statement", - "x += 1\n y += 2", - ), - ( - "def f():\n x += 1\n", - Language::Python, - ".py", - "block", - "x += 1", - "augmented_assignment", - "x += 1", - ), - ( - "function f() { obj.x ||= y; }\n", - Language::TypeScript, - ".ts", - "augmented_assignment_expression", - "obj.x ||= y", - "augmented_assignment_expression", - "obj.x ||= y", - ), - ( - "function f() { x += 1; }\n", - Language::TypeScript, 
- ".ts", - "expression_statement", - "x += 1;", - "expression_statement", - "x += 1;", - ), - ( - "function f()\n x = x + 1\nend\n", - Language::Lua, - ".lua", - "block", - "x = x + 1", - "block", - "x = x + 1", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, rust_kind, rust_text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer.operator_assignment_statement_parts(node).map( - |(left, operator, right)| { - ( - left.kind().to_string(), - super::node_text(left, source).to_string(), - operator, - right.kind().to_string(), - super::node_text(right, source).to_string(), - ) - }, - ); - let ruby = ruby_private_operator_assignment_statement_parts_signature( - source, language, suffix, ruby_kind, ruby_text, - ); - - assert_eq!( - rust, ruby, - "operator_assignment_statement_parts mismatch for {language:?} {rust_kind} {rust_text:?}" - ); - } - } - - #[test] - fn operator_assignment_statement_matches_ruby_private_predicate() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n x += 1\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "x += 1", - ), - ( - "def f\n x ||= y\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "x ||= y", - ), - ( - "def f\n x = 1\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "x = 1", - ), - ( - "def f\n x += 1\n y += 2\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "x += 1\n y += 2", - ), - ( - "def f():\n x += 1\n", - Language::Python, - ".py", - "block", - "x += 1", - ), - ( - "function f() { x += 1; }\n", - Language::TypeScript, - ".ts", - "expression_statement", - "x += 1;", - ), - ( - "function f()\n x = x + 1\nend\n", - Language::Lua, - ".lua", - "block", - "x = x + 1", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let normalizer = super::TreeSitterNormalizer::new(source, language); - - assert_eq!( - 
normalizer.operator_assignment_statement(node), - ruby_private_predicate( - source, - language, - suffix, - "operator_assignment_statement?", - kind, - text - ), - "operator_assignment_statement? mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn normalize_operator_assignment_statement_matches_ruby_private_method() { - for (source, language, suffix, kind, text) in [ - ( - "def f\n x += 1\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "x += 1", - ), - ( - "def f\n x ||= y\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "x ||= y", - ), - ( - "def f\n items[index] += value\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "items[index] += value", - ), - ( - "def f\n object.value += 1\nend\n", - Language::Ruby, - ".rb", - "body_statement", - "object.value += 1", - ), - ( - "def f():\n x += 1\n", - Language::Python, - ".py", - "block", - "x += 1", - ), - ( - "function f() { x += 1; }\n", - Language::TypeScript, - ".ts", - "augmented_assignment_expression", - "x += 1", - ), - ( - "function f() { obj.x ||= y; }\n", - Language::TypeScript, - ".ts", - "augmented_assignment_expression", - "obj.x ||= y", - ), - ] { - let tree = raw_tree(source, language); - let node = first_raw_node(tree.root_node(), source, kind, text); - let mut normalizer = super::TreeSitterNormalizer::new(source, language); - let rust = normalizer - .normalize_operator_assignment_statement(node) - .map(|node| node_value(&node)) - .unwrap_or(Value::Null); - - assert_eq!( - rust, - ruby_private_normalize_method_value( - source, - language, - suffix, - "normalize_operator_assignment_statement", - kind, - text - ), - "normalize_operator_assignment_statement mismatch for {language:?} {kind} {text:?}" - ); - } - } - - #[test] - fn python_f_string_interpolation_next_to_equals_is_evstr_not_assignment() { - let root = parse_language_source( - r#" -class Tag: - @property - def markup(self): - return f"[{self.name}={self.parameters}]" -"#, - Language::Python, - 
".py", - ); - let dstr = first_node(&root, "DSTR", r#"f"[{self.name}={self.parameters}]""#); - - let types = child_types(dstr); - assert_eq!( - types, - vec![ - "STRING_START", - "STR", - "EVSTR", - "STR", - "EVSTR", - "STR", - "STRING_END" - ], - "expected Ruby-style f-string interpolation parts in {dstr:#?}" - ); - assert!( - !types.contains(&"LASGN"), - "interpolation next to '=' must not normalize as assignment: {dstr:#?}" - ); - } - - #[test] - fn python_relative_import_prefix_only_has_no_children() { - let root = parse_language_source( - r#" -if __name__ == "__main__": - from . import box as box -"#, - Language::Python, - ".py", - ); - let relative_import = first_node(&root, "RELATIVE_IMPORT", "."); - - assert!( - relative_import.children.is_empty(), - "Ruby exposes bare relative import prefix as an empty RELATIVE_IMPORT: {relative_import:#?}" - ); - } - - #[test] - fn python_annotation_type_wrappers_match_ruby_tree_shape() { - let root = parse_language_source( - r#" -from typing import Callable - -_is_single_cell_widths: Callable[[str], bool] = value -last_measured_character: str | None = None -fileno: Callable[[], int] | None = value -"#, - Language::Python, - ".py", - ); - - let str_list_type = first_node(&root, "TYPE", "[str]"); - assert_eq!(child_types(str_list_type), vec!["LVAR"]); - assert_eq!( - child_node(str_list_type, 0).children, - vec![Child::String("str".to_string())] - ); - - let empty_list_type = first_node(&root, "TYPE", "[]"); - assert!( - empty_list_type.children.is_empty(), - "Ruby keeps Callable[[]] list type empty: {empty_list_type:#?}" - ); - - let union_type = first_node(&root, "TYPE", "str | None"); - assert_eq!(child_types(union_type), vec!["LVAR", "NIL"]); - } - - #[test] - fn python_docstring_only_class_body_stays_block_wrapped() { - let root = parse_language_source( - r#" -class ColorParseError(Exception): - """The color could not be parsed.""" -"#, - Language::Python, - ".py", - ); - let class_node = first_node( - &root, - 
"CLASS", - "class ColorParseError(Exception):\n \"\"\"The color could not be parsed.\"\"\"", - ); - let scope = child_node(class_node, 2); - let body = child_node(scope, 2); - - assert_eq!(body.r#type, "BLOCK"); - assert_eq!( - child_types(body), - vec!["STRING_START", "STR", "STRING_END"], - "Ruby exposes docstring-only class body as BLOCK of string parts: {body:#?}" - ); - } - - #[test] - fn python_ellipsis_only_function_body_is_empty_scope_with_root_source() { - assert_ruby_parity( - r#"def __rich__(): - ... -"#, - Language::Python, - ".py", - ); - } - - #[test] - fn python_explicit_return_none_is_not_elided_from_function_body() { - let source = r#" -class Thing: - def _repr_latex_(self): - return None -"#; - let root = parse_language_source(source, Language::Python, ".py"); - let defn = first_node( - &root, - "DEFN", - "def _repr_latex_(self):\n return None", - ); - let scope = child_node(defn, 1); - let body = child_node(scope, 2); - - assert_eq!(body.r#type, "RETURN"); - assert_eq!( - child_node(body, 0).r#type, - "NIL", - "Ruby only elides implicit nil bodies for Ruby, not explicit Python return None: {scope:#?}" - ); - assert_ruby_parity(source, Language::Python, ".py"); - } - - #[test] - fn python_with_attribute_item_uses_ruby_clause_children() { - let root = parse_language_source( - r#" -def page(self): - with self._console._lock: - buffer = self._console._buffer[:] -"#, - Language::Python, - ".py", - ); - let clause = first_node(&root, "WITH_CLAUSE", "self._console._lock"); - - assert_eq!( - child_types(clause), - vec!["CALL", "LVAR"], - "Ruby with_clause exposes attribute receiver and field separately: {clause:#?}" - ); - assert_eq!(child_node(clause, 0).text, "self._console"); - assert_eq!(child_node(clause, 1).text, "_lock"); - } - - #[test] - fn python_bare_identifier_expression_statement_has_no_children() { - let root = parse_language_source( - r#" -def _is_jupyter(): - try: - get_ipython # type: ignore[name-defined] - except NameError: - return 
False -"#, - Language::Python, - ".py", - ); - let expression = first_node(&root, "EXPRESSION_STATEMENT", "get_ipython"); - - assert!( - expression.children.is_empty(), - "Ruby parser exposes bare identifier expression statements without named children: {expression:#?}" - ); - } - - #[test] - fn python_bare_identifier_only_block_has_no_children() { - assert_ruby_parity( - r#" -def get_exception(): - try: - pass - except: - foobarbaz -"#, - Language::Python, - ".py", - ); - } - - #[test] - fn python_bare_dotted_expression_statement_normalizes_as_call() { - let root = parse_language_source("os.get_terminal_size\n", Language::Python, ".py"); - let call = first_node(&root, "CALL", "os.get_terminal_size"); - - assert_eq!( - child_types(call), - vec!["LVAR"], - "bare Python dotted expression statements should normalize as calls: {call:#?}" - ); - } - - #[test] - fn python_bare_comparison_expression_statement_keeps_statement_wrapper() { - let root = parse_language_source( - r#" -def test_get_style(): - console.get_style("repr.brace") == Style(bold=True) -"#, - Language::Python, - ".py", - ); - let expression = first_node( - &root, - "EXPRESSION_STATEMENT", - r#"console.get_style("repr.brace") == Style(bold=True)"#, - ); - - assert_eq!( - child_types(expression), - vec!["CALL", "FCALL"], - "Ruby exposes bare comparison statements as expression_statement operand children: {expression:#?}" - ); - } - - #[test] - fn python_delete_statement_matches_ruby_block_contexts() { - assert_ruby_parity( - r#" -def save(self, clear): - if clear: - del self._record_buffer[:] - with self._record_buffer_lock: - del self._record_buffer[:] - text = "" -"#, - Language::Python, - ".py", - ); - } - - #[test] - fn python_single_subscript_expression_block_exposes_subscript_children() { - assert_ruby_parity( - r#" -def test_render(): - with pytest.raises(KeyError): - top["asdasd"] -"#, - Language::Python, - ".py", - ); - } - - #[test] - fn python_single_if_block_under_try_matches_ruby_if_shape() { 
- let root = parse_language_source( - r#" -def load(args): - try: - if args.path == "-": - json_data = sys.stdin.read() - else: - json_data = Path(args.path).read_text() - except Exception as error: - sys.exit(-1) -"#, - Language::Python, - ".py", - ); - let if_node = first_node( - &root, - "IF", - "if args.path == \"-\":\n json_data = sys.stdin.read()\n else:\n json_data = Path(args.path).read_text()", - ); - - assert_eq!( - child_types(if_node), - vec!["OPCALL", "LASGN", "ELSE_CLAUSE"], - "Ruby normalizes this Python try-body child as an IF: {if_node:#?}" - ); - assert_eq!(child_types(child_node(if_node, 2)), vec!["BLOCK"]); - } - - #[test] - fn python_single_decorated_definition_block_exposes_decorator_and_function() { - assert_ruby_parity( - r#" -def test_inspect_swig_edge_case(): - class Thing: - @property - def __class__(self): - raise AttributeError -"#, - Language::Python, - ".py", - ); - } - - #[test] - fn python_nested_class_inside_class_body_matches_ruby_iter_shape() { - let root = parse_language_source( - r#" -def test_can_handle_special_characters_in_docstrings(): - class Something: - class Thing: - pass -"#, - Language::Python, - ".py", - ); - let iter = first_node(&root, "ITER", "class Thing:\n pass"); - - assert_eq!(child_node(iter, 0).r#type, "VCALL"); - assert_eq!( - child_node(iter, 0).children, - vec![Child::Symbol("Thing".to_string()), Child::Nil] - ); - assert_eq!(child_node(iter, 1).r#type, "SCOPE"); - } - - #[test] - fn lua_local_assignment_call_rhs_matches_ruby_expression_list_shape() { - let root = parse_language_source( - r#"local test_env = require("spec.util.test_env") -"#, - Language::Lua, - ".lua", - ); - let expression_list = - first_node(&root, "EXPRESSION_LIST", r#"require("spec.util.test_env")"#); - - assert_eq!( - child_types(expression_list), - vec!["LVAR", "ARGUMENTS"], - "Ruby exposes a Lua call RHS expression_list as the call function and arguments, without a FUNCTION_CALL wrapper: {expression_list:#?}" - ); - } - - #[test] - 
fn lua_local_assignment_member_rhs_matches_ruby_expression_list_shape() { - let root = parse_language_source("local run = test_env.run\n", Language::Lua, ".lua"); - let expression_list = first_node(&root, "EXPRESSION_LIST", "test_env.run"); - - assert_eq!( - child_types(expression_list), - vec!["LVAR", "LVAR"], - "Ruby exposes a Lua dotted RHS expression_list as receiver and field, without a DOT_INDEX_EXPRESSION wrapper: {expression_list:#?}" - ); - } - - #[test] - fn lua_table_string_entry_matches_ruby_field_shape() { - let root = parse_language_source( - "local extra_rocks = {\n \"/luasocket-${LUASOCKET}.src.rock\",\n}\n", - Language::Lua, - ".lua", - ); - let expression_list = first_node( - &root, - "EXPRESSION_LIST", - "{\n \"/luasocket-${LUASOCKET}.src.rock\",\n}", - ); - let field = child_node(expression_list, 0); - let string = child_node(field, 0); - - assert_eq!( - child_types(expression_list), - vec!["FIELD"], - "Ruby exposes a Lua table constructor assignment RHS as its field children: {expression_list:#?}" - ); - assert_eq!(string.r#type, "STR"); - assert_eq!( - string.children, - vec![Child::String("/luasocket-${LUASOCKET}.src.rock".to_string())], - "Ruby normalizes a Lua table string field from string_content, without quotes: {string:#?}" - ); - } - - #[test] - fn lua_table_dollar_string_entry_matches_ruby_str_not_gvar() { - let root = parse_language_source( - "local incdirs = { \"$(FOO1_INCDIR)\" }\n", - Language::Lua, - ".lua", - ); - let string = first_node(&root, "STR", "$(FOO1_INCDIR)"); - let mut gvars = Vec::new(); - nodes_of_type(&root, "GVAR", &mut gvars); - - assert_eq!( - string.children, - vec![Child::String("$(FOO1_INCDIR)".to_string())], - "Ruby normalizes Lua table strings starting with $ as STR, not GVAR: {string:#?}" - ); - assert!( - gvars.is_empty(), - "Lua string_content starting with $ must not normalize as GVAR: {gvars:#?}" - ); - } - - #[test] - fn lua_table_call_entry_matches_ruby_field_children_shape() { - assert_ruby_parity( 
- "assert.same(install, { bin = { P\"bin/binfile\" } })\n", - Language::Lua, - ".lua", - ); - } - - #[test] - fn lua_table_identifier_entry_matches_ruby_empty_field_shape() { - assert_ruby_parity( - "local rocks_path = table.concat({rocks_tree, \"a_rock\"})\n", - Language::Lua, - ".lua", - ); - } - - #[test] - fn lua_single_call_function_body_matches_ruby_block_shape() { - assert_ruby_parity( - "before_each(function()\n test_env.setup_specs(extra_rocks)\nend)\n", - Language::Lua, - ".lua", - ); - } - - #[test] - fn lua_single_assignment_function_body_matches_ruby_lasgn_shape() { - assert_ruby_parity( - "lazy_setup(function()\n git = git_repo.start()\nend)\n", - Language::Lua, - ".lua", - ); - } - - #[test] - fn lua_single_bare_assignment_function_body_matches_ruby_lasgn_shape() { - let root = parse_language_source("function()\n x = y\nend\n", Language::Lua, ".lua"); - let defn = first_node(&root, "DEFN", "function()\n x = y\nend"); - let scope = child_node(defn, 1); - let body = child_node(scope, 2); - let right = child_node(body, 1); - - assert_eq!(body.r#type, "LASGN"); - assert_eq!(body.children.first(), Some(&Child::String("x".to_string()))); - assert_eq!(right.r#type, "EXPRESSION_LIST"); - assert!( - right.children.is_empty(), - "Ruby exposes a bare identifier Lua single-assignment RHS with no children: {right:#?}" - ); - } - - #[test] - fn lua_single_dotted_assignment_function_body_normalizes_as_attribute_assignment() { - let root = parse_language_source( - "function()\n package.path = oldpath\nend\n", - Language::Lua, - ".lua", - ); - let defn = first_node(&root, "DEFN", "function()\n package.path = oldpath\nend"); - let scope = child_node(defn, 1); - let body = child_node(scope, 2); - let assignment = body; - let receiver = child_node(assignment, 0); - let args = child_node(assignment, 2); - - assert_eq!(body.r#type, "ATTRASGN"); - assert_eq!(receiver.r#type, "LVAR"); - assert_eq!( - receiver.children, - vec![Child::String("package".to_string())] - ); - 
assert_eq!( - assignment.children.get(1), - Some(&Child::Symbol("path=".to_string())) - ); - assert_eq!(args.r#type, "LIST"); - } - - #[test] - fn lua_single_local_assignment_function_body_matches_ruby_lasgn_shape() { - assert_ruby_parity( - "it(function()\n local output = run.luarocks(\"show --rock-tree luacov\")\nend)\n", - Language::Lua, - ".lua", - ); - } - - #[test] - fn lua_assigned_function_expression_matches_ruby_expression_list_shape() { - assert_ruby_parity( - "local test_with_location = function(location)\n lfs.mkdir(location)\nend\n", - Language::Lua, - ".lua", - ); - } - - #[test] - fn lua_assigned_function_if_else_matches_fixed_ruby_if_shape() { - assert_ruby_parity( - "local make_unreadable = function(path)\n if is_win then\n fs.execute(\"x\")\n else\n fs.execute(\"y\")\n end\nend\n", - Language::Lua, - ".lua", - ); - } - - #[test] - fn lua_single_return_function_body_matches_ruby_opcall_shape() { - let source = "function sum.sum(a, b)\n return a + b\nend\n"; - let root = parse_language_source(source, Language::Lua, ".lua"); - let defn = first_node( - &root, - "DEFN", - "function sum.sum(a, b)\n return a + b\nend", - ); - let scope = child_node(defn, 1); - let body = child_node(scope, 2); - let returned = child_node(body, 0); - - assert_eq!(body.r#type, "RETURN"); - assert_eq!(returned.r#type, "OPCALL"); - assert_eq!( - returned.children.get(1), - Some(&Child::Symbol("+".to_string())), - "Ruby exposes a single Lua return body as RETURN wrapping the returned operator call: {body:#?}" - ); - assert_ruby_parity(source, Language::Lua, ".lua"); - } - - #[test] - fn lua_top_level_return_identifier_matches_ruby_empty_expression_list() { - let root = parse_language_source("return sum\n", Language::Lua, ".lua"); - let return_node = first_node(&root, "RETURN", "return sum"); - let expression_list = child_node(return_node, 0); - - assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); - assert!( - expression_list.children.is_empty(), - "Ruby exposes a Lua 
return of a bare identifier as an empty expression_list: {expression_list:#?}" - ); - } - - #[test] - fn lua_top_level_return_scalar_literals_match_ruby_empty_expression_list() { - for literal in ["true", "false", "nil", "0"] { - let root = parse_language_source(&format!("return {literal}\n"), Language::Lua, ".lua"); - let return_node = first_node(&root, "RETURN", &format!("return {literal}")); - let expression_list = child_node(return_node, 0); - - assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); - assert!( - expression_list.children.is_empty(), - "Ruby exposes a Lua return of {literal} as an empty expression_list: {expression_list:#?}" - ); - } - } - - #[test] - fn lua_assignment_scalar_literals_match_ruby_empty_expression_list() { - for literal in ["true", "false", "nil", "0"] { - let root = - parse_language_source(&format!("tmpfile = {literal}\n"), Language::Lua, ".lua"); - let assignment = first_node(&root, "LASGN", &format!("tmpfile = {literal}")); - let expression_list = child_node(assignment, 1); - - assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); - assert!( - expression_list.children.is_empty(), - "Ruby exposes a Lua scalar literal assignment RHS as an empty expression_list: {expression_list:#?}" - ); - } - } - - #[test] - fn lua_no_paren_string_argument_matches_ruby_string_content_shape() { - let root = parse_language_source("V\"foo\"\n", Language::Lua, ".lua"); - let call = first_node(&root, "FUNCTION_CALL", "V\"foo\""); - let arguments = child_node(call, 1); - let string = child_node(arguments, 0); - - assert_eq!(arguments.r#type, "ARGUMENTS"); - assert_eq!(arguments.text, "\"foo\""); - assert_eq!(string.r#type, "STR"); - assert_eq!(string.text, "foo"); - assert_eq!(string.children, vec![Child::String("foo".to_string())]); - } - - #[test] - fn lua_long_string_assignment_matches_ruby_expression_list_content_shape() { - assert_ruby_parity( - "local c_module_source = [[\n #include \n]]\n", - Language::Lua, - ".lua", - ); - } - - #[test] - fn 
lua_elseif_branch_is_preserved_as_if_alternative() { - let root = parse_language_source( - r#"if test_env.LUA_V == "5.1" then - one() -elseif test_env.LUA_V == "5.2" then - two() -end -"#, - Language::Lua, - ".lua", - ); - let if_node = first_node( - &root, - "IF", - "if test_env.LUA_V == \"5.1\" then\n one()\nelseif test_env.LUA_V == \"5.2\" then\n two()\nend", - ); - let alternative = child_node(if_node, 2); - - assert_eq!(alternative.r#type, "ELSEIF_STATEMENT"); - } - - #[test] - fn lua_binary_assignment_rhs_matches_ruby_expression_list_shape() { - let root = parse_language_source( - "local rockspec = testing_paths.fixtures_dir .. \"/build_only_deps-0.1-1.rockspec\"\n", - Language::Lua, - ".lua", - ); - let expression_list = first_node( - &root, - "EXPRESSION_LIST", - "testing_paths.fixtures_dir .. \"/build_only_deps-0.1-1.rockspec\"", - ); - - assert_eq!( - child_types(expression_list), - vec!["DOT_INDEX_EXPRESSION", "STR"], - "Ruby exposes a Lua binary RHS expression_list as the binary operands, without a BINARY_EXPRESSION wrapper: {expression_list:#?}" - ); - } - - #[test] - fn lua_local_declaration_without_rhs_matches_ruby_empty_variable_list() { - let root = parse_language_source("local tmpdir\n", Language::Lua, ".lua"); - let variable_list = first_node(&root, "VARIABLE_LIST", "tmpdir"); - - assert!( - variable_list.children.is_empty(), - "Ruby exposes a Lua local declaration without RHS as an empty VARIABLE_LIST: {variable_list:#?}" - ); - } - - #[test] - fn lua_multi_local_declaration_without_rhs_keeps_ruby_variable_list_children() { - let root = parse_language_source("local cfg, fs\n", Language::Lua, ".lua"); - let variable_list = first_node(&root, "VARIABLE_LIST", "cfg, fs"); - - assert_eq!( - child_types(variable_list), - vec!["LVAR", "LVAR"], - "Ruby keeps children for a multi-name Lua local declaration without RHS: {variable_list:#?}" - ); - } - - #[test] - fn lua_single_generic_for_variable_matches_ruby_empty_variable_list() { - let root = 
parse_language_source( - "for f in lfs.dir(spec_quick) do end\n", - Language::Lua, - ".lua", - ); - let variable_list = first_node(&root, "VARIABLE_LIST", "f"); - - assert!( - variable_list.children.is_empty(), - "Ruby exposes a single Lua generic-for variable list as empty: {variable_list:#?}" - ); - } - - #[test] - fn lua_multi_generic_for_variable_list_keeps_ruby_children() { - let root = - parse_language_source("for _, t in ipairs(tests) do end\n", Language::Lua, ".lua"); - let variable_list = first_node(&root, "VARIABLE_LIST", "_, t"); - - assert_eq!( - child_types(variable_list), - vec!["LVAR", "LVAR"], - "Ruby keeps children for a multi-name Lua generic-for variable list: {variable_list:#?}" - ); - } - - #[test] - fn normalizes_safe_navigation_inside_multi_statement_else_body() { - let root = parse_source( - r#" -def x(cond, node) - if cond - node.storage = :stack - else - node.storage = :heap - current_fn_ctx&.record_heap_use! - end -end -"#, - ); - let mut qcalls = Vec::new(); - nodes_of_type(&root, "QCALL", &mut qcalls); - - assert!( - qcalls - .iter() - .any(|node| node.text == "current_fn_ctx&.record_heap_use!"), - "expected normalized QCALL for current_fn_ctx safe navigation, got {qcalls:#?} in {root:#?}" - ); - } - - #[test] - fn normalizes_visibility_wrapped_singleton_def() { - let root = parse_source( - r#" -private_class_method def self.collect_payload_binding_names(node, names) - if node.is_a?(AST::Identifier) - return - end - AST.wrapped_children(node).each { |child| collect_payload_binding_names(child, names) if child.is_a?(AST::Locatable) } -end -"#, - ); - let mut defs = Vec::new(); - nodes_of_type(&root, "DEFS", &mut defs); - - assert!( - defs.iter().any(|node| node.children.get(1) - == Some(&Child::Symbol("collect_payload_binding_names".to_string()))), - "expected normalized DEFS for visibility-wrapped singleton def, got {root:#?}" - ); - - let def = defs - .into_iter() - .find(|node| { - node.children.get(1) - == 
Some(&Child::Symbol("collect_payload_binding_names".to_string())) - }) - .expect("visibility-wrapped singleton def should normalize to DEFS"); - let mut calls = Vec::new(); - nodes_of_type(def, "CALL", &mut calls); - nodes_of_type(def, "FCALL", &mut calls); - calls.sort_by_key(|node| (node.first_lineno, node.first_column)); - let ordered = calls - .iter() - .map(|node| (node.first_lineno, node.text.as_str())) - .collect::>(); - - let first_if_call = ordered - .iter() - .position(|(_line, text)| *text == "node.is_a?(AST::Identifier)") - .expect("expected identifier guard call"); - let recursive_call = ordered - .iter() - .position(|(_line, text)| *text == "collect_payload_binding_names(child, names)") - .expect("expected recursive payload scan call"); - assert!( - first_if_call < recursive_call, - "expected method body calls in source order, got {ordered:#?} in {root:#?}" - ); - } - - #[test] - fn normalizes_heredoc_beginning_as_dynamic_string_receiver() { - let root = parse_source( - r#" -def emit - <<~ZIG.chomp - hi - ZIG -end -"#, - ); - let mut calls = Vec::new(); - nodes_of_type(&root, "CALL", &mut calls); - - let call = calls - .iter() - .find(|node| node.text == "<<~ZIG.chomp") - .expect("expected heredoc chomp call"); - assert_eq!( - call.children.get(1), - Some(&Child::Symbol("chomp".to_string())) - ); - assert_eq!( - call.children - .first() - .and_then(super::node) - .map(|node| node.r#type.as_str()), - Some("DSTR") - ); - } - - #[test] - fn flatten_and_matches_ruby_ast_helper() { - let left = Node { - r#type: "LVAR".to_string(), - children: vec![Child::String("a".to_string())], - first_lineno: 1, - first_column: 0, - last_lineno: 1, - last_column: 1, - text: "a".to_string(), - }; - let right = Node { - r#type: "LVAR".to_string(), - children: vec![Child::String("b".to_string())], - first_lineno: 1, - first_column: 5, - last_lineno: 1, - last_column: 6, - text: "b".to_string(), - }; - let and_node = Node { - r#type: "AND".to_string(), - children: 
vec![Child::Node(Box::new(left)), Child::Node(Box::new(right))], - first_lineno: 1, - first_column: 0, - last_lineno: 1, - last_column: 6, - text: "a && b".to_string(), - }; - - assert_eq!(super::flatten_and(&and_node).len(), 2); - } -} +#[path = "ast-test.rs"] +mod tests; diff --git a/gems/decomplex/test/ast_test.rb b/gems/decomplex/test/ast_test.rb index 295192024..d487e51d4 100644 --- a/gems/decomplex/test/ast_test.rb +++ b/gems/decomplex/test/ast_test.rb @@ -328,6 +328,7 @@ def test_unary_not_statement_predicate def test_unary_not_expression_predicate normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, fake_document(:ruby)) ruby_source = "def check\n !flag\n !!flag\n -flag\n not flag\nend\n" assert normalizer.send(:unary_not_expression?, ruby_syntax_node(ruby_source, "unary", "!flag")) @@ -337,6 +338,7 @@ def test_unary_not_expression_predicate with_language_file("function check(flag: boolean) { return !flag; }\n", ".ts", :typescript) do |file| document = parse_syntax(file, :typescript) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) node = ts_nodes(document.root).find { |candidate| candidate.kind == "unary_expression" && candidate.text == "!flag" } refute_nil node assert normalizer.send(:unary_not_expression?, node) @@ -344,6 +346,7 @@ def test_unary_not_expression_predicate with_language_file("if not flag:\n pass\n", ".py", :python) do |file| document = parse_syntax(file, :python) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) node = ts_nodes(document.root).find { |candidate| candidate.kind == "not_operator" && candidate.text == "not flag" } refute_nil node refute normalizer.send(:unary_not_expression?, node) @@ -351,6 +354,7 @@ def test_unary_not_expression_predicate with_language_file("if not flag then end\n", ".lua", :lua) do |file| document = parse_syntax(file, :lua) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) node = 
ts_nodes(document.root).find { |candidate| candidate.kind == "unary_expression" && candidate.text == "not flag" } refute_nil node refute normalizer.send(:unary_not_expression?, node) @@ -359,6 +363,7 @@ def test_unary_not_expression_predicate def test_unary_minus_expression_predicate normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, fake_document(:ruby)) ruby_source = "def check\n -flag\n !flag\n value\nend\n" assert normalizer.send(:unary_minus_expression?, ruby_syntax_node(ruby_source, "unary", "-flag")) @@ -401,6 +406,50 @@ def test_tree_sitter_normalizer_selects_language_specific_normalization_adapters end end + def test_tree_sitter_normalizer_rejects_unsupported_normalization_languages + error = assert_raises(Decomplex::Ast::UnsupportedLanguageError) do + Decomplex::Ast::TreeSitterNormalizationAdapter.for(fake_document(:go)) + end + + assert_includes error.message, ":go" + end + + def test_parse_semantic_returns_language_neutral_ruby_facts + with_language_file(<<~RB, ".rb", :ruby) do |file| + class User + def active? + admin? + end + end + RB + root, = Decomplex::Ast.parse_semantic(file, language: :ruby) + + assert Decomplex::Ast.semantic_node?(root) + assert_equal :root, root.type + assert_equal :ruby, root.language + assert root.children.any? { |node| node.type == :owner && node[:name] == "User" } + assert root.children.any? { |node| node.type == :function && node[:name] == "active?" } + assert root.children.any? { |node| node.type == :call && node[:message] == "admin?" } + refute root.children.any? 
{ |node| %i[DEFN VCALL FCALL CALL].include?(node.type) } + end + end + + def test_parse_semantic_returns_language_neutral_python_facts + with_python_file(<<~PY) do |file| + def check(user): + return user.active() + PY + root, = Decomplex::Ast.parse_semantic(file, language: :python) + + assert Decomplex::Ast.semantic_node?(root) + assert_equal :root, root.type + assert_equal :python, root.language + assert root.children.any? { |node| node.type == :function && node[:name] == "check" } + assert root.children.any? { |node| node.type == :call && node[:receiver] == "user" && node[:message] == "active" } + refute root.children.any? { |node| %i[DEFN VCALL FCALL CALL].include?(node.type) } + end + end + def test_safe_navigation_call_recognizes_typescript_optional_chain with_language_file("user?.name;\nuser?.name();\n", ".ts", :typescript) do |file| document = parse_syntax(file, :typescript) diff --git a/gems/decomplex/test/decision_pressure_test.rb b/gems/decomplex/test/decision_pressure_test.rb index db4906cb9..492c01f2b 100644 --- a/gems/decomplex/test/decision_pressure_test.rb +++ b/gems/decomplex/test/decision_pressure_test.rb @@ -155,4 +155,17 @@ def a(n) ensure f&.unlink end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + r = rank(<<~RB) + def a(node) + ti = node.full_type + return 1 if ti.is_a?(Type) + end + RB + + assert_equal ".full_type", r.first[:contract] + end + end end diff --git a/gems/decomplex/test/derived_state_test.rb b/gems/decomplex/test/derived_state_test.rb index 8002dad2d..a201944d6 100644 --- a/gems/decomplex/test/derived_state_test.rb +++ b/gems/decomplex/test/derived_state_test.rb @@ -104,4 +104,18 @@ def f(a) assert_equal "b", out.first[:derived] assert_equal "a", out.first[:source] end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + out = scan(<<~RB) + def f(a) + b = a 
+ 1 + a = recompute(a) + use(b) + end + RB + + assert_equal 1, out.size + end + end end diff --git a/gems/decomplex/test/examples_oracle_test.rb b/gems/decomplex/test/examples_oracle_test.rb new file mode 100644 index 000000000..038f77902 --- /dev/null +++ b/gems/decomplex/test/examples_oracle_test.rb @@ -0,0 +1,239 @@ +# frozen_string_literal: true + +require "json" +require "minitest/autorun" +require_relative "../lib/decomplex/detector_runner" + +class ExamplesOracleTest < Minitest::Test + EXAMPLES_ROOT = File.expand_path("../examples", __dir__) + ORACLE_DIR = File.join(EXAMPLES_ROOT, "oracles") + SOURCE_EXTENSIONS = %w[ + .rb .rs .zig .py .js .ts .cs .lua .c .cpp .java .kt .swift .go + ].freeze + LOCATION_KEYS = %w[ + at boundaries boundary_crossings component_lines defn examples file + gap_lines line locations predicate raw reason sites span spans source + ].freeze + + ORACLE_PATHS = Dir[File.join(ORACLE_DIR, "*.json")].sort.freeze + FIXTURE_PATHS = Dir[File.join(EXAMPLES_ROOT, "*", "*")] + .select { |path| SOURCE_EXTENSIONS.include?(File.extname(path)) } + .sort + .freeze + + def test_shared_oracle_files_exist + refute_empty ORACLE_PATHS + end + + def test_each_detector_has_one_fixture_per_language + languages = FIXTURE_PATHS.map { |path| File.basename(File.dirname(path)) }.uniq.sort + detectors = ORACLE_PATHS.map { |path| File.basename(path, ".json") }.sort + + detectors.each do |detector| + actual = FIXTURE_PATHS + .select { |path| File.basename(path, File.extname(path)) == detector } + .map { |path| File.basename(File.dirname(path)) } + .sort + assert_equal languages, actual, "#{detector} fixture languages" + end + end + + FIXTURE_PATHS.each_with_index do |fixture_path, index| + language = File.basename(File.dirname(fixture_path)) + detector = File.basename(fixture_path, File.extname(fixture_path)) + method_name = "test_#{index}_#{language}_#{detector.tr("-", "_")}_matches_shared_oracle" + + define_method(method_name) do + 
assert_fixture_matches_shared_oracle(fixture_path) + end + end + + private + + def assert_fixture_matches_shared_oracle(fixture_path) + detector = File.basename(fixture_path, File.extname(fixture_path)) + oracle_path = File.join(ORACLE_DIR, "#{detector}.json") + + assert File.file?(oracle_path), "missing shared oracle #{oracle_path}" + + oracle = JSON.parse(File.read(oracle_path)) + expected = oracle.fetch("expected") + assert meaningful?(expected), "#{oracle_path} expected projection is empty" + + options = symbolize_options(oracle.fetch("options", {})) + actual = JSON.parse( + Decomplex::DetectorRunner.canonical_json( + oracle.fetch("detector"), + [fixture_path], + engine: oracle.fetch("engine", "ruby"), + **options + ) + ) + + assert_equal expected, project_detector_output(detector, actual) + end + + def symbolize_options(options) + options.each_with_object({}) { |(key, value), out| out[key.to_sym] = value } + end + + def project_detector_output(detector, output) + case detector + when "co-update" + { + "co_written_pairs" => rows(output["co_written_pairs"], %w[pair support]), + "neglected_updates" => rows(output["neglected_updates"], %w[pair support has missing]) + } + when "decision-pressure" + present_rows(output) + when "predicate-alias" + { + "alias_clusters" => Array(output["alias_clusters"]).map do |row| + { "name_count" => Array(row["names"]).size } + end + } + when "miner" + { + "missing_abstractions" => present_rows(output["missing_abstractions"]) + } + when "semantic-alias" + { + "alias_clusters" => Array(output["alias_clusters"]).map do |row| + { "name_count" => Array(row["names"]).size } + end + } + when "flay-similarity" + findings = Array(output["findings"]) + defn_findings = findings.select { |row| row["node"].to_s == "defn" } + findings = defn_findings unless defn_findings.empty? 
+ findings.map do |row| + pick(row, %w[clone_type node]).merge("site_count" => Array(row["sites"]).size) + end.uniq + when "temporal-ordering-pressure" + Array(output).empty? ? [] : [{ "present" => true }] + when "state-branch-density" + Array(output).map do |row| + { "present" => !row.empty? } + end + when "redundant-nil-guard" + rows(output, %w[local]).uniq + when "state-mesh" + project_state_mesh(output) + when "inconsistent-rename-clone" + Array(output).map do |row| + pick(row, %w[ref_name]).merge("divergent_count" => Array(row["divergent"]).size) + end + when "derived-state" + rows(output, %w[derived source]) + when "implicit-control-flow" + { + "ordered_protocols" => present_rows(output["ordered_protocols"]), + "order_drift" => present_rows(output["order_drift"]) + } + when "weighted-inlined-complexity" + Array(output).map do |row| + pick(row, %w[method depth]).merge("callee_count" => Array(row["single_caller_callees"]).size) + end + when "locality-drag" + rows(output, %w[variable]) + when "operational-discontinuity" + rows(output, %w[resets confidence]) + when "oversized-predicate" + Array(output["findings"]).map do |row| + pick(row, %w[count]).merge("atom_count" => Array(row["atoms"]).size) + end + when "path-condition" + present_rows(output["neglected"]) + when "sequence-mine" + rows(output["broken"], %w[pair support has missing]) + when "function-lcom" + present_rows(output) + when "false-simplicity" + rows(output, %w[kind]) + when "fat-union" + present_rows(output["fat_unions"]) + when "local-flow" + Array(output).map do |method| + { + "statement_count" => Array(method["statements"]).size, + "boundary_count" => Array(method["boundaries"]).size + } + end + when "structural-topology" + { "present" => !Array(output["methods"]).empty? || !Array(output["edges"]).empty? 
} + else + scrub_locations(output) + end + end + + def project_state_mesh(output) + { "state_mesh" => { "present" => meaningful?(output.fetch("state_mesh", {})) } } + end + + def project_protocols(rows) + rows(rows, %w[protocol dependency states support observed missing]) + end + + def present_rows(value) + Array(value).empty? ? [] : [{ "present" => true }] + end + + def rows(value, keys) + Array(value).map { |row| pick(row, keys) } + end + + def pick(row, keys) + keys.each_with_object({}) do |key, out| + out[key] = canonical_value(row[key]) if row.key?(key) + end + end + + def canonical_value(value) + case value + when Hash + value.keys.map(&:to_s).sort.each_with_object({}) do |key, out| + original = value.key?(key) ? key : value.keys.find { |candidate| candidate.to_s == key } + out[key] = canonical_value(value.fetch(original)) + end + when Array + value.map { |item| canonical_value(item) } + when Symbol + value.to_s + else + value + end + end + + def scrub_locations(value) + case value + when Hash + value.keys.map(&:to_s).sort.each_with_object({}) do |key, out| + next if LOCATION_KEYS.include?(key) + + original = value.key?(key) ? key : value.keys.find { |candidate| candidate.to_s == key } + out[key] = scrub_locations(value.fetch(original)) + end + when Array + value.map { |item| scrub_locations(item) } + when Symbol + value.to_s + else + value + end + end + + def meaningful?(value) + case value + when Hash + value.any? { |_key, item| meaningful?(item) } + when Array + !value.empty? && value.any? { |item| meaningful?(item) } + when NilClass + false + when String + !value.empty? 
+ else + true + end + end +end diff --git a/gems/decomplex/test/false_simplicity_test.rb b/gems/decomplex/test/false_simplicity_test.rb index 9ccb053a3..cd50333ea 100644 --- a/gems/decomplex/test/false_simplicity_test.rb +++ b/gems/decomplex/test/false_simplicity_test.rb @@ -30,24 +30,6 @@ def scan2(ruby1, ruby2) Decomplex::FalseSimplicity.scan(paths) end - def ast(type, children = [], line: 1) - Decomplex::Ast::Node.new( - type: type, - children: children, - first_lineno: line, - first_column: 0, - last_lineno: line, - last_column: 1, - text: "" - ) - end - - def scan_ast(root, language:) - detector = Decomplex::FalseSimplicity.new("inline", [], language: language) - detector.walk(root, [], []) - Decomplex::FalseSimplicity::Report.new(detector.hits, detector.classrecs) - end - def has(r, kind, detail = nil) r.hits.any? { |h| h.kind == kind && (detail.nil? || h.detail == detail) } end @@ -58,29 +40,6 @@ def details(r, kind) # ---- 1. hidden dynamic dispatch ------------------------------------- - def test_non_ruby_languages_do_not_inherit_ruby_lexicon - root = ast(:ROOT, [ - ast(:CALL, [ast(:LVAR, ["obj"]), :send, nil]), - ast(:CALL, [ast(:CONST, [:File]), :read, nil]), - ast(:FCALL, [:getattr, nil]), - ast(:FCALL, [:eval, nil]) - ]) - - ruby = scan_ast(root, language: :ruby) - assert has(ruby, :dynamic_dispatch, "send") - assert has(ruby, :hidden_io, "File.read") - - python = scan_ast(root, language: :python) - refute has(python, :dynamic_dispatch, "send") - refute has(python, :hidden_io, "File.read") - assert has(python, :dynamic_dispatch, "getattr") - - zig = scan_ast(root, language: :zig) - refute has(zig, :dynamic_dispatch, "send") - refute has(zig, :hidden_io, "File.read") - assert has(zig, :metaprogramming, "eval") - end - def test_dynamic_dispatch_positive r = scan(<<~RB) def a(o); o.send(:m, 1); end @@ -107,6 +66,18 @@ def k; yield 9; end :>=, 2 end + def test_scan_does_not_use_legacy_ast_parse + f = Tempfile.new(["fs", ".rb"]) + f.write("def a(o); 
o.send(:m); end\n") + f.close + @tmp = [f] + + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse called" }) do + r = Decomplex::FalseSimplicity.scan([f.path]) + assert has(r, :dynamic_dispatch, "send") + end + end + def test_dynamic_dispatch_no_false_positive r = scan(<<~RB) def a(o); o.run(1); end diff --git a/gems/decomplex/test/fat_union_test.rb b/gems/decomplex/test/fat_union_test.rb index fde97706e..43344638d 100644 --- a/gems/decomplex/test/fat_union_test.rb +++ b/gems/decomplex/test/fat_union_test.rb @@ -197,4 +197,20 @@ def lower_b(n) ensure f end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + fu = scan(<<~RB) + def lower(n) + case n + when AST::Call then n.line; n.ty + when AST::Func then n.line; n.ty + when AST::Lit then n.line; n.ty + end + end + RB + + assert_equal 1, fu.size + end + end end diff --git a/gems/decomplex/test/inconsistent_rename_clone_test.rb b/gems/decomplex/test/inconsistent_rename_clone_test.rb index 49cec2e1c..35a8a50b7 100644 --- a/gems/decomplex/test/inconsistent_rename_clone_test.rb +++ b/gems/decomplex/test/inconsistent_rename_clone_test.rb @@ -81,4 +81,25 @@ def replace(parent, old_child, new_child) RB assert_empty out end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + out = scan(<<~RB) + def original + src = fetch(1) + check(src) + store(src) + finalize(src) + end + def pasted + dst = fetch(2) + check(dst) + store(src) + finalize(dst) + end + RB + + refute_empty out + end + end end diff --git a/gems/decomplex/test/local_flow_test.rb b/gems/decomplex/test/local_flow_test.rb index 3d2f2b9b0..5d0cfeb33 100644 --- a/gems/decomplex/test/local_flow_test.rb +++ b/gems/decomplex/test/local_flow_test.rb @@ -2,6 +2,7 @@ require "minitest/autorun" require "tempfile" +require_relative "../lib/decomplex/ast" require_relative 
"../lib/decomplex/local_flow" class LocalFlowTest < Minitest::Test @@ -58,6 +59,18 @@ class Worker assert_equal Set["input"], helper.statements.first.reads end + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + summaries = scan(<<~RB) + def top_level(value) + result = value + end + RB + + assert_equal ["top_level"], summaries.map(&:name) + end + end + private def scan(code) diff --git a/gems/decomplex/test/ordered_protocol_mine_test.rb b/gems/decomplex/test/ordered_protocol_mine_test.rb index 3ff52c68b..08fd01399 100644 --- a/gems/decomplex/test/ordered_protocol_mine_test.rb +++ b/gems/decomplex/test/ordered_protocol_mine_test.rb @@ -46,6 +46,24 @@ def drift; validate(node); prepare(node); commit(node); end assert_equal %w[phase], hit[:states] end + def test_scan_does_not_use_legacy_ast_parse + file = Tempfile.new(["ordered_protocol", ".rb"]) + file.write(<<~RB) + class CompilerPhase + def prepare; @phase = :prepared; end + def validate; @valid = @phase; end + def run; prepare; validate; end + end + RB + file.close + @files << file + + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse called" }) do + report = Decomplex::OrderedProtocolMine.scan([file.path]) + refute_empty report.ordered_protocols + end + end + def test_reports_single_state_dependent_protocol_pressure report = scan(<<~RB) class BillingService diff --git a/gems/decomplex/test/oversized_predicate_test.rb b/gems/decomplex/test/oversized_predicate_test.rb index fe808b0ca..6bc7133c9 100644 --- a/gems/decomplex/test/oversized_predicate_test.rb +++ b/gems/decomplex/test/oversized_predicate_test.rb @@ -2,6 +2,7 @@ require "minitest/autorun" require "tmpdir" +require_relative "../lib/decomplex/ast" require_relative "../lib/decomplex/oversized_predicate" class OversizedPredicateTest < Minitest::Test @@ -32,6 +33,24 @@ def eligible(t, info) end end + def test_scan_uses_syntax_not_ast_facades + 
Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + Decomplex::Ast.stub(:parse_semantic, ->(*) { raise "Ast.parse_semantic should not be used" }) do + with_file(<<~RUBY) do |file| + def eligible(t, info) + if t.map? && !t.numeric_map? && !info.close_zig && !t.sharded? + true + end + end + RUBY + findings = Decomplex::OversizedPredicate.scan([file]).findings + + assert_equal 1, findings.size + end + end + end + end + def test_nested_or_conditions_count_as_atoms with_file(<<~RUBY) do |file| def ready(a, b, c, d) diff --git a/gems/decomplex/test/path_condition_test.rb b/gems/decomplex/test/path_condition_test.rb index cf7ab897f..be4c6b36f 100644 --- a/gems/decomplex/test/path_condition_test.rb +++ b/gems/decomplex/test/path_condition_test.rb @@ -75,4 +75,15 @@ def only(x, y); go(x) if x.a? && y.b?; end assert_empty r.scattered(min_scatter: 2) assert_empty r.neglected(min_support: 3) end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + r = rep(<<~RB) + def one(x, y); go(x) if x.a? && y.b?; end + def two(x, y); go(x) if x.a? && y.b?; end + RB + + assert_equal 1, r.scattered(min_scatter: 2).size + end + end end diff --git a/gems/decomplex/test/predicate_alias_test.rb b/gems/decomplex/test/predicate_alias_test.rb index aef8d6fb5..2ddadfdae 100644 --- a/gems/decomplex/test/predicate_alias_test.rb +++ b/gems/decomplex/test/predicate_alias_test.rb @@ -57,4 +57,15 @@ def somewhere assert_equal 1, rm.size assert_equal "framey?", rm.first[:predicate] end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + pa = Decomplex::PredicateAlias.scan(files(<<~RB)) + def first?; true; end + def second?; true; end + RB + + assert_equal [%w[first? 
second?]], pa.alias_clusters.map { |cluster| cluster[:names].sort } + end + end end diff --git a/gems/decomplex/test/redundant_nil_guard_test.rb b/gems/decomplex/test/redundant_nil_guard_test.rb index 7846b6083..6d5203770 100644 --- a/gems/decomplex/test/redundant_nil_guard_test.rb +++ b/gems/decomplex/test/redundant_nil_guard_test.rb @@ -233,4 +233,15 @@ def use(x) ensure f&.unlink end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + assert_equal ["x&.call"], guards(<<~RB) + def use(x) + return if x.nil? + x&.call + end + RB + end + end end diff --git a/gems/decomplex/test/semantic_alias_test.rb b/gems/decomplex/test/semantic_alias_test.rb index ffe30beb5..bffca7ca8 100644 --- a/gems/decomplex/test/semantic_alias_test.rb +++ b/gems/decomplex/test/semantic_alias_test.rb @@ -19,7 +19,7 @@ def test_canon_strips_receiver_polarity_and_self_ivar assert_equal "provenance == :frame", c assert_equal "provenance == :frame", Decomplex::SemanticAlias.canon("@provenance == :frame") assert_equal "provenance == :frame", Decomplex::SemanticAlias.canon("self.provenance == :frame") - t, neg = Decomplex::Ast.canon_polarity("!x.heap?") + t, neg = Decomplex::SemanticAlias.canon_polarity("!x.heap?") assert_equal "x.heap?", t assert neg end @@ -64,4 +64,15 @@ def use(n); n.provenance == :heap ? 1 : 2; end assert_equal 1, rm.size assert_equal "heap?", rm.first[:predicate] end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + r = scan(<<~RB) + def frame?; @provenance == :frame; end + def is_frame?; provenance == :frame; end + RB + + assert_equal [%w[frame? 
is_frame?]], r.alias_clusters.map { |cluster| cluster[:names].sort } + end + end end diff --git a/gems/decomplex/test/sequence_mine_test.rb b/gems/decomplex/test/sequence_mine_test.rb index 67b07743d..2224b72f7 100644 --- a/gems/decomplex/test/sequence_mine_test.rb +++ b/gems/decomplex/test/sequence_mine_test.rb @@ -25,6 +25,21 @@ def d; alloc_mark(w); body4; cleanup(w); end assert(pairs.any? { |h| h[:pair] == %w[alloc_mark cleanup] && h[:support] == 4 }) end + def test_scan_uses_syntax_not_ast_facades + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + Decomplex::Ast.stub(:parse_semantic, ->(*) { raise "Ast.parse_semantic should not be used" }) do + r = scan(<<~RB) + def a; alloc_mark(x); cleanup(x); end + def b; alloc_mark(y); cleanup(y); end + def c; alloc_mark(z); cleanup(z); end + def d; alloc_mark(w); cleanup(w); end + RB + + assert(r.co_called_pairs(min_support: 4).any? { |h| h[:pair] == %w[alloc_mark cleanup] }) + end + end + end + def test_method_calling_one_without_the_other_is_broken_protocol r = scan(<<~RB) def a; alloc_mark(x); cleanup(x); end diff --git a/gems/decomplex/test/state_branch_density_test.rb b/gems/decomplex/test/state_branch_density_test.rb index 561fdf1bf..8d818198e 100644 --- a/gems/decomplex/test/state_branch_density_test.rb +++ b/gems/decomplex/test/state_branch_density_test.rb @@ -68,6 +68,23 @@ def pure(a, b) assert_empty rows end + def test_scan_uses_syntax_facts_not_legacy_ast_parse + f = Tempfile.new(["state_branch", ".rb"]) + f.write(<<~RB) + def risky(user) + pay if user.name + end + RB + f.close + @files << f + + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + rows = Decomplex::StateBranchDensity.scan([f.path]).findings + + assert_equal ["user.name"], rows.first[:state_refs] + end + end + def test_groups_multiple_state_branches_per_method_and_keeps_spans rows = scan(<<~RB) def lifecycle(order) diff --git a/gems/decomplex/test/state_mesh_test.rb 
b/gems/decomplex/test/state_mesh_test.rb index 9c29ee0b8..2d4a2ae4b 100644 --- a/gems/decomplex/test/state_mesh_test.rb +++ b/gems/decomplex/test/state_mesh_test.rb @@ -49,6 +49,28 @@ def c(x); x.storage = :heap; end sm.writes.each { |w| assert_equal "storage", w.norm } end + def test_scan_uses_syntax_facts_for_writes_and_reads + f = Tempfile.new(["sm", ".rb"]) + f.write(<<~RB) + def a(x); x.storage = :heap; end + def b(x); x.storage = :frame; end + def c(x); use(x.storage); end + RB + f.close + @tempfiles << f + no_misses = Struct.new(:reification_misses).new([]) + + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + Decomplex::SemanticAlias.stub(:scan, ->(*) { no_misses }) do + sm = Decomplex::StateMesh.scan([f.path]) + sm.run + + assert_equal 2, sm.writes.size + assert_equal 1, sm.reads.size + end + end + end + def test_discover_ivar_writes sm = scan(<<~RB) def a; @storage = :heap; end @@ -352,4 +374,4 @@ def test_normalize_strips_at assert_equal "storage", sm.normalize("storage") assert_equal "provenance", sm.normalize("@provenance") end -end \ No newline at end of file +end diff --git a/gems/decomplex/test/structural_topology_test.rb b/gems/decomplex/test/structural_topology_test.rb index 26f857d6c..1cf2dc536 100644 --- a/gems/decomplex/test/structural_topology_test.rb +++ b/gems/decomplex/test/structural_topology_test.rb @@ -2,6 +2,7 @@ require "minitest/autorun" require "tempfile" +require_relative "../lib/decomplex/ast" require_relative "../lib/decomplex/structural_topology" class StructuralTopologyTest < Minitest::Test @@ -91,6 +92,25 @@ def prepare; end end end + def test_scan_uses_syntax_facts_not_legacy_ast_parse + with_ruby_file(<<~RB) do |path| + class Runner + def run + prepare + end + + def prepare; end + end + RB + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + graph = Decomplex::StructuralTopology.scan([path]) + + assert graph.method_for("Runner", "run") + 
assert_includes graph.edges_for_owner("Runner").map(&:callee_name), "prepare" + end + end + end + def test_tracks_top_level_same_file_helper_edges with_ruby_file(<<~RB) do |path| def run diff --git a/gems/decomplex/test/syntax_test.rb b/gems/decomplex/test/syntax_test.rb index 3e73304bb..ca8c730c7 100644 --- a/gems/decomplex/test/syntax_test.rb +++ b/gems/decomplex/test/syntax_test.rb @@ -259,6 +259,40 @@ def classify(node) end end + def test_tree_sitter_ruby_adapter_applies_method_visibility + grammar = ENV["DECOMPLEX_TS_RUBY_PATH"] + skip "set DECOMPLEX_TS_RUBY_PATH to run Tree-sitter adapter smoke test" unless grammar && File.file?(grammar) + + with_file(<<~RB) do |path| + class Worker + def run; end + + private + def prepare; end + def validate; end + + public :validate + protected + def guarded; end + + private def inline_helper; end + def self.build; end + def Worker.explicit; end + end + RB + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :ruby) + functions = doc.function_defs.to_h { |fn| [fn.name, fn] } + + assert_equal :public, functions.fetch("run").visibility + assert_equal :private, functions.fetch("prepare").visibility + assert_equal :public, functions.fetch("validate").visibility + assert_equal :protected, functions.fetch("guarded").visibility + assert_equal :private, functions.fetch("inline_helper").visibility + assert_equal :public, functions.fetch("self.build").visibility + assert_equal :public, functions.fetch("Worker.explicit").visibility + end + end + def test_tree_sitter_language_profiles_extract_portable_facts_when_grammars_are_available profiles = { python: [ @@ -393,8 +427,11 @@ class Worker: def __init__(self, items): self.items = items - def call(self): - self.items.append("x") + def call(self): + self.items.append("x") + + def run(items): + prepare(items) PY doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) @@ -403,6 +440,8 @@ def call(self): ["Worker", "__init__", "self", "items", 
"items"] assert_includes doc.call_sites.map { |call| [call.owner, call.function, call.receiver, call.message] }, ["Worker", "call", "self.items", "append"] + assert_includes doc.call_sites.map { |call| [call.function, call.receiver, call.message, call.arguments] }, + ["run", "self", "prepare", ["items"]] end end diff --git a/gems/decomplex/test/temporal_ordering_pressure_test.rb b/gems/decomplex/test/temporal_ordering_pressure_test.rb index 12ab8bb6d..776a22a5f 100644 --- a/gems/decomplex/test/temporal_ordering_pressure_test.rb +++ b/gems/decomplex/test/temporal_ordering_pressure_test.rb @@ -56,6 +56,23 @@ def helper_two; @state = :two; end assert_empty rows end + def test_scan_uses_syntax_facts_not_legacy_ast_parse + f = Tempfile.new(["temporal", ".rb"]) + f.write(<<~RB) + class BillingService + def set_user(user); @user = user; end + def set_cart(cart); @cart = cart; end + def validate_user; @validated = @user && @cart; end + end + RB + f.close + @files << f + + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + refute_empty Decomplex::TemporalOrderingPressure.scan([f.path]) + end + end + def test_requires_shared_state_not_just_many_independent_writers rows = scan(<<~RB) class IndependentSetters diff --git a/gems/decomplex/test/weighted_inlined_cognitive_complexity_test.rb b/gems/decomplex/test/weighted_inlined_cognitive_complexity_test.rb index 7276fc7b2..e80b2190a 100644 --- a/gems/decomplex/test/weighted_inlined_cognitive_complexity_test.rb +++ b/gems/decomplex/test/weighted_inlined_cognitive_complexity_test.rb @@ -2,6 +2,7 @@ require "minitest/autorun" require "tempfile" +require_relative "../lib/decomplex/ast" require_relative "../lib/decomplex/weighted_inlined_cognitive_complexity" class WeightedInlinedCognitiveComplexityTest < Minitest::Test @@ -244,6 +245,28 @@ def right(item) assert_equal %w[left right], left[:call_chain] end + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise 
"legacy Ast.parse should not be used" }) do + out = scan(<<~RB, min_score: 2, min_hidden: 1, max_depth: 2) + class Pipeline + def run(input) + prepare(input) + end + + def prepare(input) + if input.ready? + if input.valid? && !input.locked? + true + end + end + end + end + RB + + refute_empty out + end + end + def test_handles_modules_inline_visibility_loops_rescue_and_shared_reason out = scan(<<~RB, min_score: 0, min_hidden: 0, max_depth: 1) class EmptyOwner; end From c5153a7671b781e5a10bc9cc53f6be29f608d687 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Fri, 19 Jun 2026 15:50:34 +0000 Subject: [PATCH 26/52] Add parallel decomplex test runner audit --- Gemfile | 1 + Gemfile.lock | 3 + gems/decomplex/CONTRIBUTING.md | 14 ++ .../cross-language-fixture-commit-audit.md | 193 ++++++++++++++++++ 4 files changed, 211 insertions(+) create mode 100644 gems/decomplex/docs/agents/cross-language-fixture-commit-audit.md diff --git a/Gemfile b/Gemfile index 186513287..0084b635d 100644 --- a/Gemfile +++ b/Gemfile @@ -8,6 +8,7 @@ group :development do gem 'minitest', '~> 5.25' gem 'rspec' gem 'parallel_rspec' + gem "parallel_tests", "~> 5.7", require: false gem 'tty-cursor', require: false gem 'tty-reader', require: false gem 'tty-screen', require: false diff --git a/Gemfile.lock b/Gemfile.lock index 9d3a35aa8..f9afad6c6 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -124,6 +124,8 @@ GEM parallel_rspec (3.0.0) rake (> 10.0) rspec + parallel_tests (5.7.0) + parallel parlour (9.1.2) commander (~> 5.0) parser @@ -298,6 +300,7 @@ DEPENDENCIES mutant-rspec nil-kill! parallel_rspec + parallel_tests (~> 5.7) reek rspec rubocop diff --git a/gems/decomplex/CONTRIBUTING.md b/gems/decomplex/CONTRIBUTING.md index 2cf473eaf..4b2dbab82 100644 --- a/gems/decomplex/CONTRIBUTING.md +++ b/gems/decomplex/CONTRIBUTING.md @@ -36,6 +36,20 @@ Prefer one clear signal over a broad blended score. If a metric is noisy, mark it tier 3 or make it supporting evidence for convergence/root-cause clusters. 
+## Running Tests + +Run the Decomplex Minitest suite in parallel with: + +```bash +bundle exec parallel_test gems/decomplex/test +``` + +For a smaller local run while debugging output, cap workers explicitly: + +```bash +bundle exec parallel_test gems/decomplex/test -n 4 --serialize-stdout +``` + ## Language Support New language work should go through the syntax/profile boundary: diff --git a/gems/decomplex/docs/agents/cross-language-fixture-commit-audit.md b/gems/decomplex/docs/agents/cross-language-fixture-commit-audit.md new file mode 100644 index 000000000..2790f1c4f --- /dev/null +++ b/gems/decomplex/docs/agents/cross-language-fixture-commit-audit.md @@ -0,0 +1,193 @@ +# Cross-Language Fixture Commit Audit + +Audited commit: `cda67cd87` (`Add cross-language decomplex oracle fixtures`). + +This document flags places where the commit moved in the right direction +functionally, but still leaves architecture and oracle precision gaps that +should be fixed before treating the new language support as real parity. + +## Architecture Flags + +These are places where language-specific parser quirks are still in +detectors or in the base syntax normalizer. New languages will keep forcing +edits in these same files unless these become adapter responsibilities. + +### 1. Base `TreeSitterLanguageAdapter` Is Still A Cross-Language Grammar Table + +`gems/decomplex/lib/decomplex/syntax.rb` contains many raw grammar node +names in the base adapter: + +- `call_target` matches `function_call`, `method_call`, `dot_index_expression`, + `variable_list`, `identifier`, and `simple_identifier` directly in the base + adapter (`syntax.rb:380-387`). +- `function_params` has grammar-specific parameter list handling for + `method_declaration`, `function_value_parameters`, and direct `parameter` + children (`syntax.rb:666-681`). 
+- `generic_function_body_node` and `generic_function_body_statements` know + about `function_body`, `statement_block`, `compound_statement`, + `declaration_list`, `statements`, and `statement_list` (`syntax.rb:763-788`). +- local read/write extraction knows about wrapper nodes such as + `argument`, `pattern`, `directly_assignable_expression`, `value_argument`, + `property_declaration`, `short_var_declaration`, and + `local_variable_declaration` (`syntax.rb:814-995`). +- branch/case normalization knows raw wrapper kinds and tokens such as + `block`, `body_statement`, `statements`, `statement_list`, `case`, `match`, + `switch`, and `when` (`syntax.rb:1735-1803`). +- state/member extraction embeds grammar node names such as + `navigation_expression`, `directly_assignable_expression`, + `dot_index_expression`, and `variable_list` (`syntax.rb:2367-2496`). + +Expected direction: each adapter should map its parser's AST nodes into +language-neutral roles such as `body`, `statement`, `local_declaration`, +`assignment_lhs`, `field_access`, `call`, `call_arguments`, `branch`, +`case_arm`, and `state_target`. Detectors and the base adapter should consume +those roles, not raw grammar names. + +### 2. Base Syntax Still Contains Language Text Rules + +Some rules are textual language conventions, not generic syntax: + +- Lua comments are added through a hard-coded `--` prefix in + `generic_source_boundary` (`syntax.rb:869-876`). +- `self`/`this` normalization is hard-coded globally + (`syntax.rb:2576-2579`). +- declaration/type parsing strips a mixed set of language keywords + (`public`, `private`, `protected`, `internal`, `static`, `readonly`, + `const`, `pub`, `mut`, `var`, `let`) in one regex (`syntax.rb:2312-2339`). +- namespace filtering hard-codes `std`, `builtin`, `build_options`, and + capitalized dotted constants globally (`syntax.rb:2471-2476`). 
+ +Expected direction: comment delimiters, self receiver names, visibility/type +modifiers, and namespace conventions should live in the language adapter or a +per-language lexicon. + +### 3. `FlaySimilarity` Contains Its Own Language Vocabulary + +`gems/decomplex/lib/decomplex/flay_similarity.rb` directly enumerates raw +Tree-sitter grammar node kinds: + +- identifier, literal, skip, clone candidate, body, and call kind lists + (`flay_similarity.rb:25-52`); +- candidate selection by raw node kind (`flay_similarity.rb:266-290`); +- call/message normalization for `argument_list`, `arguments`, `call_suffix`, + `navigation_expression`, `directly_assignable_expression`, and + `navigation_suffix` (`flay_similarity.rb:343-369`). + +Expected direction: Flay should fingerprint normalized semantic nodes or a +syntax-provided structural stream. Adding a language should not require adding +its node kinds to the detector. + +### 4. `RedundantNilGuard` Reimplements A Mini Syntax Adapter + +`gems/decomplex/lib/decomplex/redundant_nil_guard.rb` now operates through +Tree-sitter, but it reimplements body, branch, assignment, call, receiver, +nil-predicate, safe-navigation, and field-like normalization internally: + +- body/statement wrappers: `statements`, `statement_list` + (`redundant_nil_guard.rb:243-258`); +- branch wrappers/tokens: `if`, `unless`, `body_statement`, `block`, + `statements`, `statement_list` (`redundant_nil_guard.rb:260-307`); +- call and receiver extraction for `call`, `call_expression`, + `function_call`, `invocation_expression`, `method_invocation`, + `method_call`, `argument_list`, `arguments`, `call_suffix` + (`redundant_nil_guard.rb:351-419`); +- subject keys and field-like nodes include raw syntax names and `self`/`this` + handling (`redundant_nil_guard.rb:422-538`). + +Expected direction: this detector should consume normalized branch facts, +nil-check facts, safe-navigation facts, and local assignment facts from +`Syntax`/adapters. 
Otherwise every language with a different nil predicate or +safe-call spelling will keep changing this detector. + +### 5. `WeightedInlinedCognitiveComplexity` Scores Raw Tree-Sitter Nodes + +`gems/decomplex/lib/decomplex/weighted_inlined_cognitive_complexity.rb` +contains grammar-specific logic in the local scorer: + +- boolean node kinds include `binary`, `binary_expression`, + `boolean_operator`, `conjunction_expression`, `disjunction_expression` + (`weighted_inlined_cognitive_complexity.rb:156-159`); +- branch/loop detection embeds `if_statement`, `if_expression`, + `if_modifier`, `body_statement`, `block`, `statements`, `statement_list`, + `for_statement`, `for_in_statement`, and text checks for `if`, `for`, + `while`, `loop`, and `match` (`weighted_inlined_cognitive_complexity.rb:162-209`). + +Expected direction: WICC should score normalized control-flow events produced +by syntax adapters, not inspect raw parser nodes. + +### 6. `FatUnion` Parses Dispatch Semantics With Detector Regexes + +`gems/decomplex/lib/decomplex/fat_union.rb` contains language-specific dispatch +normalization in detector code: + +- variant constants are parsed with `CONSTANT_PATTERN` and + `IF_DISPATCH_PATTERN` (`fat_union.rb:12-13`); +- `if` dispatch is inferred by regexing the predicate text + (`fat_union.rb:69-105`); +- `case ` is stripped from arm text inside the detector + (`fat_union.rb:133-135`). + +Expected direction: syntax adapters should expose normalized dispatch sites: +subject, variant patterns, arm spans, and arm member reads. The detector should +only rank the product-vs-sum smell. + +### 7. Protocol Normalization Splits Language Spellings In Generic Syntax + +`gems/decomplex/lib/decomplex/syntax/protocols.rb` strips method names with +`split(/[.:]/)` for effects and calls (`syntax/protocols.rb:34`, +`syntax/protocols.rb:50`, `syntax/protocols.rb:62`). + +Expected direction: adapters should expose normalized method names and receiver +paths. 
Generic protocol mining should not know that Lua uses `:` or that some +languages use dotted member paths. + +## Oracle Specificity Flags + +These projections were made less specific than they should be. They pass the +fixture matrix, but they do not prove enough about detector correctness. + +### Must Tighten After Normalization Fixes + +| Detector | Current projection | Why this is under-specific | Minimum target | +| --- | --- | --- | --- | +| `decision-pressure` | only `present` (`examples_oracle_test.rb:87-88`) | hides contract normalization drift such as `.symbol` vs `~local` and hides decision-count drift | assert normalized contract class/key, decision count, essential count, and method count | +| `fat-union` | only `present` (`examples_oracle_test.rb:153-154`) | hides common/variant member drift; Lua currently classifies `name`/`value` differently than Ruby | assert normalized `common`, `variant`, `degenerate`, `support`, and `scatter` | +| `function-lcom` | only `present` (`examples_oracle_test.rb:149-150`) | hides data-flow shape drift; Java produced a different component count/mode during development | assert mode, component count, and preferably local/component variable counts after local-flow normalization | +| `implicit-control-flow` | only presence for `ordered_protocols` and `order_drift` (`examples_oracle_test.rb:128-132`) | hides missing protocol edges and state names; Lua previously dropped the `validate -> commit` edge | assert protocol pair, dependency, state set, support, observed calls, and missing calls | +| `miner` | only `missing_abstractions` presence (`examples_oracle_test.rb:95-98`) | hides conjunction atom count and neglected-condition absence in some languages | assert kind, support, scatter, member count, and neglected-condition missing atom/pattern count | +| `path-condition` | only `present` (`examples_oracle_test.rb:145-146`) | hides duplicate findings and path atom drift; Python/Lua produced duplicate rows during development | 
assert exact neglected count and normalized pattern atom count | +| `state-branch-density` | one present row per finding (`examples_oracle_test.rb:114-117`) | does not prove the detector associated branches with the intended state | assert normalized state refs, branch count/density bucket, and method/finding count | +| `state-mesh` | only `state_mesh.present` (`examples_oracle_test.rb:120-121`, `examples_oracle_test.rb:169-171`) | hides field-name/count drift; several languages produced fields `a,b` where Ruby projected one field | assert normalized total fields and normalized field set once field declarations/read/write parity is fixed | +| `structural-topology` | only graph presence (`examples_oracle_test.rb:162-163`) | hides method count, call edge multiplicity, and missing loop/conditional edge kinds | assert method count and normalized unique edge types or a normalized edge multiset | +| `temporal-ordering-pressure` | only `present` (`examples_oracle_test.rb:110-111`) | does not prove the same lifecycle ordering was found | assert normalized owner, method sequence/orderings, and supporting method count | + +### Also Too Loose + +| Detector | Current projection | Risk | Minimum target | +| --- | --- | --- | --- | +| `semantic-alias` | only alias cluster name count (`examples_oracle_test.rb:99-104`) | removed the reification-miss check to pass languages where the miss was not normalized | restore a normalized reification-miss presence/count check | +| `redundant-nil-guard` | `rows(...).uniq` (`examples_oracle_test.rb:118-119`) | hides duplicate reports for the same local; duplicates are likely detector bugs | assert exact count after the detector dedupes by span/local/guard | +| `flay-similarity` | prefers `defn` findings and ignores nested clone findings when present (`examples_oracle_test.rb:105-112`) | acceptable as a first top-level clone check, but it will not catch excess nested clone noise | keep the `defn` assertion, add a max/noise assertion once 
structural fingerprints are normalized | + +### Structured Enough For Now + +The following projections still assert detector-specific normalized content and +are not the immediate problem: `co-update`, `derived-state`, +`false-simplicity`, `inconsistent-rename-clone`, `local-flow`, +`locality-drag`, `operational-discontinuity`, `oversized-predicate`, +`predicate-alias`, `sequence-mine`, and `weighted-inlined-complexity`. + +## Recommended Repair Order + +1. Move syntax role extraction out of the base adapter into per-language + adapters: body statements, calls, arguments, member access, branch/case + arms, local declarations, assignments, comments, self receivers, and + visibility/type modifiers. +2. Add normalized syntax facts needed by detectors: nil guard facts, structural + fingerprint nodes, control-flow events, dispatch variants, and protocol + paths. +3. Delete detector-local grammar vocabularies from `FlaySimilarity`, + `RedundantNilGuard`, `WeightedInlinedCognitiveComplexity`, and `FatUnion`. +4. Tighten the oracle projections in the table above and regenerate shared + oracle JSON only after the normalization makes the expected values stable + across Ruby, Rust, Zig, and the newly added languages. 
From 2e6ce58f6ee7878fd4b3121947e50131b29f7977 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Fri, 19 Jun 2026 16:40:37 +0000 Subject: [PATCH 27/52] Fix Decomplex cross-language syntax boundaries --- .../cross-language-fixture-commit-audit.md | 254 +++------ gems/decomplex/examples/c/function-lcom.c | 3 +- .../examples/c/state-branch-density.c | 2 +- gems/decomplex/examples/cpp/function-lcom.cpp | 3 +- .../examples/cpp/state-branch-density.cpp | 2 +- .../examples/csharp/function-lcom.cs | 3 +- .../examples/csharp/state-branch-density.cs | 2 +- gems/decomplex/examples/go/function-lcom.go | 3 +- .../examples/go/state-branch-density.go | 2 +- .../examples/java/function-lcom.java | 3 +- .../examples/java/state-branch-density.java | 2 +- .../javascript/state-branch-density.js | 2 +- .../examples/kotlin/function-lcom.kt | 3 +- .../examples/kotlin/state-branch-density.kt | 14 +- gems/decomplex/examples/kotlin/state-mesh.kt | 22 +- .../examples/kotlin/structural-topology.kt | 17 +- gems/decomplex/examples/lua/function-lcom.lua | 3 +- .../examples/lua/state-branch-density.lua | 2 +- .../examples/oracles/decision-pressure.json | 5 +- .../decomplex/examples/oracles/fat-union.json | 21 +- .../examples/oracles/function-lcom.json | 6 +- .../oracles/implicit-control-flow.json | 78 ++- gems/decomplex/examples/oracles/miner.json | 20 +- .../examples/oracles/path-condition.json | 9 +- .../examples/oracles/semantic-alias.json | 4 +- .../oracles/state-branch-density.json | 6 +- .../examples/oracles/state-mesh.json | 10 +- .../examples/oracles/structural-topology.json | 24 +- .../oracles/temporal-ordering-pressure.json | 14 +- .../examples/python/state-branch-density.py | 6 +- .../examples/ruby/state-branch-density.rb | 12 +- .../examples/ruby/structural-topology.rb | 2 +- .../examples/rust/state-branch-density.rs | 6 +- .../examples/rust/structural-topology.rs | 2 +- .../examples/swift/function-lcom.swift | 3 +- .../examples/swift/state-branch-density.swift | 14 +- 
.../examples/swift/structural-topology.swift | 17 +- .../typescript/state-branch-density.ts | 2 +- .../examples/zig/state-branch-density.zig | 6 +- .../examples/zig/structural-topology.zig | 2 +- .../lib/decomplex/decision_pressure.rb | 22 +- gems/decomplex/lib/decomplex/fat_union.rb | 133 +---- .../lib/decomplex/flay_similarity.rb | 323 +---------- gems/decomplex/lib/decomplex/locality_drag.rb | 15 +- .../lib/decomplex/redundant_nil_guard.rb | 536 +---------------- .../decomplex/lib/decomplex/semantic_alias.rb | 13 + .../lib/decomplex/structural_topology.rb | 1 + gems/decomplex/lib/decomplex/syntax.rb | 230 ++------ .../lib/decomplex/syntax/adapters.rb | 358 ++++++++++++ .../lib/decomplex/syntax/clone_similarity.rb | 275 +++++++++ .../lib/decomplex/syntax/complexity.rb | 187 ++++++ .../lib/decomplex/syntax/contracts.rb | 40 ++ .../lib/decomplex/syntax/dispatch.rb | 148 +++++ .../lib/decomplex/syntax/nil_guards.rb | 537 ++++++++++++++++++ gems/decomplex/lib/decomplex/syntax/ruby.rb | 2 +- .../weighted_inlined_cognitive_complexity.rb | 195 +------ .../test/architecture_invariants_test.rb | 91 +++ gems/decomplex/test/examples_oracle_test.rb | 111 +++- gems/decomplex/test/flay_similarity_test.rb | 5 +- gems/decomplex/test/syntax_test.rb | 25 +- .../decomplex_architecture_invariants_spec.rb | 80 +++ 61 files changed, 2319 insertions(+), 1619 deletions(-) create mode 100644 gems/decomplex/lib/decomplex/syntax/adapters.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/clone_similarity.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/complexity.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/contracts.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/dispatch.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/nil_guards.rb create mode 100644 gems/decomplex/test/architecture_invariants_test.rb create mode 100644 spec/decomplex_architecture_invariants_spec.rb diff --git 
a/gems/decomplex/docs/agents/cross-language-fixture-commit-audit.md b/gems/decomplex/docs/agents/cross-language-fixture-commit-audit.md index 2790f1c4f..285959b6f 100644 --- a/gems/decomplex/docs/agents/cross-language-fixture-commit-audit.md +++ b/gems/decomplex/docs/agents/cross-language-fixture-commit-audit.md @@ -2,192 +2,68 @@ Audited commit: `cda67cd87` (`Add cross-language decomplex oracle fixtures`). -This document flags places where the commit moved in the right direction -functionally, but still leaves architecture and oracle precision gaps that -should be fixed before treating the new language support as real parity. - -## Architecture Flags - -These are places where language-specific parser quirks are still in -detectors or in the base syntax normalizer. New languages will keep forcing -edits in these same files unless these become adapter responsibilities. - -### 1. Base `TreeSitterLanguageAdapter` Is Still A Cross-Language Grammar Table - -`gems/decomplex/lib/decomplex/syntax.rb` contains many raw grammar node -names in the base adapter: - -- `call_target` matches `function_call`, `method_call`, `dot_index_expression`, - `variable_list`, `identifier`, and `simple_identifier` directly in the base - adapter (`syntax.rb:380-387`). -- `function_params` has grammar-specific parameter list handling for - `method_declaration`, `function_value_parameters`, and direct `parameter` - children (`syntax.rb:666-681`). -- `generic_function_body_node` and `generic_function_body_statements` know - about `function_body`, `statement_block`, `compound_statement`, - `declaration_list`, `statements`, and `statement_list` (`syntax.rb:763-788`). -- local read/write extraction knows about wrapper nodes such as - `argument`, `pattern`, `directly_assignable_expression`, `value_argument`, - `property_declaration`, `short_var_declaration`, and - `local_variable_declaration` (`syntax.rb:814-995`). 
-- branch/case normalization knows raw wrapper kinds and tokens such as - `block`, `body_statement`, `statements`, `statement_list`, `case`, `match`, - `switch`, and `when` (`syntax.rb:1735-1803`). -- state/member extraction embeds grammar node names such as - `navigation_expression`, `directly_assignable_expression`, - `dot_index_expression`, and `variable_list` (`syntax.rb:2367-2496`). - -Expected direction: each adapter should map its parser's AST nodes into -language-neutral roles such as `body`, `statement`, `local_declaration`, -`assignment_lhs`, `field_access`, `call`, `call_arguments`, `branch`, -`case_arm`, and `state_target`. Detectors and the base adapter should consume -those roles, not raw grammar names. - -### 2. Base Syntax Still Contains Language Text Rules - -Some rules are textual language conventions, not generic syntax: - -- Lua comments are added through a hard-coded `--` prefix in - `generic_source_boundary` (`syntax.rb:869-876`). -- `self`/`this` normalization is hard-coded globally - (`syntax.rb:2576-2579`). -- declaration/type parsing strips a mixed set of language keywords - (`public`, `private`, `protected`, `internal`, `static`, `readonly`, - `const`, `pub`, `mut`, `var`, `let`) in one regex (`syntax.rb:2312-2339`). -- namespace filtering hard-codes `std`, `builtin`, `build_options`, and - capitalized dotted constants globally (`syntax.rb:2471-2476`). - -Expected direction: comment delimiters, self receiver names, visibility/type -modifiers, and namespace conventions should live in the language adapter or a -per-language lexicon. - -### 3. 
`FlaySimilarity` Contains Its Own Language Vocabulary - -`gems/decomplex/lib/decomplex/flay_similarity.rb` directly enumerates raw -Tree-sitter grammar node kinds: - -- identifier, literal, skip, clone candidate, body, and call kind lists - (`flay_similarity.rb:25-52`); -- candidate selection by raw node kind (`flay_similarity.rb:266-290`); -- call/message normalization for `argument_list`, `arguments`, `call_suffix`, - `navigation_expression`, `directly_assignable_expression`, and - `navigation_suffix` (`flay_similarity.rb:343-369`). - -Expected direction: Flay should fingerprint normalized semantic nodes or a -syntax-provided structural stream. Adding a language should not require adding -its node kinds to the detector. - -### 4. `RedundantNilGuard` Reimplements A Mini Syntax Adapter - -`gems/decomplex/lib/decomplex/redundant_nil_guard.rb` now operates through -Tree-sitter, but it reimplements body, branch, assignment, call, receiver, -nil-predicate, safe-navigation, and field-like normalization internally: - -- body/statement wrappers: `statements`, `statement_list` - (`redundant_nil_guard.rb:243-258`); -- branch wrappers/tokens: `if`, `unless`, `body_statement`, `block`, - `statements`, `statement_list` (`redundant_nil_guard.rb:260-307`); -- call and receiver extraction for `call`, `call_expression`, - `function_call`, `invocation_expression`, `method_invocation`, - `method_call`, `argument_list`, `arguments`, `call_suffix` - (`redundant_nil_guard.rb:351-419`); -- subject keys and field-like nodes include raw syntax names and `self`/`this` - handling (`redundant_nil_guard.rb:422-538`). - -Expected direction: this detector should consume normalized branch facts, -nil-check facts, safe-navigation facts, and local assignment facts from -`Syntax`/adapters. Otherwise every language with a different nil predicate or -safe-call spelling will keep changing this detector. - -### 5. 
`WeightedInlinedCognitiveComplexity` Scores Raw Tree-Sitter Nodes - -`gems/decomplex/lib/decomplex/weighted_inlined_cognitive_complexity.rb` -contains grammar-specific logic in the local scorer: - -- boolean node kinds include `binary`, `binary_expression`, - `boolean_operator`, `conjunction_expression`, `disjunction_expression` - (`weighted_inlined_cognitive_complexity.rb:156-159`); -- branch/loop detection embeds `if_statement`, `if_expression`, - `if_modifier`, `body_statement`, `block`, `statements`, `statement_list`, - `for_statement`, `for_in_statement`, and text checks for `if`, `for`, - `while`, `loop`, and `match` (`weighted_inlined_cognitive_complexity.rb:162-209`). - -Expected direction: WICC should score normalized control-flow events produced -by syntax adapters, not inspect raw parser nodes. - -### 6. `FatUnion` Parses Dispatch Semantics With Detector Regexes - -`gems/decomplex/lib/decomplex/fat_union.rb` contains language-specific dispatch -normalization in detector code: - -- variant constants are parsed with `CONSTANT_PATTERN` and - `IF_DISPATCH_PATTERN` (`fat_union.rb:12-13`); -- `if` dispatch is inferred by regexing the predicate text - (`fat_union.rb:69-105`); -- `case ` is stripped from arm text inside the detector - (`fat_union.rb:133-135`). - -Expected direction: syntax adapters should expose normalized dispatch sites: -subject, variant patterns, arm spans, and arm member reads. The detector should -only rank the product-vs-sum smell. - -### 7. Protocol Normalization Splits Language Spellings In Generic Syntax - -`gems/decomplex/lib/decomplex/syntax/protocols.rb` strips method names with -`split(/[.:]/)` for effects and calls (`syntax/protocols.rb:34`, -`syntax/protocols.rb:50`, `syntax/protocols.rb:62`). - -Expected direction: adapters should expose normalized method names and receiver -paths. Generic protocol mining should not know that Lua uses `:` or that some -languages use dotted member paths. 
- -## Oracle Specificity Flags - -These projections were made less specific than they should be. They pass the -fixture matrix, but they do not prove enough about detector correctness. - -### Must Tighten After Normalization Fixes - -| Detector | Current projection | Why this is under-specific | Minimum target | -| --- | --- | --- | --- | -| `decision-pressure` | only `present` (`examples_oracle_test.rb:87-88`) | hides contract normalization drift such as `.symbol` vs `~local` and hides decision-count drift | assert normalized contract class/key, decision count, essential count, and method count | -| `fat-union` | only `present` (`examples_oracle_test.rb:153-154`) | hides common/variant member drift; Lua currently classifies `name`/`value` differently than Ruby | assert normalized `common`, `variant`, `degenerate`, `support`, and `scatter` | -| `function-lcom` | only `present` (`examples_oracle_test.rb:149-150`) | hides data-flow shape drift; Java produced a different component count/mode during development | assert mode, component count, and preferably local/component variable counts after local-flow normalization | -| `implicit-control-flow` | only presence for `ordered_protocols` and `order_drift` (`examples_oracle_test.rb:128-132`) | hides missing protocol edges and state names; Lua previously dropped the `validate -> commit` edge | assert protocol pair, dependency, state set, support, observed calls, and missing calls | -| `miner` | only `missing_abstractions` presence (`examples_oracle_test.rb:95-98`) | hides conjunction atom count and neglected-condition absence in some languages | assert kind, support, scatter, member count, and neglected-condition missing atom/pattern count | -| `path-condition` | only `present` (`examples_oracle_test.rb:145-146`) | hides duplicate findings and path atom drift; Python/Lua produced duplicate rows during development | assert exact neglected count and normalized pattern atom count | -| `state-branch-density` | one present row 
per finding (`examples_oracle_test.rb:114-117`) | does not prove the detector associated branches with the intended state | assert normalized state refs, branch count/density bucket, and method/finding count | -| `state-mesh` | only `state_mesh.present` (`examples_oracle_test.rb:120-121`, `examples_oracle_test.rb:169-171`) | hides field-name/count drift; several languages produced fields `a,b` where Ruby projected one field | assert normalized total fields and normalized field set once field declarations/read/write parity is fixed | -| `structural-topology` | only graph presence (`examples_oracle_test.rb:162-163`) | hides method count, call edge multiplicity, and missing loop/conditional edge kinds | assert method count and normalized unique edge types or a normalized edge multiset | -| `temporal-ordering-pressure` | only `present` (`examples_oracle_test.rb:110-111`) | does not prove the same lifecycle ordering was found | assert normalized owner, method sequence/orderings, and supporting method count | - -### Also Too Loose - -| Detector | Current projection | Risk | Minimum target | -| --- | --- | --- | --- | -| `semantic-alias` | only alias cluster name count (`examples_oracle_test.rb:99-104`) | removed the reification-miss check to pass languages where the miss was not normalized | restore a normalized reification-miss presence/count check | -| `redundant-nil-guard` | `rows(...).uniq` (`examples_oracle_test.rb:118-119`) | hides duplicate reports for the same local; duplicates are likely detector bugs | assert exact count after the detector dedupes by span/local/guard | -| `flay-similarity` | prefers `defn` findings and ignores nested clone findings when present (`examples_oracle_test.rb:105-112`) | acceptable as a first top-level clone check, but it will not catch excess nested clone noise | keep the `defn` assertion, add a max/noise assertion once structural fingerprints are normalized | - -### Structured Enough For Now - -The following projections still 
assert detector-specific normalized content and -are not the immediate problem: `co-update`, `derived-state`, -`false-simplicity`, `inconsistent-rename-clone`, `local-flow`, -`locality-drag`, `operational-discontinuity`, `oversized-predicate`, -`predicate-alias`, `sequence-mine`, and `weighted-inlined-complexity`. - -## Recommended Repair Order - -1. Move syntax role extraction out of the base adapter into per-language - adapters: body statements, calls, arguments, member access, branch/case - arms, local declarations, assignments, comments, self receivers, and - visibility/type modifiers. -2. Add normalized syntax facts needed by detectors: nil guard facts, structural - fingerprint nodes, control-flow events, dispatch variants, and protocol - paths. -3. Delete detector-local grammar vocabularies from `FlaySimilarity`, - `RedundantNilGuard`, `WeightedInlinedCognitiveComplexity`, and `FatUnion`. -4. Tighten the oracle projections in the table above and regenerate shared - oracle JSON only after the normalization makes the expected values stable - across Ruby, Rust, Zig, and the newly added languages. +Status: resolved in the current working tree. + +## Architecture Guardrails Added + +- `spec/decomplex_architecture_invariants_spec.rb` adds a root RSpec static + architecture guard matching the repo's existing invariant style. +- `gems/decomplex/test/architecture_invariants_test.rb` adds the same guard to + the Decomplex minitest suite. +- The guards fail if detector files use raw Tree-sitter node APIs such as + `children`, `named_children`, `child_by_field_name`, byte/point offsets, + `TreeSitter*` classes, or raw node duck typing. +- The guards fail if `syntax.rb` starts hosting detector-specific syntax + extension facts such as clone candidates, dispatch sites, nil guard facts, + or local complexity facts. 
+- The guards fail if concrete language adapter implementations move back into + `syntax.rb`, or if language profiles instantiate the base + `TreeSitterLanguageAdapter` directly. + +## Burned Down Architecture Items + +- `FlaySimilarity` now consumes `document.clone_candidates`; parser-specific + clone fingerprinting lives in `syntax/clone_similarity.rb`. +- `WeightedInlinedCognitiveComplexity` and `LocalityDrag` now consume + `document.local_complexity_scores`; local scoring lives in + `syntax/complexity.rb`. +- `RedundantNilGuard` now consumes `document.redundant_nil_guard_findings`; + nil-guard parsing lives in `syntax/nil_guards.rb`. +- `DecisionPressure` now gets local assignment contracts through + `document.local_contract_assignments`; contract extraction lives in + `syntax/contracts.rb`. +- `FatUnion` now consumes `document.dispatch_sites`; dispatch extraction lives + in `syntax/dispatch.rb`. +- Concrete language adapter behavior has moved from `syntax.rb` into + `syntax/ruby.rb` and `syntax/adapters.rb`. + +## Oracle Strength Restored + +The shared example oracle now asserts detector-specific normalized content +instead of mere finding presence for the previously weak detectors: + +- `decision-pressure`: contract, decision count, essential count, method count. +- `miner`: conjunction members, support, scatter, neglected-condition pattern. +- `semantic-alias`: normalized canonical predicate and reification miss count. +- `flay-similarity`: clone type, node kind, site count. +- `temporal-ordering-pressure`: owner, method counts, writer count, orderings, + state fields, shared fields. +- `state-branch-density`: normalized method name, decisions, state refs. +- `state-mesh`: total fields/writes/reads/re-derivations and field names. +- `implicit-control-flow`: protocol pair, dependency, support, observed/missing + calls, states. +- `path-condition`: normalized pattern, support, missing guard, action. 
+- `function-lcom`: mode, component count, local count, statement count, + terminal join. +- `fat-union`: common members, variant members, degeneracy, support, scatter, + variant set. +- `structural-topology`: method count and exact normalized edge rows. + +## Verification + +- `bundle exec rspec spec/decomplex_architecture_invariants_spec.rb` +- `bundle exec ruby -I gems/decomplex/test gems/decomplex/test/architecture_invariants_test.rb` +- `bundle exec ruby -I gems/decomplex/test gems/decomplex/test/examples_oracle_test.rb` +- `bundle exec ruby -I gems/decomplex/test -I gems/decomplex/lib -e 'Dir["gems/decomplex/test/*_test.rb"].sort.each { |path| require File.expand_path(path) }'` + +Current result: all pass, including the full Decomplex suite with 0 skips. diff --git a/gems/decomplex/examples/c/function-lcom.c b/gems/decomplex/examples/c/function-lcom.c index 0220f023b..80f5b6338 100644 --- a/gems/decomplex/examples/c/function-lcom.c +++ b/gems/decomplex/examples/c/function-lcom.c @@ -5,8 +5,7 @@ Result mixed(int price, int tax, Logger logger) { int timestamp = now(); Buffer buffer = Buffer_init(); - int stamp = timestamp; - buffer.push(stamp); + buffer.push(timestamp); logger.info(buffer); return Result_init(rounded, buffer); diff --git a/gems/decomplex/examples/c/state-branch-density.c b/gems/decomplex/examples/c/state-branch-density.c index 87f820256..4595b4dc8 100644 --- a/gems/decomplex/examples/c/state-branch-density.c +++ b/gems/decomplex/examples/c/state-branch-density.c @@ -1,2 +1,2 @@ typedef struct StateBranchChecker { int checked; } StateBranchChecker; -void check(StateBranchChecker *self, User user) { if (user.admin) { self->checked = true; } if (self->checked && user.name == "admin") { print("hello"); } } +void check(StateBranchChecker *self, bool admin, const char *name) { if (admin) { self->checked = true; } if (self->checked && name == "admin") { print("hello"); } } diff --git a/gems/decomplex/examples/cpp/function-lcom.cpp 
b/gems/decomplex/examples/cpp/function-lcom.cpp index 413b3d068..aaa03424a 100644 --- a/gems/decomplex/examples/cpp/function-lcom.cpp +++ b/gems/decomplex/examples/cpp/function-lcom.cpp @@ -5,8 +5,7 @@ Result mixed(int price, int tax, Logger logger) { auto timestamp = now(); auto buffer = Buffer.init(); - auto stamp = timestamp; - buffer.push(stamp); + buffer.push(timestamp); logger.info(buffer); return Result.init(rounded, buffer); diff --git a/gems/decomplex/examples/cpp/state-branch-density.cpp b/gems/decomplex/examples/cpp/state-branch-density.cpp index 378dc859a..1bee1354f 100644 --- a/gems/decomplex/examples/cpp/state-branch-density.cpp +++ b/gems/decomplex/examples/cpp/state-branch-density.cpp @@ -1 +1 @@ -class StateBranchChecker { public: bool checked; void check(User user) { if (user.admin) { this->checked = true; } if (this->checked && user.name == "admin") { print("hello"); } } }; +class StateBranchChecker { public: bool checked; void check(bool admin, string name) { if (admin) { this->checked = true; } if (this->checked && name == "admin") { print("hello"); } } }; diff --git a/gems/decomplex/examples/csharp/function-lcom.cs b/gems/decomplex/examples/csharp/function-lcom.cs index 6528a671a..433cc611c 100644 --- a/gems/decomplex/examples/csharp/function-lcom.cs +++ b/gems/decomplex/examples/csharp/function-lcom.cs @@ -5,8 +5,7 @@ class Example { static Result mixed(int price, int tax, Logger logger) { var timestamp = now(); var buffer = Buffer.init(); - var stamp = timestamp; - buffer.push(stamp); + buffer.push(timestamp); logger.info(buffer); return Result.init(rounded, buffer); diff --git a/gems/decomplex/examples/csharp/state-branch-density.cs b/gems/decomplex/examples/csharp/state-branch-density.cs index d4549f587..10f872540 100644 --- a/gems/decomplex/examples/csharp/state-branch-density.cs +++ b/gems/decomplex/examples/csharp/state-branch-density.cs @@ -1 +1 @@ -class StateBranchChecker { bool checked; void check(User user) { if (user.admin) { 
this.checked = true; } if (this.checked && user.name == "admin") { print("hello"); } } } +class StateBranchChecker { bool checked; void check(bool admin, string name) { if (admin) { this.checked = true; } if (this.checked && name == "admin") { print("hello"); } } } diff --git a/gems/decomplex/examples/go/function-lcom.go b/gems/decomplex/examples/go/function-lcom.go index 9b8d9a98f..0a6d644fc 100644 --- a/gems/decomplex/examples/go/function-lcom.go +++ b/gems/decomplex/examples/go/function-lcom.go @@ -6,8 +6,7 @@ func mixed(price int, tax int, logger Logger) Result { timestamp := now() buffer := Buffer_init() - stamp := timestamp - buffer.push(stamp) + buffer.push(timestamp) logger.info(buffer) return Result_init(rounded, buffer) diff --git a/gems/decomplex/examples/go/state-branch-density.go b/gems/decomplex/examples/go/state-branch-density.go index 615befc24..460cc9a17 100644 --- a/gems/decomplex/examples/go/state-branch-density.go +++ b/gems/decomplex/examples/go/state-branch-density.go @@ -1,3 +1,3 @@ package main type StateBranchChecker struct { checked bool } -func (self *StateBranchChecker) check(user User) { if user.admin { self.checked = true } if self.checked && user.name == "admin" { print("hello") } } +func (self *StateBranchChecker) check(admin bool, name string) { if admin { self.checked = true } if self.checked && name == "admin" { print("hello") } } diff --git a/gems/decomplex/examples/java/function-lcom.java b/gems/decomplex/examples/java/function-lcom.java index 6528a671a..433cc611c 100644 --- a/gems/decomplex/examples/java/function-lcom.java +++ b/gems/decomplex/examples/java/function-lcom.java @@ -5,8 +5,7 @@ class Example { static Result mixed(int price, int tax, Logger logger) { var timestamp = now(); var buffer = Buffer.init(); - var stamp = timestamp; - buffer.push(stamp); + buffer.push(timestamp); logger.info(buffer); return Result.init(rounded, buffer); diff --git a/gems/decomplex/examples/java/state-branch-density.java 
b/gems/decomplex/examples/java/state-branch-density.java index a022b8707..64436338f 100644 --- a/gems/decomplex/examples/java/state-branch-density.java +++ b/gems/decomplex/examples/java/state-branch-density.java @@ -1 +1 @@ -class StateBranchChecker { boolean checked; void check(User user) { if (user.admin) { this.checked = true; } if (this.checked && user.name == "admin") { print("hello"); } } } +class StateBranchChecker { boolean checked; void check(boolean admin, String name) { if (admin) { this.checked = true; } if (this.checked && name == "admin") { print("hello"); } } } diff --git a/gems/decomplex/examples/javascript/state-branch-density.js b/gems/decomplex/examples/javascript/state-branch-density.js index ebb16349d..8c4153da3 100644 --- a/gems/decomplex/examples/javascript/state-branch-density.js +++ b/gems/decomplex/examples/javascript/state-branch-density.js @@ -1 +1 @@ -class StateBranchChecker { check(user) { if (user.admin) { this.checked = true; } if (this.checked && user.name == "admin") { print("hello"); } } } +class StateBranchChecker { check(admin, name) { if (admin) { this.checked = true; } if (this.checked && name == "admin") { print("hello"); } } } diff --git a/gems/decomplex/examples/kotlin/function-lcom.kt b/gems/decomplex/examples/kotlin/function-lcom.kt index ece21d20b..24c32f01b 100644 --- a/gems/decomplex/examples/kotlin/function-lcom.kt +++ b/gems/decomplex/examples/kotlin/function-lcom.kt @@ -5,8 +5,7 @@ fun mixed(price: Int, tax: Int, logger: Logger): Result { val timestamp = now() val buffer = Buffer.init() - val stamp = timestamp - buffer.push(stamp) + buffer.push(timestamp) logger.info(buffer) return Result.init(rounded, buffer) diff --git a/gems/decomplex/examples/kotlin/state-branch-density.kt b/gems/decomplex/examples/kotlin/state-branch-density.kt index ef04608c6..2e75fd989 100644 --- a/gems/decomplex/examples/kotlin/state-branch-density.kt +++ b/gems/decomplex/examples/kotlin/state-branch-density.kt @@ -1 +1,13 @@ -class 
StateBranchChecker { var checked = false; fun check(user: User) { if (user.admin) { this.checked = true } if (this.checked && user.name == "admin") { print("hello") } } } +class StateBranchChecker { + var checked = false + + fun check(admin: Boolean, name: String) { + if (admin) { + this.checked = true + } + + if (this.checked && name == "admin") { + print("hello") + } + } +} diff --git a/gems/decomplex/examples/kotlin/state-mesh.kt b/gems/decomplex/examples/kotlin/state-mesh.kt index 8f1ca6f93..63ce4525a 100644 --- a/gems/decomplex/examples/kotlin/state-mesh.kt +++ b/gems/decomplex/examples/kotlin/state-mesh.kt @@ -1 +1,21 @@ -class StateMeshExample { var a = 0; var b = 0; fun initialize() { this.a = 1; this.b = 2 } fun writer() { this.a = 3 } fun reader(): Int { return this.a + this.b } fun a_alias(): Int { return this.a } } +class StateMeshExample { + var a = 0 + var b = 0 + + fun initialize() { + this.a = 1 + this.b = 2 + } + + fun writer() { + this.a = 3 + } + + fun reader(): Int { + return this.a + this.b + } + + fun a_alias(): Int { + return this.a + } +} diff --git a/gems/decomplex/examples/kotlin/structural-topology.kt b/gems/decomplex/examples/kotlin/structural-topology.kt index 3284753d7..f30679d5f 100644 --- a/gems/decomplex/examples/kotlin/structural-topology.kt +++ b/gems/decomplex/examples/kotlin/structural-topology.kt @@ -1 +1,16 @@ -class Worker { fun run(items: Items) { this.prepare(); if (this.ready()) { this.validate() }; for (item in items) { this.helper(item) } } private fun prepare() {} private fun ready(): Boolean { return true } fun validate() {} private fun helper(item: Item) { item.use() } } +class Worker { + fun run(items: Items) { + this.prepare() + if (this.ready()) { + this.validate() + } + for (item in items) { + this.helper(item) + } + } + + private fun prepare() {} + private fun ready(): Boolean { return true } + fun validate() {} + private fun helper(item: Item) { item.use() } +} diff --git 
a/gems/decomplex/examples/lua/function-lcom.lua b/gems/decomplex/examples/lua/function-lcom.lua index bcad2d5bd..eb1c84c69 100644 --- a/gems/decomplex/examples/lua/function-lcom.lua +++ b/gems/decomplex/examples/lua/function-lcom.lua @@ -5,8 +5,7 @@ function mixed(price, tax, logger) local timestamp = now() local buffer = Buffer.init() - local stamp = timestamp - buffer.push(stamp) + buffer.push(timestamp) logger.info(buffer) return Result.init(rounded, buffer) diff --git a/gems/decomplex/examples/lua/state-branch-density.lua b/gems/decomplex/examples/lua/state-branch-density.lua index 5fa066c60..7fb2235ee 100644 --- a/gems/decomplex/examples/lua/state-branch-density.lua +++ b/gems/decomplex/examples/lua/state-branch-density.lua @@ -1,2 +1,2 @@ StateBranchChecker = {} -function StateBranchChecker:check(user) if user.admin then self.checked = true end if self.checked and user.name == "admin" then print("hello") end end +function StateBranchChecker:check(admin, name) if admin then self.checked = true end if self.checked and name == "admin" then print("hello") end end diff --git a/gems/decomplex/examples/oracles/decision-pressure.json b/gems/decomplex/examples/oracles/decision-pressure.json index ce2920db3..34e94069d 100644 --- a/gems/decomplex/examples/oracles/decision-pressure.json +++ b/gems/decomplex/examples/oracles/decision-pressure.json @@ -5,7 +5,10 @@ }, "expected": [ { - "present": true + "contract": ".symbol", + "decisions": 1, + "essential": 0, + "methods": 1 } ] } diff --git a/gems/decomplex/examples/oracles/fat-union.json b/gems/decomplex/examples/oracles/fat-union.json index c82ab01cf..09fa37249 100644 --- a/gems/decomplex/examples/oracles/fat-union.json +++ b/gems/decomplex/examples/oracles/fat-union.json @@ -5,7 +5,26 @@ }, "expected": [ { - "present": true + "common": [ + "col", + "line", + "parent", + "span", + "ty" + ], + "variant": [ + "name", + "recv", + "value" + ], + "degenerate": false, + "support": 1, + "scatter": 1, + "variant_set": [ + 
"AST.Call", + "AST.Func", + "AST.Lit" + ] } ] } diff --git a/gems/decomplex/examples/oracles/function-lcom.json b/gems/decomplex/examples/oracles/function-lcom.json index 7c46a6af4..884ce9f7d 100644 --- a/gems/decomplex/examples/oracles/function-lcom.json +++ b/gems/decomplex/examples/oracles/function-lcom.json @@ -5,7 +5,11 @@ }, "expected": [ { - "present": true + "mode": "late_join", + "components": 2, + "locals": 8, + "statements": 8, + "terminal_join": true } ] } diff --git a/gems/decomplex/examples/oracles/implicit-control-flow.json b/gems/decomplex/examples/oracles/implicit-control-flow.json index 0e101cfd6..b04cdd332 100644 --- a/gems/decomplex/examples/oracles/implicit-control-flow.json +++ b/gems/decomplex/examples/oracles/implicit-control-flow.json @@ -6,12 +6,86 @@ "expected": { "ordered_protocols": [ { - "present": true + "protocol": [ + "prepare", + "validate" + ], + "dependency": [ + "write_read" + ], + "support": 4, + "observed": [ + "prepare", + "validate" + ], + "missing": [ + + ], + "states": [ + "status" + ] + }, + { + "protocol": [ + "validate", + "commit" + ], + "dependency": [ + "write_read" + ], + "support": 4, + "observed": [ + "validate", + "commit" + ], + "missing": [ + + ], + "states": [ + "valid" + ] + }, + { + "protocol": [ + "validate", + "prepare" + ], + "dependency": [ + "read_write" + ], + "support": 1, + "observed": [ + "validate", + "prepare" + ], + "missing": [ + + ], + "states": [ + "status" + ] } ], "order_drift": [ { - "present": true + "protocol": [ + "prepare", + "validate" + ], + "dependency": [ + "write_read" + ], + "support": 4, + "observed": [ + "validate", + "prepare" + ], + "missing": [ + + ], + "states": [ + "status" + ] } ] } diff --git a/gems/decomplex/examples/oracles/miner.json b/gems/decomplex/examples/oracles/miner.json index adce8efd1..86539e517 100644 --- a/gems/decomplex/examples/oracles/miner.json +++ b/gems/decomplex/examples/oracles/miner.json @@ -6,7 +6,25 @@ "expected": { "missing_abstractions": [ { - 
"present": true + "kind": "conjunction", + "members": [ + "a", + "b", + "c" + ], + "support": 3, + "scatter": 3 + } + ], + "neglected_conditions": [ + { + "pattern": [ + "a", + "b", + "c" + ], + "support": 3, + "missing": "c" } ] } diff --git a/gems/decomplex/examples/oracles/path-condition.json b/gems/decomplex/examples/oracles/path-condition.json index 7e16ab337..ab60d7313 100644 --- a/gems/decomplex/examples/oracles/path-condition.json +++ b/gems/decomplex/examples/oracles/path-condition.json @@ -5,7 +5,14 @@ }, "expected": [ { - "present": true + "pattern": [ + "x.p", + "y.q", + "z.r" + ], + "support": 3, + "missing": "z.r", + "action": "go(x)" } ] } diff --git a/gems/decomplex/examples/oracles/semantic-alias.json b/gems/decomplex/examples/oracles/semantic-alias.json index 25ad9ce3f..308d1b8a0 100644 --- a/gems/decomplex/examples/oracles/semantic-alias.json +++ b/gems/decomplex/examples/oracles/semantic-alias.json @@ -6,8 +6,10 @@ "expected": { "alias_clusters": [ { + "canon": "provenance == FRAME", "name_count": 2 } - ] + ], + "reification_miss_count": 1 } } diff --git a/gems/decomplex/examples/oracles/state-branch-density.json b/gems/decomplex/examples/oracles/state-branch-density.json index e46cc034a..ef98b453e 100644 --- a/gems/decomplex/examples/oracles/state-branch-density.json +++ b/gems/decomplex/examples/oracles/state-branch-density.json @@ -5,7 +5,11 @@ }, "expected": [ { - "present": true + "decisions": 1, + "method": "check", + "state_refs": [ + "checked" + ] } ] } diff --git a/gems/decomplex/examples/oracles/state-mesh.json b/gems/decomplex/examples/oracles/state-mesh.json index e5d212598..0d70466c3 100644 --- a/gems/decomplex/examples/oracles/state-mesh.json +++ b/gems/decomplex/examples/oracles/state-mesh.json @@ -5,7 +5,13 @@ }, "expected": { "state_mesh": { - "present": true - } + "total_fields": 1, + "total_writes": 3, + "total_reads": 2, + "total_re_derivations": 0 + }, + "field_names": [ + "a" + ] } } diff --git 
a/gems/decomplex/examples/oracles/structural-topology.json b/gems/decomplex/examples/oracles/structural-topology.json index 97f827883..91ab0e67b 100644 --- a/gems/decomplex/examples/oracles/structural-topology.json +++ b/gems/decomplex/examples/oracles/structural-topology.json @@ -4,6 +4,28 @@ "options": { }, "expected": { - "present": true + "method_count": 5, + "edges": [ + { + "caller_name": "run", + "callee_name": "prepare", + "type": "always" + }, + { + "caller_name": "run", + "callee_name": "ready", + "type": "conditional" + }, + { + "caller_name": "run", + "callee_name": "validate", + "type": "conditional" + }, + { + "caller_name": "run", + "callee_name": "helper", + "type": "iterates" + } + ] } } diff --git a/gems/decomplex/examples/oracles/temporal-ordering-pressure.json b/gems/decomplex/examples/oracles/temporal-ordering-pressure.json index ac90e3e0b..8542c51b5 100644 --- a/gems/decomplex/examples/oracles/temporal-ordering-pressure.json +++ b/gems/decomplex/examples/oracles/temporal-ordering-pressure.json @@ -5,7 +5,19 @@ }, "expected": [ { - "present": true + "owner": "TemporalOrderExample", + "public_methods": 4, + "state_methods": 4, + "writers": 3, + "orderings": "4!", + "state_fields": [ + "a", + "b" + ], + "shared_fields": [ + "a", + "b" + ] } ] } diff --git a/gems/decomplex/examples/python/state-branch-density.py b/gems/decomplex/examples/python/state-branch-density.py index 90ec12429..b4dfdf071 100644 --- a/gems/decomplex/examples/python/state-branch-density.py +++ b/gems/decomplex/examples/python/state-branch-density.py @@ -1,6 +1,6 @@ class StateBranchChecker: - def check(self, user): - if user.admin: + def check(self, admin, name): + if admin: self.checked = True - if self.checked and user.name == "admin": + if self.checked and name == "admin": print("hello") diff --git a/gems/decomplex/examples/ruby/state-branch-density.rb b/gems/decomplex/examples/ruby/state-branch-density.rb index af05b1135..8c15b796a 100644 --- 
a/gems/decomplex/examples/ruby/state-branch-density.rb +++ b/gems/decomplex/examples/ruby/state-branch-density.rb @@ -1,18 +1,12 @@ # frozen_string_literal: true -class StateBranchUser < T::Struct - const :name, String - const :admin, T::Boolean -end - class StateBranchChecker - sig { params(user: StateBranchUser).void } - def check(user) - if user.admin + def check(admin, name) + if admin @checked = true end - if @checked && user.name == "admin" + if @checked && name == "admin" puts "hello" end end diff --git a/gems/decomplex/examples/ruby/structural-topology.rb b/gems/decomplex/examples/ruby/structural-topology.rb index 5100898cb..ced19bfeb 100644 --- a/gems/decomplex/examples/ruby/structural-topology.rb +++ b/gems/decomplex/examples/ruby/structural-topology.rb @@ -3,7 +3,7 @@ class Worker def run(items) prepare - if ready + if ready() validate end items.each do |item| diff --git a/gems/decomplex/examples/rust/state-branch-density.rs b/gems/decomplex/examples/rust/state-branch-density.rs index d08344808..7575342c9 100644 --- a/gems/decomplex/examples/rust/state-branch-density.rs +++ b/gems/decomplex/examples/rust/state-branch-density.rs @@ -3,12 +3,12 @@ struct StateBranchChecker { } impl StateBranchChecker { - fn check(&mut self, user: User) { - if user.admin { + fn check(&mut self, admin: bool, name: String) { + if admin { self.checked = true; } - if self.checked && user.name == "admin" { + if self.checked && name == "admin" { print("hello"); } } diff --git a/gems/decomplex/examples/rust/structural-topology.rs b/gems/decomplex/examples/rust/structural-topology.rs index bd29439db..bb16bd86a 100644 --- a/gems/decomplex/examples/rust/structural-topology.rs +++ b/gems/decomplex/examples/rust/structural-topology.rs @@ -3,7 +3,7 @@ struct Worker; impl Worker { pub fn run(&self, items: Items) { self.prepare(); - if true { + if self.ready() { self.validate(); } for item in items { diff --git a/gems/decomplex/examples/swift/function-lcom.swift 
b/gems/decomplex/examples/swift/function-lcom.swift index 12197c331..d2adc99af 100644 --- a/gems/decomplex/examples/swift/function-lcom.swift +++ b/gems/decomplex/examples/swift/function-lcom.swift @@ -5,8 +5,7 @@ func mixed(price: Int, tax: Int, logger: Logger) -> Result { let timestamp = now() let buffer = Buffer.init() - let stamp = timestamp - buffer.push(stamp) + buffer.push(timestamp) logger.info(buffer) return Result.init(rounded, buffer) diff --git a/gems/decomplex/examples/swift/state-branch-density.swift b/gems/decomplex/examples/swift/state-branch-density.swift index bbcf5c6c0..34451cf30 100644 --- a/gems/decomplex/examples/swift/state-branch-density.swift +++ b/gems/decomplex/examples/swift/state-branch-density.swift @@ -1 +1,13 @@ -class StateBranchChecker { var checked = false; func check(user: User) { if user.admin { self.checked = true } if self.checked && user.name == "admin" { print("hello") } } } +class StateBranchChecker { + var checked = false + + func check(admin: Bool, name: String) { + if admin { + self.checked = true + } + + if self.checked && name == "admin" { + print("hello") + } + } +} diff --git a/gems/decomplex/examples/swift/structural-topology.swift b/gems/decomplex/examples/swift/structural-topology.swift index 1becdb230..b12f247d8 100644 --- a/gems/decomplex/examples/swift/structural-topology.swift +++ b/gems/decomplex/examples/swift/structural-topology.swift @@ -1 +1,16 @@ -class Worker { func run(items: Items) { self.prepare(); if self.ready() { self.validate() }; for item in items { self.helper(item: item) } } private func prepare() {} private func ready() -> Bool { return true } func validate() {} private func helper(item: Item) { item.use() } } +class Worker { + func run(items: Items) { + self.prepare() + if self.ready() { + self.validate() + } + for item in items { + self.helper(item: item) + } + } + + private func prepare() {} + private func ready() -> Bool { return true } + func validate() {} + private func helper(item: 
Item) { item.use() } +} diff --git a/gems/decomplex/examples/typescript/state-branch-density.ts b/gems/decomplex/examples/typescript/state-branch-density.ts index ebb16349d..57427b860 100644 --- a/gems/decomplex/examples/typescript/state-branch-density.ts +++ b/gems/decomplex/examples/typescript/state-branch-density.ts @@ -1 +1 @@ -class StateBranchChecker { check(user) { if (user.admin) { this.checked = true; } if (this.checked && user.name == "admin") { print("hello"); } } } +class StateBranchChecker { check(admin: boolean, name: string) { if (admin) { this.checked = true; } if (this.checked && name == "admin") { print("hello"); } } } diff --git a/gems/decomplex/examples/zig/state-branch-density.zig b/gems/decomplex/examples/zig/state-branch-density.zig index 282cae925..d9af8376c 100644 --- a/gems/decomplex/examples/zig/state-branch-density.zig +++ b/gems/decomplex/examples/zig/state-branch-density.zig @@ -1,12 +1,12 @@ const StateBranchChecker = struct { checked: bool, - pub fn check(self: *StateBranchChecker, user: User) void { - if (user.admin) { + pub fn check(self: *StateBranchChecker, admin: bool, name: []const u8) void { + if (admin) { self.checked = true; } - if (self.checked and user.name == "admin") { + if (self.checked and name == "admin") { print("hello"); } } diff --git a/gems/decomplex/examples/zig/structural-topology.zig b/gems/decomplex/examples/zig/structural-topology.zig index 743477099..833286c3d 100644 --- a/gems/decomplex/examples/zig/structural-topology.zig +++ b/gems/decomplex/examples/zig/structural-topology.zig @@ -1,7 +1,7 @@ const Worker = struct { pub fn run(self: *Worker, items: Items) void { self.prepare(); - if (true) { + if (self.ready()) { self.validate(); } for (items) |item| { diff --git a/gems/decomplex/lib/decomplex/decision_pressure.rb b/gems/decomplex/lib/decomplex/decision_pressure.rb index 372a21155..ce486dbed 100644 --- a/gems/decomplex/lib/decomplex/decision_pressure.rb +++ 
b/gems/decomplex/lib/decomplex/decision_pressure.rb @@ -21,7 +21,7 @@ def self.scan(files) files.each do |file| document = Syntax.parse(file, parser: "tree_sitter") assignment_maps = document.local_methods.to_h do |method| - [method.name, build_assignment_map(method)] + [method.name, build_assignment_map(document, method)] end document.call_sites.each do |call| @@ -39,6 +39,7 @@ def self.scan(files) guard.concat(rescue_nil_hits(document, assignment_maps)) end + guard.uniq! { |hit| [hit.contract, hit.file, hit.defn, hit.line] } Report.new(guard, dispatch) end @@ -85,25 +86,12 @@ def self.rescue_nil_hits(document, assignment_maps) end end - def self.build_assignment_map(method) - method.statements.each_with_object({}) do |statement, map| - next unless statement.writes.size == 1 - - name = statement.writes.first.to_s - map[name] ||= simple_source_contract(statement.source) + def self.build_assignment_map(document, method) + document.local_contract_assignments(method).transform_values do |source| + contract_of(source, {}) end.compact end - def self.simple_source_contract(source) - match = source.to_s.match(/\A\s*[A-Za-z_]\w*\s*=\s*(.+?)\s*\z/m) - return nil unless match - - rhs = match[1].strip - return nil if rhs.match?(/\s(?:if|unless|rescue)\s|\?|:/) - - contract_of(rhs, {}) - end - def self.contract_of(receiver, assignment_map, depth = 0) source = receiver.to_s.strip return nil if source.empty? || depth >= 8 diff --git a/gems/decomplex/lib/decomplex/fat_union.rb b/gems/decomplex/lib/decomplex/fat_union.rb index b98dd4584..7d87accec 100644 --- a/gems/decomplex/lib/decomplex/fat_union.rb +++ b/gems/decomplex/lib/decomplex/fat_union.rb @@ -9,140 +9,15 @@ module Decomplex # is a union whose common core should be a struct, with a small union # for the genuinely-varying part. 
class FatUnion - CONSTANT_PATTERN = /\A[A-Z]\w*(?:(?:::|\.)[A-Z]\w*)*\z/ - IF_DISPATCH_PATTERN = /\A(?.+?)\s*(?:==|===)\s*(?[A-Z]\w*(?:(?:::|\.)[A-Z]\w*)*)\z/ - Site = Struct.new(:variant_set, :arm_members, :outside, :file, - :defn, :line, :span, keyword_init: true) - def self.scan(files, min_variants: 3, min_common: 2, ratio: 0.6) sites = files.flat_map do |file| document = Syntax.parse(file, parser: "tree_sitter") - new(document).sites + document.dispatch_sites end Report.new(sites, min_variants: min_variants, min_common: min_common, ratio: ratio) end - attr_reader :document - - def initialize(document) - @document = document - end - - def sites - arms = document.branch_arms - case_sites = arms - .select { |arm| arm.kind == :case } - .group_by { |arm| [arm.file, arm.function, arm.decision_span, arm.predicate] } - .filter_map { |_key, case_arms| record_case(case_arms) } - case_sites + if_dispatch_sites(arms) - end - - private - - def record_case(arms) - predicate = arms.first.predicate.to_s - return nil if predicate.empty? - - arm_members = {} - arms.each do |arm| - variants = constant_patterns(arm.member) - next if variants.empty? - - members = members_inside(predicate, arm.function, arm.span) - variants.each { |variant| (arm_members[variant] ||= []).concat(members) } - end - return nil if arm_members.size < 2 - - arm_members.transform_values!(&:uniq) - Site.new( - variant_set: arm_members.keys.sort, - arm_members: arm_members, - outside: members_outside(predicate, arms.first.function, arms.first.decision_span), - file: arms.first.file, - defn: arms.first.function, - line: arms.first.decision_line, - span: arms.first.decision_span - ) - end - - def if_dispatch_sites(arms) - arms.select { |arm| arm.kind == :if && arm.member == "then" } - .filter_map { |arm| [arm, if_dispatch_match(arm.predicate)] } - .reject { |_arm, match| match.nil? 
} - .group_by { |arm, match| [arm.file, arm.function, match[:subject]] } - .filter_map { |_key, matched| record_if_dispatch(matched) } - end - - def record_if_dispatch(matched) - predicate = matched.first[1][:subject] - arm_members = {} - matched.each do |arm, match| - members = members_inside(predicate, arm.function, arm.span) - (arm_members[match[:variant]] ||= []).concat(members) - end - return nil if arm_members.size < 2 - - arm_members.transform_values!(&:uniq) - Site.new( - variant_set: arm_members.keys.sort, - arm_members: arm_members, - outside: members_outside(predicate, matched.first[0].function, matched.first[0].decision_span), - file: matched.first[0].file, - defn: matched.first[0].function, - line: matched.first[0].decision_line, - span: matched.first[0].decision_span - ) - end - - def if_dispatch_match(predicate) - source = predicate.to_s.strip - source = source[1...-1].strip if source.start_with?("(") && source.end_with?(")") - match = source.match(IF_DISPATCH_PATTERN) - return nil unless match - - { subject: match[:subject].strip, variant: match[:variant].strip } - end - - def members_inside(predicate, function, span) - member_calls(predicate, function) - .select { |call| inside_span?(call.span, span) } - .map { |call| member_name(call) } - .uniq - end - - def members_outside(predicate, function, decision_span) - member_calls(predicate, function) - .reject { |call| inside_span?(call.span, decision_span) } - .map { |call| member_name(call) } - .uniq - end - - def member_calls(predicate, function) - document.call_sites.select do |call| - call.function == function && - call.receiver.to_s == predicate && - !call.message.to_s.empty? 
- end - end - - def member_name(call) - call.message.to_s.sub(/=\z/, "") - end - - def constant_patterns(member) - member.to_s.split(/\s*,\s*/).map { |pattern| pattern.sub(/\Acase\s+/, "") } - .select { |pattern| pattern.match?(CONSTANT_PATTERN) } - end - - def inside_span?(inner, outer) - return false unless inner && outer - - starts_after_or_at = (inner[0] > outer[0]) || (inner[0] == outer[0] && inner[1] >= outer[1]) - ends_before_or_at = (inner[2] < outer[2]) || (inner[2] == outer[2] && inner[3] <= outer[3]) - starts_after_or_at && ends_before_or_at - end - class Report def initialize(sites, min_variants:, min_common:, ratio:) @sites = sites @@ -175,16 +50,16 @@ def fat_unions next if common.size < @min_common next if total.zero? || common.size.to_f / total < @ratio - locs = group.map { |s| "#{s.file}:#{s.defn}:#{s.line}" } + locs = group.map { |s| "#{s.file}:#{s.function}:#{s.line}" } { variant_set: vset, common: common.sort, variant: variant.sort, degenerate: variant.empty?, support: group.size, - scatter: group.map { |s| [s.file, s.defn] }.uniq.size, + scatter: group.map { |s| [s.file, s.function] }.uniq.size, rank: group.size * common.size, kind: :case_dispatch, members: vset, at: locs.first, sites: locs.uniq, - spans: group.to_h { |s| ["#{s.file}:#{s.defn}:#{s.line}", s.span] } + spans: group.to_h { |s| ["#{s.file}:#{s.function}:#{s.line}", s.span] } } end.sort_by { |h| [h[:degenerate] ? 0 : 1, -h[:rank]] } end diff --git a/gems/decomplex/lib/decomplex/flay_similarity.rb b/gems/decomplex/lib/decomplex/flay_similarity.rb index 8d9e6b56b..39d92333c 100644 --- a/gems/decomplex/lib/decomplex/flay_similarity.rb +++ b/gems/decomplex/lib/decomplex/flay_similarity.rb @@ -4,53 +4,15 @@ require_relative "syntax" module Decomplex - # Tree-sitter structural similarity scanner for Type-2 / Type-3 clone pressure. + # Structural similarity scanner for Type-2 / Type-3 clone pressure. # - # The public class name is retained for report compatibility. 
The detector no - # longer shells through the flay gem: it builds language-neutral structural - # fingerprints from Tree-sitter node kinds, normalizing identifiers/literals - # so renamed-but-isomorphic code groups as Type-2. Type-3 uses a small fuzzy - # signature over child statements, matching functions/subtrees with a missing - # or inserted child within the configured fuzzy budget. + # Parser-specific structural fingerprinting is owned by Syntax adapters. This + # detector ranks already-normalized clone candidates and emits report rows. class FlaySimilarity DEFAULT_MASS = 32 DEFAULT_FUZZY = 1 MAX_FUZZY_CHILDREN = 14 - MethodSpan = Struct.new(:name, :first_line, :last_line, keyword_init: true) - Candidate = Struct.new(:file, :line, :span, :method_name, :node_name, :mass, - :fingerprint, :raw, :child_fingerprints, - :child_masses, keyword_init: true) - - IDENTIFIER_KINDS = %w[ - identifier constant type_identifier field_identifier property_identifier - shorthand_property_identifier_pattern simple_identifier variable_name - ].freeze - LITERAL_KINDS = %w[ - string string_content string_literal interpreted_string_literal raw_string_literal - integer float int number rational imaginary character char_literal - symbol simple_symbol true false nil none null - ].freeze - SKIP_CANDIDATE_KINDS = %w[ - comment identifier constant type_identifier field_identifier property_identifier - parameters formal_parameters parameter_list argument_list arguments - block_parameters call_suffix function_value_parameters method_parameters value_argument - scope_resolution - ].freeze - CLONE_CANDIDATE_KINDS = %w[ - array assignment assignment_statement block case case_clause class - class_definition class_declaration compound_statement conjunction_expression control_structure_body - do_block enum_declaration for for_statement function_body hash if if_statement match_expression - match_statement method method_definition module operator_assignment singleton_method statements - 
struct_declaration switch_case switch_expression switch_statement - unless until while while_statement - ].freeze - BODY_KINDS = %w[ - body block body_statement declaration_list statement_block compound_statement - function_body statements suite do_block - ].freeze - CALL_KINDS = %w[call call_expression function_call method_call method_invocation invocation_expression].freeze - def self.scan(files, mass: DEFAULT_MASS, fuzzy: DEFAULT_FUZZY) new(files, mass: mass, fuzzy: fuzzy).scan end @@ -59,7 +21,6 @@ def initialize(files, mass:, fuzzy:) @files = files @mass = mass @fuzzy = fuzzy - @method_spans = {} end def scan @@ -77,68 +38,18 @@ def scan def candidates_for_file(file) return [] unless Syntax.supported_source?(file, parser: "tree_sitter") - doc = Syntax.parse(file, parser: "tree_sitter") - @method_spans[file] = collect_method_spans(doc) - out = [] - seen = Set.new - - doc.function_defs.each do |fn| - candidate = candidate_for(file, fn.body, node_name: "defn") - add_candidate(out, seen, candidate) if candidate - end - - walk(doc.root) do |node| - next unless candidate_node?(node) - - add_candidate(out, seen, candidate_for(file, node)) + Syntax.parse(file, parser: "tree_sitter").clone_candidates.select do |candidate| + candidate.mass >= effective_mass_floor end - - out rescue StandardError [] end - def add_candidate(out, seen, candidate) - return unless candidate - return if candidate.mass < effective_mass_floor - return if typed_struct_schema_text?(candidate.raw) - - key = [candidate.file, candidate.line, candidate.span, candidate.node_name, candidate.fingerprint] - return if seen.include?(key) - - seen << key - out << candidate - end - - def candidate_for(file, node, node_name: nil) - fp, mass = fingerprint(node) - return nil if fp.to_s.empty? - - line = line(node) - method = method_span_for(file, line) - children = fuzzy_children_for(node) - child_data = children.map { |child| fingerprint(child) }.reject { |child_fp, child_mass| child_fp.to_s.empty? 
|| child_mass.zero? } - - Candidate.new( - file: file, - line: line, - span: span(node), - method_name: method.name, - node_name: node_name || flay_node_name(node), - mass: mass, - fingerprint: fp, - raw: normalize_text(node.text), - child_fingerprints: child_data.map(&:first), - child_masses: child_data.map(&:last) - ) - end - def type2_findings(candidates) candidates.group_by(&:fingerprint).values.filter_map do |cluster| cluster = uniq_sites(cluster) next if cluster.size < 2 next if cluster.map(&:raw).uniq.size < 2 - next if typed_struct_schema_cluster?(cluster) finding_for(cluster, clone_type: :type2, mass: cluster.map(&:mass).min) end @@ -161,7 +72,6 @@ def type3_findings(candidates) cluster = uniq_sites(rows.map(&:first)) next if cluster.size < 2 next if cluster.map(&:fingerprint).uniq.size < 2 - next if typed_struct_schema_cluster?(cluster) key = cluster.map { |candidate| [candidate.file, candidate.line, candidate.node_name] }.sort next if seen.include?(key) @@ -185,8 +95,12 @@ def finding_for(cluster, clone_type:, mass:) end def prune_nested_findings(findings) + defn_site_sets = findings.select { |finding| finding[:node].to_s == "defn" } + .map { |finding| [finding[:clone_type], site_identities(finding)] } kept = [] findings.each do |finding| + next if finding[:node].to_s != "defn" && + defn_site_sets.include?([finding[:clone_type], site_identities(finding)]) next if kept.any? 
{ |larger| nested_finding?(finding, larger) } kept << finding @@ -219,12 +133,18 @@ def site_file(site) parts[0...-2].join(":") end + def site_identities(finding) + Array(finding[:sites]).map do |site| + parts = site.to_s.split(":") + [parts[0...-2].join(":"), parts[-2]] + end.sort + end + def spans_for(cluster) cluster.each_with_object({}) do |candidate, out| out[site_for(candidate)] = if candidate.node_name == "defn" - method = method_span_for(candidate.file, candidate.line) - [method.first_line, 0, method.last_line, 1] + [candidate.span[0], 0, candidate.span[2], 1] else candidate.span end @@ -263,217 +183,8 @@ def fuzzy_signatures(candidate) signatures end - def candidate_node?(node) - return false unless ts_node?(node) - return false unless node.named? - return false if SKIP_CANDIDATE_KINDS.include?(node.kind) - return false unless CLONE_CANDIDATE_KINDS.include?(node.kind) - return false if typed_struct_schema_text?(node.text) - - node.named_child_count.positive? - end - def effective_mass_floor @effective_mass_floor ||= [@mass, (@mass * 23.0 / 8.0).ceil].max end - - def fuzzy_children_for(node) - body = body_node(node) - source = body || node - children = source.named_children - children = node.named_children if children.empty? - children.reject { |child| SKIP_CANDIDATE_KINDS.include?(child.kind) || typed_struct_schema_text?(child.text) } - end - - def body_node(node) - named_field(node, "body") || - node.named_children.find { |child| BODY_KINDS.include?(child.kind) } - end - - def fingerprint(node, active = nil) - return ["", 0] unless ts_node?(node) - active ||= Set.new - key = node_key(node) - return ["", 0] if active.include?(key) - - active << key - begin - return ["", 0] if node.kind == "comment" - return fingerprint_call(node, active) if CALL_KINDS.include?(node.kind) && call_message(node) - - if node.child_count.zero? - token = terminal_token(node) - return ["", 0] if token.empty? 
- - return [token, 1] - end - - child_parts = [] - mass = 1 - node.children.each do |child| - child_fp, child_mass = fingerprint(child, active) - next if child_fp.empty? - - child_parts << child_fp - mass += child_mass - end - - return [terminal_token(node), 1] if child_parts.empty? - - ["#{node.kind}(#{child_parts.join(' ')})", mass] - ensure - active.delete(key) - end - end - - def fingerprint_call(node, active) - message = call_message(node) - child_parts = [] - mass = 1 - node.children.each do |child| - child_fp, child_mass = fingerprint(child, active) - next if child_fp.empty? - - child_parts << child_fp - mass += child_mass - end - ["#{node.kind}<#{message}>(#{child_parts.join(' ')})", mass] - end - - def call_message(node) - return nil unless node.children.any? { |child| %w[argument_list arguments call_suffix].include?(child.kind) } - - callee = named_field(node, "function") || named_field(node, "callee") - return callee_message(callee) if callee - - argument_node = node.children.find { |child| %w[argument_list arguments call_suffix].include?(child.kind) } - named_before_args = node.named_children.select do |child| - argument_node.nil? 
|| child.start_byte < argument_node.start_byte - end - callee_message(named_before_args.last) - end - - def callee_message(node) - return nil unless ts_node?(node) - return node.text if IDENTIFIER_KINDS.include?(node.kind) - return navigation_suffix_message(node) if %w[navigation_expression directly_assignable_expression].include?(node.kind) - - leaf = node.named_children.reverse.find { |child| IDENTIFIER_KINDS.include?(child.kind) } - leaf&.text - end - - def navigation_suffix_message(node) - suffix = node.named_children.reverse.find { |child| child.kind == "navigation_suffix" } - leaf = suffix&.named_children&.reverse&.find { |child| IDENTIFIER_KINDS.include?(child.kind) } - leaf&.text - end - - def terminal_token(node) - kind = node.kind.to_s - return "id" if IDENTIFIER_KINDS.include?(kind) - return literal_token(kind) if LITERAL_KINDS.include?(kind) - - text = normalize_text(node.text) - return "" if text.empty? - return "id" if text.match?(/\A[A-Za-z_]\w*[!?=]?\z/) - return "lit" if text.match?(/\A(?::[A-Za-z_]\w*|[-+]?\d+(?:\.\d+)?|".*"|'.*')\z/) - - "#{kind}:#{text}" - end - - def literal_token(kind) - case kind - when "true", "false" then "bool" - when "nil", "none", "null" then "nil" - else "lit" - end - end - - def flay_node_name(node) - return "defn" if %w[method function_definition function_declaration method_definition function_item].include?(node.kind) - return "defs" if node.kind == "singleton_method" - - node.kind - end - - def typed_struct_schema_cluster?(cluster) - cluster.all? { |candidate| typed_struct_schema_line?(candidate.file, candidate.line) || typed_struct_schema_text?(candidate.raw) } - end - - def typed_struct_schema_line?(file, line_no) - source_line(file, line_no).match?(/\A\s*(?:const|prop)\s+:[A-Za-z_]\w*\b/) - end - - def typed_struct_schema_text?(text) - text.to_s.match?(/<\s*T::Struct\b/) || - text.to_s.lines.all? { |line| line.strip.empty? 
|| line.match?(/\A\s*(?:const|prop)\s+:[A-Za-z_]\w*\b/) } - end - - def source_line(file, line_no) - (@source_lines ||= {}) - (@source_lines[file] ||= File.readlines(file))[line_no - 1].to_s - rescue StandardError - "" - end - - def collect_method_spans(document) - document.function_defs.map do |fn| - MethodSpan.new(name: fn.name.to_s, first_line: fn.span[0], last_line: fn.span[2]) - end.sort_by { |span| [span.first_line, -span.last_line] } - rescue StandardError - [] - end - - def method_span_for(file, line_no) - spans = @method_spans[file] || [] - spans.find { |span| span.first_line <= line_no && line_no <= span.last_line } || - MethodSpan.new(name: "(top-level)", first_line: line_no, last_line: line_no) - end - - def walk(node, &block) - return unless ts_node?(node) - - pending = [node] - seen = Set.new - until pending.empty? - current = pending.pop - next unless ts_node?(current) - key = node_key(current) - next if seen.include?(key) - - seen << key - yield current - current.children.reverse_each { |child| pending << child } - end - end - - def named_field(node, name) - node.child_by_field_name(name) - rescue StandardError - nil - end - - def ts_node?(node) - node && node.respond_to?(:kind) && node.respond_to?(:children) - end - - def node_key(node) - [node.kind, node.start_byte, node.end_byte] - rescue StandardError - node.object_id - end - - def span(node) - [node.start_point.row + 1, node.start_point.column, - node.end_point.row + 1, node.end_point.column] - end - - def line(node) - node.start_point.row + 1 - end - - def normalize_text(text) - text.to_s.strip.gsub(/\s+/, " ") - end end end diff --git a/gems/decomplex/lib/decomplex/locality_drag.rb b/gems/decomplex/lib/decomplex/locality_drag.rb index 0c70fa079..4054b4728 100644 --- a/gems/decomplex/lib/decomplex/locality_drag.rb +++ b/gems/decomplex/lib/decomplex/locality_drag.rb @@ -2,7 +2,7 @@ require "set" require_relative "local_flow" -require_relative "weighted_inlined_cognitive_complexity" 
+require_relative "syntax" module Decomplex # Finds locals that are initialized substantially before their first use @@ -28,8 +28,14 @@ def self.scan( min_score: DEFAULT_MIN_SCORE, max_findings_per_method: DEFAULT_MAX_FINDINGS_PER_METHOD ) + summaries = LocalFlow.scan(files) + complexity_scores = Array(files).each_with_object({}) do |file, scores| + document = Syntax.parse(file, parser: "tree_sitter") + scores.merge!(document.local_complexity_scores) + end new( - LocalFlow.scan(files), + summaries, + complexity_scores: complexity_scores, min_unrelated_statements: min_unrelated_statements, min_gap_lines: min_gap_lines, min_local_complexity: min_local_complexity, @@ -40,6 +46,7 @@ def self.scan( def initialize( summaries, + complexity_scores:, min_unrelated_statements:, min_gap_lines:, min_local_complexity:, @@ -52,7 +59,7 @@ def initialize( @min_local_complexity = min_local_complexity.to_f @min_score = min_score.to_i @max_findings_per_method = max_findings_per_method.to_i - @scorer = WeightedInlinedCognitiveComplexity::LocalScorer.new + @complexity_scores = complexity_scores end def findings @@ -68,7 +75,7 @@ def findings def findings_for(summary) return [] if summary.statements.size < @min_unrelated_statements + 2 - local_complexity = @scorer.score(summary.node)[:score].to_f + local_complexity = @complexity_scores.fetch(summary.id, { score: 0.0 })[:score].to_f return [] if local_complexity < @min_local_complexity findings = summary.statements.each_with_index.flat_map do |statement, index| diff --git a/gems/decomplex/lib/decomplex/redundant_nil_guard.rb b/gems/decomplex/lib/decomplex/redundant_nil_guard.rb index b6a7456a8..f0214cd9a 100644 --- a/gems/decomplex/lib/decomplex/redundant_nil_guard.rb +++ b/gems/decomplex/lib/decomplex/redundant_nil_guard.rb @@ -1,542 +1,26 @@ # frozen_string_literal: true -require "set" require_relative "syntax" module Decomplex # Finds nil checks or safe-navigation performed after the same stable subject # is already proven non-nil on 
the current intra-method path. class RedundantNilGuard - Finding = Struct.new(:file, :defn, :line, :span, :local, :guard, - :proof, keyword_init: true) do - def to_h - loc = "#{file}:#{defn}:#{line}" - super.merge(at: loc, spans: { loc => span }) - end - end - Flow = Struct.new(:known, :terminated, keyword_init: true) - NilFact = Struct.new(:local, :non_nil_when_true, keyword_init: true) - - TERMINATING_CALLS = %w[raise fail abort exit exit!].freeze - def self.scan(files) - files.flat_map do |file| - document = Syntax.parse(file, parser: "tree_sitter") - new(document).scan - end.sort_by { |f| [f.file, f.line, f.local, f.guard] }.map(&:to_h) - end - - attr_reader :document, :findings - - def initialize(document) - @document = document - @findings = [] - end - - def scan - document.function_defs.each do |function| - process_block(method_statements(function.body), function.name, Set.new) - end - findings - end - - private - - def process_block(stmts, function, known) - current = known.dup - stmts.each do |stmt| - flow = process_stmt(stmt, function, current) - current = flow.known - return flow if flow.terminated - end - Flow.new(known: current, terminated: false) - end - - def process_stmt(node, function, known) - return Flow.new(known: known.dup, terminated: false) unless ts_node?(node) - - if if_node?(node) - process_branch(node, function, known) - elsif assignment_node?(node) - inspect_node(assignment_rhs(node), function, known) - next_known = known.dup - next_known.delete(assignment_lhs_name(node).to_s) - Flow.new(known: next_known, terminated: false) - else - inspect_node(node, function, known) - Flow.new(known: known.dup, terminated: terminating?(node)) - end - end - - def process_branch(node, function, known) - cond = branch_condition(node) - inspect_node(cond, function, known) - - then_known = known_for_branch(node, true, cond, known) - else_known = known_for_branch(node, false, cond, known) - then_flow = process_block(stmts_for(branch_then_body(node)), 
function, then_known) - else_flow = process_block(stmts_for(branch_else_body(node)), function, else_known) - - if then_flow.terminated && else_flow.terminated - Flow.new(known: Set.new, terminated: true) - elsif then_flow.terminated - Flow.new(known: else_flow.known, terminated: false) - elsif else_flow.terminated - Flow.new(known: then_flow.known, terminated: false) - else - Flow.new(known: then_flow.known & else_flow.known, terminated: false) - end - end - - def known_for_branch(node, body_branch, cond, known) - next_known = known.dup - cond_true_branch = unless_node?(node) ? !body_branch : body_branch - branch_nil_facts(cond, cond_true_branch).each { |fact| next_known.add(fact.local) } - next_known - end - - def inspect_node(node, function, known) - return unless ts_node?(node) - - recorded = record_redundant(node, function, known) - return if recorded && safe_navigation_call?(node) - return if method_like_node?(node) - - node.children.each { |child| inspect_node(child, function, known) } - end - - def record_redundant(node, function, known) - local = redundant_nil_subject(node, known) - return false unless local - - @findings << Finding.new( - file: document.file, - defn: function, - line: line(node), - span: span(node), - local: local, - guard: normalize_text(node.text), - proof: "#{local} is already proven non-nil on this path" - ) - true - end - - def redundant_nil_subject(node, known) - subject = safe_navigation_subject(node) - return subject if subject && known.include?(subject) - - fact = nil_fact(node) - return nil unless fact && known.include?(fact.local) - - fact.local - end - - def nil_fact(node) - return nil unless ts_node?(node) - return nil_fact(node.named_children.first) if parenthesized_wrapper?(node) - - if nil_predicate_call?(node) - subject = subject_key(call_receiver_node(node)) - return subject ? 
NilFact.new(local: subject, non_nil_when_true: false) : nil - end - if non_nil_predicate_call?(node) - subject = subject_key(call_receiver_node(node)) - return subject ? NilFact.new(local: subject, non_nil_when_true: true) : nil - end - - if unary_not?(node) - return negated_nil_fact(node.named_children.first) - end - - comparison_nil_fact(node) - end - - def branch_nil_facts(node, cond_truth) - return [] unless ts_node?(node) - return branch_nil_facts(node.named_children.first, cond_truth) if parenthesized_wrapper?(node) - - if boolean_and?(node) - return [] unless cond_truth - - return flatten_boolean_and(node).flat_map { |child| branch_nil_facts(child, true) } - end - - if unary_not?(node) - return branch_nil_facts(node.named_children.first, !cond_truth) - end - - safe_receiver = safe_nav_receiver_fact(node) - return [safe_receiver] if safe_receiver && cond_truth - - fact = nil_fact(node) - return [fact] if fact && cond_truth == fact.non_nil_when_true - - truthy = truthy_subject_fact(node) - truthy && cond_truth ? [truthy] : [] - end - - def safe_nav_receiver_fact(node) - subject = safe_navigation_subject(node) - subject ? 
NilFact.new(local: subject, non_nil_when_true: true) : nil - end - - def truthy_subject_fact(node) - subject = subject_key(node) - return nil unless subject - - NilFact.new(local: subject, non_nil_when_true: true) - end - - def negated_nil_fact(node) - fact = nil_fact(node) - return nil unless fact - - NilFact.new(local: fact.local, - non_nil_when_true: !fact.non_nil_when_true) - end - - def comparison_nil_fact(node) - return nil unless ts_node?(node) && node.kind == "binary" - - operator = direct_operator(node) - return nil unless %w[== !=].include?(operator) - - left, right = node.named_children - subject = nil - if nil_literal?(right) - subject = subject_key(left) - elsif nil_literal?(left) - subject = subject_key(right) - end - return nil unless subject - - NilFact.new(local: subject, non_nil_when_true: operator == "!=") - end - - def method_statements(node) - body = method_body_node(node) - return [] unless body - - stmts_for(body) - end - - def method_body_node(node) - return nil unless ts_node?(node) - - case node.kind - when "method", "singleton_method", "argument_list", "function_definition", "function_item", - "function_declaration", "method_declaration" - node.named_children.reverse.find do |child| - %w[body_statement block compound_statement function_body statement_block].include?(child.kind) - end - when "body_statement", "block", "compound_statement", "function_body", "statement_block" - if method_like_node?(node) - node.named_children.reverse.find do |child| - %w[body_statement block compound_statement function_body statement_block].include?(child.kind) - end - else - node - end - end - end - - def stmts_for(node) - return [] unless ts_node?(node) - return [node] if if_node?(node) - return [node] if assignment_node?(node) - return [node] if call_node?(node) - - named = node.named_children.reject { |child| child.kind == "comment" } - if named.size == 1 && %w[statements statement_list].include?(named.first.kind) - return [named.first] if 
if_node?(named.first) - - named = named.first.named_children.reject { |child| child.kind == "comment" } - end - return [node] if named.empty? && !node.text.to_s.strip.empty? - - named - end - - def if_node?(node) - return false unless ts_node?(node) - return true if %w[if unless if_statement if_expression if_modifier unless_modifier].include?(node.kind) && node.named_children.any? - return true if node.kind == "expression_statement" && node.text.to_s.lstrip.start_with?("if ") - return false unless %w[body_statement block statements statement_list].include?(node.kind) - - first_token = node.children.first - return true if first_token && !first_token.named? && %w[if unless].include?(first_token.kind.to_s) - - seen_named = false - node.children.any? do |child| - seen_named ||= child.named? - seen_named && !child.named? && %w[if unless].include?(child.kind.to_s) - end - end - - def unless_node?(node) - node.kind.to_s.include?("unless") || first_token_kind(node) == "unless" - end - - def modifier_if_node?(node) - return true if %w[if_modifier unless_modifier].include?(node.kind) - return false unless %w[body_statement block statements statement_list].include?(node.kind) - - seen_named = false - node.children.any? do |child| - seen_named ||= child.named? - seen_named && !child.named? && %w[if unless].include?(child.kind.to_s) + findings = files.flat_map do |file| + Syntax.parse(file, parser: "tree_sitter").redundant_nil_guard_findings end + dedupe(findings) + .sort_by { |finding| [finding.file, finding.line, finding.local, finding.guard] } + .map(&:to_h) end - def branch_condition(node) - modifier_if_node?(node) ? 
node.named_children.last : node.named_children.first - end - - def branch_then_body(node) - if modifier_if_node?(node) - node.named_children.first - else - node.named_children.find { |child| child.kind == "then" } || node.named_children[1] - end - end - - def branch_else_body(node) - return nil if modifier_if_node?(node) - - node.named_children.find { |child| %w[else elsif].include?(child.kind) } || node.named_children[2] - end - - def assignment_node?(node) - ts_node?(node) && (%w[assignment assignment_expression assignment_statement].include?(node.kind) || flat_assignment_statement?(node)) - end - - def assignment_lhs_name(node) - assignment_lhs(node)&.text - end - - def assignment_lhs(node) - node.named_children.first if assignment_node?(node) - end - - def assignment_rhs(node) - node.named_children[1] if assignment_node?(node) - end - - def flat_assignment_statement?(node) - return false unless ts_node?(node) && node.kind == "body_statement" - - node.children.count { |child| !child.named? && child.text == "=" } == 1 && - node.named_children.size >= 2 - end - - def nil_predicate_call?(node) - call_node?(node) && %w[nil? is_none is_null isNull].include?(call_message(node).to_s) - end - - def non_nil_predicate_call?(node) - call_node?(node) && %w[is_some isSome present?].include?(call_message(node).to_s) - end - - def safe_navigation_call?(node) - ts_node?(node) && node.kind == "call" && - node.children.any? { |child| !child.named? && child.text == "&." 
} - end - - def safe_navigation_subject(node) - return nil unless safe_navigation_call?(node) - - subject_key(call_receiver_node(node)) - end - - def call_receiver_node(node) - return nil unless call_node?(node) - - if adjacent_field_call?(node) - return named_field(node, "object") || named_field(node, "receiver") || - named_field(node, "expression") || named_field(node, "operand") || - node.named_children.first + def self.dedupe(findings) + findings.group_by do |finding| + [finding.file, finding.defn, finding.line, finding.local, finding.guard.to_s.delete_suffix("()")] + end.values.map do |group| + group.max_by { |finding| finding.guard.to_s.length } end - - if %w[call call_expression function_call invocation_expression method_invocation method_call].include?(node.kind) - if node.kind == "call" - names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } - return names.first if names.size >= 2 - end - - if %w[invocation_expression method_invocation].include?(node.kind) - names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } - return names.first if names.size >= 2 - end - - callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first - if field_like_node?(callee) - return named_field(callee, "object") || named_field(callee, "receiver") || - named_field(callee, "expression") || named_field(callee, "operand") || - callee.named_children.first - end - end - - node.named_children.first - end - - def call_message(node) - return nil unless call_node?(node) - - if adjacent_field_call?(node) - field = named_field(node, "field") || named_field(node, "property") || - named_field(node, "name") || named_field(node, "suffix") || - node.named_children.last - return field&.text.to_s.sub(/\A[.?]+/, "") - end - - if %w[call call_expression function_call invocation_expression method_invocation method_call].include?(node.kind) - if node.kind == "call" - names 
= node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } - return names.last.text if names.size >= 2 - end - - if %w[invocation_expression method_invocation].include?(node.kind) - names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } - return names[1].text if names.size >= 2 - end - - callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first - if field_like_node?(callee) - field = named_field(callee, "field") || named_field(callee, "property") || - named_field(callee, "name") || named_field(callee, "suffix") || - callee.named_children.last - return field&.text.to_s.sub(/\A[.?]+/, "") - end - return callee.text if %w[identifier simple_identifier].include?(callee&.kind) - end - - node.named_children.reverse.find { |child| %w[identifier simple_identifier].include?(child.kind) }&.text - end - - def call_has_arguments?(node) - ts_node?(node) && - (node.named_children.any? { |child| %w[argument_list arguments call_suffix].include?(child.kind) } || - %w[argument_list arguments call_suffix].include?(next_sibling(node)&.kind)) - end - - def subject_key(node) - return nil unless ts_node?(node) - - case node.kind - when "identifier", "simple_identifier" - node.text - when "self", "this" - "self" - when "call", "call_expression", "function_call", "method_invocation", "invocation_expression", "method_call" - return nil if call_has_arguments?(node) - - receiver = call_receiver_node(node) - message = call_message(node) - return nil unless message && stable_reader_name?(message) - return "self.#{message}" if receiver&.kind == "self" - - recv_key = subject_key(receiver) - recv_key ? 
"#{recv_key}.#{message}" : nil - else - nil - end - end - - def stable_reader_name?(name) - text = name.to_s - !(text.end_with?("=", "!") || text == "[]") - end - - def nil_literal?(node) - ts_node?(node) && node.kind == "nil" - end - - def unary_not?(node) - ts_node?(node) && node.kind == "unary" && - node.children.any? { |child| !child.named? && child.text == "!" } - end - - def parenthesized_wrapper?(node) - ts_node?(node) && %w[condition_clause parenthesized_expression parenthesized_statements].include?(node.kind) && - node.named_children.size == 1 - end - - def boolean_and?(node) - ts_node?(node) && node.kind == "binary" && direct_operator(node) == "&&" - end - - def flatten_boolean_and(node) - return [node] unless boolean_and?(node) - - node.named_children.flat_map { |child| flatten_boolean_and(child) } - end - - def direct_operator(node) - node.children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) }&.text.to_s - end - - def terminating?(node) - return false unless ts_node?(node) - return true if %w[return break next].include?(node.kind) - return true if node.text.to_s.strip.match?(/\A(?:return|break|next)\b/) - return true if node.kind == "identifier" && TERMINATING_CALLS.include?(node.text.to_s) - - call_node?(node) && TERMINATING_CALLS.include?(call_message(node).to_s) - end - - def method_like_node?(node) - ts_node?(node) && %w[method singleton_method function_definition function_item function_declaration method_declaration].include?(node.kind) - end - - def call_node?(node) - ts_node?(node) && - (%w[call argument_list call_expression function_call invocation_expression method_invocation method_call].include?(node.kind) || - adjacent_field_call?(node)) - end - - def adjacent_field_call?(node) - field_like_node?(node) && %w[argument_list arguments call_suffix].include?(next_sibling(node)&.kind) - end - - def next_sibling(node) - node.next_sibling - rescue StandardError - nil - end - - def first_token_kind(node) - node.children.find { 
|child| !child.named? }&.kind.to_s - end - - def line(node) - node.start_point.row + 1 - end - - def span(node) - [node.start_point.row + 1, node.start_point.column, node.end_point.row + 1, node.end_point.column] - end - - def normalize_text(text) - text.to_s.lines.map(&:strip).reject(&:empty?).join(" ") - end - - def named_field(node, name) - node.child_by_field_name(name) - rescue StandardError - nil - end - - def field_like_node?(node) - ts_node?(node) && - %w[ - attribute directly_assignable_expression dot_index_expression expression_list field field_access - field_expression member_access_expression member_expression navigation_expression scoped_identifier - selector_expression variable_list - ].include?(node.kind) - end - - def ts_node?(node) - node && node.respond_to?(:kind) && node.respond_to?(:children) end end end diff --git a/gems/decomplex/lib/decomplex/semantic_alias.rb b/gems/decomplex/lib/decomplex/semantic_alias.rb index d3c60352a..b88c1679b 100644 --- a/gems/decomplex/lib/decomplex/semantic_alias.rb +++ b/gems/decomplex/lib/decomplex/semantic_alias.rb @@ -48,7 +48,20 @@ def self.scan(files) span: comparison.span ) end + document.branch_arms.each do |arm| + next unless arm.predicate.to_s.match?(/(?:==|!=)/) + + uses << Use.new( + canon: canon(arm.predicate), + file: arm.file, + defn: arm.function, + line: arm.decision_line, + raw: arm.predicate, + span: arm.decision_span + ) + end end + uses.uniq! { |use| [use.file, use.defn, use.line, use.canon, use.raw] } Report.new(preds, uses) end diff --git a/gems/decomplex/lib/decomplex/structural_topology.rb b/gems/decomplex/lib/decomplex/structural_topology.rb index 964ef7561..e0f6692ee 100644 --- a/gems/decomplex/lib/decomplex/structural_topology.rb +++ b/gems/decomplex/lib/decomplex/structural_topology.rb @@ -25,6 +25,7 @@ def self.scan(files) edges = documents.flat_map do |file, document| EdgeFacts.new(file, document, methods).edges end + edges.uniq! 
{ |edge| [edge.caller, edge.callee, edge.type] } Graph.new(methods, edges) end diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 042d9cb13..a79ba7615 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -383,7 +383,7 @@ def call_target(document, node) generic_call_target(document, node) when "attribute", "selector_expression", "field", "field_access", "member_expression", "member_access_expression", "field_expression", "expression_list", - "dot_index_expression", "variable_list", "identifier", "simple_identifier" + "navigation_expression", "dot_index_expression", "variable_list", "identifier", "simple_identifier" adjacent_argument_call_target(node) end end @@ -401,177 +401,6 @@ def state_target(lhs) end end - class RubySyntaxAdapter < TreeSitterLanguageAdapter; end - - class PythonSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - name = function_name(node).to_s - return :private if name.start_with?("_") && !name.start_with?("__") - - :public - end - - def call_target(document, node) - python_adjacent_call_target(node) || super - end - - def local_methods(document) - super - end - - private - - def python_function_body_statements(node) - body = named_field(node, "body") || - node.named_children.find { |child| child.kind == "block" } - return [] unless body - - body.named_children.reject { |child| child.kind == "comment" } - end - - def python_adjacent_call_target(node) - return nil unless %w[identifier].include?(node.kind) - - args = next_sibling(node) - return nil unless args&.kind == "argument_list" - - { - receiver: "self", - message: node.text, - arguments: args.named_children.map { |child| normalize_text(child.text) } - } - rescue StandardError - nil - end - end - - class GoSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - exported_name_visibility(function_name(node)) - end - end - - class RustSyntaxAdapter < 
TreeSitterLanguageAdapter - def visibility(_document, node) - modifier_visibility(node) || :private - end - end - - class JavaScriptSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - modifier_visibility(node) || private_name_visibility(node) - end - - private - - def private_name_visibility(node) - function_name(node).to_s.start_with?("#") ? :private : :public - end - end - - class CppSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - modifier_visibility(node) || cpp_visibility(node) - end - end - - class CSharpSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - modifier_visibility(node) || :private - end - end - - class CSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - c_visibility(node) - end - end - - class LuaSyntaxAdapter < TreeSitterLanguageAdapter - def generated_prelude?(document, node) - return false unless line(node) == 1 - - first_line = document.lines.first.to_s - first_line.include?("_tl_compat") && first_line.include?("compat53.module") - end - end - - class ZigSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - modifier_visibility(node) || :private - end - - def state_declaration(node) - return zig_container_field_declaration(node) if node.kind == "container_field" - - super - end - - private - - def zig_container_field_declaration(node) - name = node.named_children.find { |child| child.kind == "identifier" } - return nil unless name - - { field: name.text, type: declared_type_text(node, name) } - end - end - - class CppSyntaxAdapter - def implicit_state_accesses? 
- true - end - - private - - def cpp_visibility(node) - visibility = previous_cpp_access_specifier(node) - return visibility if visibility - - owner = nearest_owner_declaration(node) - return :public if owner&.kind == "struct_specifier" - - :private - end - - def previous_cpp_access_specifier(node) - sibling = prev_sibling(node) - while sibling - return sibling.text.to_sym if sibling.kind == "access_specifier" && - %w[public private protected].include?(sibling.text) - - sibling = prev_sibling(sibling) - end - nil - end - - def nearest_owner_declaration(node) - parent = parent_node(node) - seen = Set.new - while parent && !seen.include?(node_key(parent)) - seen << node_key(parent) - return parent if %w[class_specifier struct_specifier class class_definition class_declaration].include?(parent.kind) - - parent = parent_node(parent) - end - nil - end - end - - class CSharpSyntaxAdapter - def implicit_state_accesses? - true - end - end - - class CSyntaxAdapter - private - - def c_visibility(node) - node.children.any? { |child| child.text == "static" } ? :private : :public - end - end - class TreeSitterLanguageAdapter BRANCH_KINDS = %w[if unless if_statement if_modifier unless_modifier if_expression while until while_statement for for_statement @@ -1002,6 +831,7 @@ def generic_member_name?(node) owner = parent_node(parent) return true if owner && field_like_node?(owner) end + return false if parent&.kind == "expression_list" && !member_expression_list?(parent) return false unless parent && field_like_node?(parent) field = named_field(parent, "field") || named_field(parent, "property") || @@ -1014,7 +844,12 @@ def generic_call_name?(node) parent = parent_node(node) return false unless parent - %w[call_expression method_invocation invocation_expression].include?(parent.kind) && + if %w[method_invocation invocation_expression].include?(parent.kind) + names = parent.named_children.select { |child| generic_identifier?(child) } + return names.size >= 2 ? 
names.last == node : parent.named_children.first == node + end + + %w[call_expression function_call method_call].include?(parent.kind) && (named_field(parent, "function") == node || parent.named_children.first == node) end @@ -1059,6 +894,8 @@ def generic_branch_body_nodes(node) ].compact bodies = node.named_children.drop(1) if bodies.empty? bodies.flat_map do |body| + next [body] if simple_action_wrapper?(body) + children = body.named_children.reject { |child| comment_node?(child) } children.empty? ? [body] : children end @@ -1082,8 +919,16 @@ def generic_path_action_node?(node) return false unless ts_node?(node) return false if branch_node?(node) + return true if simple_action_wrapper?(node) + generic_assignment_statement?(node) || - %w[call call_expression expression_statement return_statement identifier simple_identifier].include?(node.kind) + %w[call call_expression expression_statement return_statement].include?(node.kind) + end + + def simple_action_wrapper?(node) + return false unless %w[block statement_list statements control_structure_body].include?(node.kind) + + normalize_text(node.text).match?(/\A[A-Za-z_]\w*(?:\.[A-Za-z_]\w*)?\s*\([^{};]*\)\s*;?\z/) end def comparison_target(node) @@ -1366,8 +1211,12 @@ def record_state_write(document, node, stack, out) ) end - def skip_state_write_node?(_node) - false + def skip_state_write_node?(node) + parent = parent_node(node) + return false unless parent + + assignment_lhs?(node) && + %w[assignment assignment_expression augmented_assignment assignment_statement operator_assignment].include?(parent.kind) end def skip_state_write_target?(target) @@ -2175,6 +2024,8 @@ def first_argument_receiver_call_target(_document, node, target) return nil unless target[:receiver] == "self" first_arg = call_argument_nodes(node).first + return nil unless first_arg + arg_target = state_read_target(first_arg) return nil unless arg_target @@ -2196,6 +2047,9 @@ def call_argument_nodes(node) end def adjacent_argument_call_target(node) 
+ return nil if generic_member_name?(node) + return nil if %w[call_expression method_invocation invocation_expression function_call method_call].include?(parent_node(node)&.kind) + args = next_sibling(node) return nil unless %w[argument_list arguments call_suffix].include?(args&.kind) @@ -2496,6 +2350,15 @@ def field_like_node?(node) ].include?(node.kind) end + def member_expression_list?(node) + return false unless node.kind == "expression_list" + return true if named_field(node, "operand") && named_field(node, "field") + + node.children.any? do |child| + !child.named? && %w[. -> :].include?(child.text.to_s) + end + end + def member_field_text(field) return nil unless ts_node?(field) @@ -2622,6 +2485,9 @@ def normalize_text(text) end end + require_relative "syntax/ruby" + require_relative "syntax/adapters" + LanguageProfile = TreeSitterLanguageAdapter LANGUAGE_PROFILES = { @@ -2694,19 +2560,19 @@ def normalize_text(text) grammar_names: %w[c-sharp csharp], tree_sitter_language_name: "c_sharp" ), - java: TreeSitterLanguageAdapter.new( + java: JavaSyntaxAdapter.new( language: :java, extensions: %w[.java], lexicon: JAVA_LEXICON, package: "tree-sitter-java" ), - swift: TreeSitterLanguageAdapter.new( + swift: SwiftSyntaxAdapter.new( language: :swift, extensions: %w[.swift], lexicon: SWIFT_LEXICON, package: "tree-sitter-swift" ), - kotlin: TreeSitterLanguageAdapter.new( + kotlin: KotlinSyntaxAdapter.new( language: :kotlin, extensions: %w[.kt .kts], lexicon: KOTLIN_LEXICON, @@ -3420,6 +3286,10 @@ def walk(document, profile, &block) end end -require_relative "syntax/ruby" require_relative "syntax/effects" require_relative "syntax/protocols" +require_relative "syntax/contracts" +require_relative "syntax/dispatch" +require_relative "syntax/clone_similarity" +require_relative "syntax/complexity" +require_relative "syntax/nil_guards" diff --git a/gems/decomplex/lib/decomplex/syntax/adapters.rb b/gems/decomplex/lib/decomplex/syntax/adapters.rb new file mode 100644 index 
000000000..6e779b6eb --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/adapters.rb @@ -0,0 +1,358 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + class PythonSyntaxAdapter < TreeSitterLanguageAdapter + def function_name(node) + hidden_python_function_name(node) || super + end + + def visibility(_document, node) + name = function_name(node).to_s + return :private if name.start_with?("_") && !name.start_with?("__") + + :public + end + + def call_target(document, node) + python_adjacent_call_target(node) || super + end + + def local_methods(document) + super + end + + private + + def hidden_python_function_name(node) + return nil unless node.kind == "block" + return nil unless node.children.first&.kind.to_s == "def" + + node.named_children.find { |child| child.kind == "identifier" }&.text + end + + def python_function_body_statements(node) + body = named_field(node, "body") || + node.named_children.find { |child| child.kind == "block" } + return [] unless body + + body.named_children.reject { |child| child.kind == "comment" } + end + + def python_adjacent_call_target(node) + return nil unless %w[identifier].include?(node.kind) + + args = next_sibling(node) + return nil unless args&.kind == "argument_list" + + { + receiver: "self", + message: node.text, + arguments: args.named_children.map { |child| normalize_text(child.text) } + } + rescue StandardError + nil + end + end + + class GoSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(_document, node) + exported_name_visibility(function_name(node)) + end + + private + + def boolean_container?(node) + return true if boolean_expression_list?(node, "&&") + + super + end + end + + class RustSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(_document, node) + modifier_visibility(node) || :private + end + end + + class JavaScriptSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(_document, node) + modifier_visibility(node) || private_name_visibility(node) + end + + 
private + + def private_name_visibility(node) + function_name(node).to_s.start_with?("#") ? :private : :public + end + end + + class CppSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(_document, node) + modifier_visibility(node) || cpp_visibility(node) + end + + def function_params(node) + c_family_function_params(node) || super + end + + def implicit_state_accesses? + true + end + + private + + def control_context(node) + return :iterates if node.kind == "for_range_loop" + + super + end + + def cpp_visibility(node) + visibility = previous_cpp_access_specifier(node) + return visibility if visibility + + owner = nearest_owner_declaration(node) + return :public if owner&.kind == "struct_specifier" + + :private + end + + def previous_cpp_access_specifier(node) + sibling = prev_sibling(node) + while sibling + return sibling.text.to_sym if sibling.kind == "access_specifier" && + %w[public private protected].include?(sibling.text) + + sibling = prev_sibling(sibling) + end + nil + end + + def nearest_owner_declaration(node) + parent = parent_node(node) + seen = Set.new + while parent && !seen.include?(node_key(parent)) + seen << node_key(parent) + return parent if %w[class_specifier struct_specifier class class_definition class_declaration].include?(parent.kind) + + parent = parent_node(parent) + end + nil + end + end + + class CSharpSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(_document, node) + modifier_visibility(node) || :private + end + + def implicit_state_accesses? + true + end + + private + + def control_context(node) + return :iterates if node.kind == "foreach_statement" + + super + end + end + + class CSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(_document, node) + c_visibility(node) + end + + def function_params(node) + c_family_function_params(node) || super + end + + private + + def receiver_convention_owner_name(node, **_context) + return nil unless first_argument_receiver? 
+ return nil unless node.kind == "function_definition" + + receiver = first_argument_receiver_parameter(node) + return nil unless receiver && receiver[:name] == "self" + + normalize_type_owner(receiver[:type]) + end + + def c_visibility(node) + node.children.any? { |child| child.text == "static" } ? :private : :public + end + end + + class LuaSyntaxAdapter < TreeSitterLanguageAdapter + def function_name(node) + lua_method_name(node) || super + end + + def receiver_owner_name(node) + lua_method_owner_name(node) || super + end + + def call_target(document, node) + lua_expression_list_call_target(node) || + lua_adjacent_member_call_target(node) || + super + end + + def state_read_target(node) + lua_single_return_member_target(node) || super + end + + def generated_prelude?(document, node) + return false unless line(node) == 1 + + first_line = document.lines.first.to_s + first_line.include?("_tl_compat") && first_line.include?("compat53.module") + end + + private + + def boolean_container?(node) + return true if boolean_expression_list?(node, "and") + + super + end + + def lua_method_name(node) + method = lua_method_index_expression(node) + return nil unless method + + method.named_children.last&.text + end + + def lua_method_owner_name(node) + method = lua_method_index_expression(node) + return nil unless method + + method.named_children.first&.text + end + + def lua_method_index_expression(node) + return nil unless node.kind == "function_declaration" + + node.named_children.find { |child| child.kind == "method_index_expression" } + end + + def lua_expression_list_call_target(node) + return nil unless node.kind == "expression_list" + + callee = node.named_children.find { |child| field_like_node?(child) } + args = node.named_children.find { |child| child.kind == "arguments" } + return nil unless callee && args + + target_from_callee(callee).merge(arguments: args.named_children.map { |child| normalize_text(child.text) }) + rescue StandardError + nil + end + + def 
lua_adjacent_member_call_target(node) + return nil unless node.kind == "identifier" + + args = next_sibling(node) + return nil unless args&.kind == "arguments" + + parent = parent_node(node) + return nil unless parent && field_like_node?(parent) + + target_from_callee(parent).merge(arguments: args.named_children.map { |child| normalize_text(child.text) }) + rescue StandardError + nil + end + + def lua_single_return_member_target(node) + return nil unless node.kind == "expression_list" + + text = normalize_text(node.text) + if (match = text.match(/\A([A-Za-z_]\w*)\.([A-Za-z_]\w*)\z/)) + return { receiver: match[1], field: match[2] } + end + + parent = parent_node(node) + return nil unless parent&.kind == "block" + return nil unless prev_sibling(node)&.kind.to_s == "return" || + parent.children.first&.kind.to_s == "return" + + return nil unless node.named_children.size == 1 + child = node.named_children.first + return nil unless field_like_node?(child) + + generic_state_read_target(child) + end + end + + class ZigSyntaxAdapter < TreeSitterLanguageAdapter + def visibility(_document, node) + modifier_visibility(node) || :private + end + + def state_declaration(node) + return zig_container_field_declaration(node) if node.kind == "container_field" + + super + end + + private + + def zig_container_field_declaration(node) + name = node.named_children.find { |child| child.kind == "identifier" } + return nil unless name + + { field: name.text, type: declared_type_text(node, name) } + end + end + + class JavaSyntaxAdapter < TreeSitterLanguageAdapter + def function_params(node) + return super unless node.kind == "method_declaration" + + params = node.named_children.find { |child| child.kind == "formal_parameters" } + return super unless params + + params.named_children.filter_map { |param| parameter_name(param) }.uniq + end + end + class JavaSyntaxAdapter + private + + def control_context(node) + return :iterates if node.kind == "enhanced_for_statement" + + super + end + end + 
class SwiftSyntaxAdapter < TreeSitterLanguageAdapter; end + class KotlinSyntaxAdapter < TreeSitterLanguageAdapter; end + + class TreeSitterLanguageAdapter + private + + def c_family_function_params(node) + return nil unless node.kind == "function_definition" + + declarator = named_field(node, "declarator") || + node.named_children.find { |child| child.kind == "function_declarator" } + params = declarator&.named_children&.find { |child| child.kind == "parameter_list" } + return nil unless params + + params.named_children.filter_map { |param| parameter_name(param) }.uniq + end + + def boolean_expression_list?(node, operator) + return false unless node.kind == "expression_list" + return false unless direct_operator(node) == operator + return false if node.named_children.size < 2 + + node.children.all? do |child| + child.named? || [operator, "(", ")"].include?(child.text.to_s) + end + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/clone_similarity.rb b/gems/decomplex/lib/decomplex/syntax/clone_similarity.rb new file mode 100644 index 000000000..733785a95 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/clone_similarity.rb @@ -0,0 +1,275 @@ +# frozen_string_literal: true + +require "set" + +module Decomplex + module Syntax + CloneCandidate = Struct.new( + :file, :line, :span, :method_name, :node_name, :mass, + :fingerprint, :raw, :child_fingerprints, :child_masses, + keyword_init: true + ) + + class Document + def clone_candidates + @clone_candidates ||= adapter.clone_candidates(self) + end + end + + class TreeSitterAdapter + def clone_candidates(document) + syntax_profile(document.language).clone_candidates(document) + end + end + + class TreeSitterLanguageAdapter + CLONE_IDENTIFIER_KINDS = %w[ + identifier constant type_identifier field_identifier property_identifier + shorthand_property_identifier_pattern simple_identifier variable_name + ].freeze + CLONE_LITERAL_KINDS = %w[ + string string_content string_literal 
interpreted_string_literal raw_string_literal + integer float int number rational imaginary character char_literal + symbol simple_symbol true false nil none null + ].freeze + CLONE_SKIP_KINDS = %w[ + comment identifier constant type_identifier field_identifier property_identifier + parameters formal_parameters parameter_list argument_list arguments + block_parameters call_suffix function_value_parameters method_parameters value_argument + scope_resolution + ].freeze + CLONE_CANDIDATE_KINDS = %w[ + array assignment assignment_statement block case case_clause class + class_definition class_declaration compound_statement conjunction_expression control_structure_body + do_block enum_declaration for for_statement function_body hash if if_statement match_expression + match_statement method method_definition module operator_assignment singleton_method statements + struct_declaration switch_case switch_expression switch_statement + unless until while while_statement + ].freeze + CLONE_BODY_KINDS = %w[ + body block body_statement declaration_list statement_block compound_statement + function_body statements suite do_block + ].freeze + CLONE_CALL_KINDS = %w[ + call call_expression function_call method_call method_invocation invocation_expression + ].freeze + + def clone_candidates(document) + out = [] + seen = Set.new + + document.function_defs.each do |fn| + candidate = clone_candidate_for(document, fn.body, node_name: "defn", function_name: fn.name) + clone_add_candidate(out, seen, candidate) if candidate + end + + clone_walk(document.root) do |node| + next unless clone_candidate_node?(node) + + function = clone_method_span_for(document, line(node)) + clone_add_candidate(out, seen, clone_candidate_for(document, node, function_name: function&.name)) + end + + out + rescue StandardError + [] + end + + private + + def clone_add_candidate(out, seen, candidate) + return unless candidate + return if clone_typed_struct_schema_text?(candidate.raw) + + key = [candidate.file, 
candidate.line, candidate.span, candidate.node_name, candidate.fingerprint] + return if seen.include?(key) + + seen << key + out << candidate + end + + def clone_candidate_for(document, node, node_name: nil, function_name: nil) + fp, mass = clone_fingerprint(node) + return nil if fp.to_s.empty? + + line_no = line(node) + method = clone_method_span_for(document, line_no) + children = clone_fuzzy_children_for(node) + child_data = children.map { |child| clone_fingerprint(child) } + .reject { |child_fp, child_mass| child_fp.to_s.empty? || child_mass.zero? } + + CloneCandidate.new( + file: document.file, + line: line_no, + span: span(node), + method_name: function_name || method&.name || "(top-level)", + node_name: node_name || clone_node_name(node), + mass: mass, + fingerprint: fp, + raw: normalize_text(node.text), + child_fingerprints: child_data.map(&:first), + child_masses: child_data.map(&:last) + ) + end + + def clone_candidate_node?(node) + return false unless ts_node?(node) + return false unless node.named? + return false if CLONE_SKIP_KINDS.include?(node.kind) + return false unless CLONE_CANDIDATE_KINDS.include?(node.kind) + return false if clone_typed_struct_schema_text?(node.text) + + node.named_child_count.positive? + end + + def clone_fuzzy_children_for(node) + body = clone_body_node(node) + source = body || node + children = source.named_children + children = node.named_children if children.empty? 
+ children.reject { |child| CLONE_SKIP_KINDS.include?(child.kind) || clone_typed_struct_schema_text?(child.text) } + end + + def clone_body_node(node) + named_field(node, "body") || + node.named_children.find { |child| CLONE_BODY_KINDS.include?(child.kind) } + end + + def clone_fingerprint(node, active = nil) + return ["", 0] unless ts_node?(node) + + active ||= Set.new + key = node_key(node) + return ["", 0] if active.include?(key) + + active << key + begin + return ["", 0] if node.kind == "comment" + return clone_fingerprint_call(node, active) if CLONE_CALL_KINDS.include?(node.kind) && clone_call_message(node) + + if node.child_count.zero? + token = clone_terminal_token(node) + return ["", 0] if token.empty? + + return [token, 1] + end + + child_parts = [] + mass = 1 + node.children.each do |child| + child_fp, child_mass = clone_fingerprint(child, active) + next if child_fp.empty? + + child_parts << child_fp + mass += child_mass + end + + return [clone_terminal_token(node), 1] if child_parts.empty? + + ["#{node.kind}(#{child_parts.join(' ')})", mass] + ensure + active.delete(key) + end + end + + def clone_fingerprint_call(node, active) + message = clone_call_message(node) + child_parts = [] + mass = 1 + node.children.each do |child| + child_fp, child_mass = clone_fingerprint(child, active) + next if child_fp.empty? + + child_parts << child_fp + mass += child_mass + end + ["#{node.kind}<#{message}>(#{child_parts.join(' ')})", mass] + end + + def clone_call_message(node) + return nil unless node.children.any? { |child| %w[argument_list arguments call_suffix].include?(child.kind) } + + callee = named_field(node, "function") || named_field(node, "callee") + return clone_callee_message(callee) if callee + + argument_node = node.children.find { |child| %w[argument_list arguments call_suffix].include?(child.kind) } + named_before_args = node.named_children.select do |child| + argument_node.nil? 
|| child.start_byte < argument_node.start_byte + end + clone_callee_message(named_before_args.last) + end + + def clone_callee_message(node) + return nil unless ts_node?(node) + return node.text if CLONE_IDENTIFIER_KINDS.include?(node.kind) + return clone_navigation_suffix_message(node) if %w[navigation_expression directly_assignable_expression].include?(node.kind) + + leaf = node.named_children.reverse.find { |child| CLONE_IDENTIFIER_KINDS.include?(child.kind) } + leaf&.text + end + + def clone_navigation_suffix_message(node) + suffix = node.named_children.reverse.find { |child| child.kind == "navigation_suffix" } + leaf = suffix&.named_children&.reverse&.find { |child| CLONE_IDENTIFIER_KINDS.include?(child.kind) } + leaf&.text + end + + def clone_terminal_token(node) + kind = node.kind.to_s + return "id" if CLONE_IDENTIFIER_KINDS.include?(kind) + return clone_literal_token(kind) if CLONE_LITERAL_KINDS.include?(kind) + + text = normalize_text(node.text) + return "" if text.empty? + return "id" if text.match?(/\A[A-Za-z_]\w*[!?=]?\z/) + return "lit" if text.match?(/\A(?::[A-Za-z_]\w*|[-+]?\d+(?:\.\d+)?|".*"|'.*')\z/) + + "#{kind}:#{text}" + end + + def clone_literal_token(kind) + case kind + when "true", "false" then "bool" + when "nil", "none", "null" then "nil" + else "lit" + end + end + + def clone_node_name(node) + return "defn" if %w[method function_definition function_declaration method_definition function_item].include?(node.kind) + return "defs" if node.kind == "singleton_method" + + node.kind + end + + def clone_typed_struct_schema_text?(text) + text.to_s.match?(/<\s*T::Struct\b/) || + text.to_s.lines.all? { |line| line.strip.empty? 
|| line.match?(/\A\s*(?:const|prop)\s+:[A-Za-z_]\w*\b/) } + end + + def clone_method_span_for(document, line_no) + document.function_defs.find { |fn| fn.span[0] <= line_no && line_no <= fn.span[2] } + rescue StandardError + nil + end + + def clone_walk(node, &block) + return unless ts_node?(node) + + pending = [node] + seen = Set.new + until pending.empty? + current = pending.pop + next unless ts_node?(current) + + key = node_key(current) + next if seen.include?(key) + + seen << key + yield current + current.children.reverse_each { |child| pending << child } + end + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/complexity.rb b/gems/decomplex/lib/decomplex/syntax/complexity.rb new file mode 100644 index 000000000..5f32993e0 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/complexity.rb @@ -0,0 +1,187 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + class Document + def local_complexity_scores + @local_complexity_scores ||= adapter.local_complexity_scores(self) + end + end + + class TreeSitterAdapter + def local_complexity_scores(document) + profile = syntax_profile(document.language) + document.local_methods.to_h do |method| + [method.id, profile.local_complexity_score(method.node)] + end + end + end + + class TreeSitterLanguageAdapter + def local_complexity_score(method_node) + LocalComplexityScorer.new.score(method_node) + end + + class LocalComplexityScorer + def score(method_node) + signals = Hash.new(0) + { + score: round(score_node(method_node, nesting: 0, signals: signals)), + signals: signals.to_h + } + end + + private + + def score_node(node, nesting:, signals:) + return 0.0 unless tree_sitter_node?(node) + return 0.0 if skip_nested?(node) + + if branch?(node) + signals[:branches] += 1 + signals[:nested] += 1 if nesting.positive? 
+ return branch_cost(nesting) + + predicate_cost(node, signals) + + score_children(node, nesting: nesting + 1, signals: signals) + end + + if loop?(node) + signals[:loops] += 1 + signals[:nested] += 1 if nesting.positive? + return branch_cost(nesting) + + score_children(node, nesting: nesting + 1, signals: signals) + end + + if case?(node) + signals[:cases] += 1 + return 0.5 + score_children(node, nesting: nesting + 1, signals: signals) + end + + if rescue?(node) + signals[:rescues] += 1 + return branch_cost(nesting) + + score_children(node, nesting: nesting + 1, signals: signals) + end + + if early_exit?(node) + signals[:early_exits] += 1 + exit_cost = nesting.positive? ? 0.5 + (nesting * 0.25) : 0.0 + return exit_cost + score_children(node, nesting: nesting, signals: signals) + end + + if boolean_node?(node) + signals[:boolean_ops] += 1 + return 0.25 + score_children(node, nesting: nesting, signals: signals) + end + + score_children(node, nesting: nesting, signals: signals) + end + + def score_children(node, nesting:, signals:) + node.children.sum { |child| score_node(child, nesting: nesting, signals: signals) } + end + + def predicate_cost(node, signals) + predicate = condition_node(node) + bools = boolean_count(predicate) + signals[:boolean_ops] += bools + bools * 0.5 + end + + def condition_node(node) + return node.named_children.last if modifier_if?(node) + return node.named_children.first if node.kind == "body_statement" + + node.named_children.first + end + + def boolean_count(node) + return 0 unless tree_sitter_node?(node) + + own = boolean_node?(node) ? 1 : 0 + own + node.children.sum { |child| boolean_count(child) } + end + + def boolean_node?(node) + tree_sitter_node?(node) && + %w[binary binary_expression boolean_operator conjunction_expression disjunction_expression].include?(node.kind) && + node.children.any? { |child| !child.named? 
&& %w[&& || and or].include?(child.text.to_s) } + end + + def branch?(node) + return false unless tree_sitter_node?(node) + return true if %w[if unless if_statement if_expression if_modifier unless_modifier].include?(node.kind) && + node.named_children.any? + + hidden_if?(node) || modifier_if?(node) + end + + def hidden_if?(node) + return true if node.kind == "expression_statement" && node.text.to_s.lstrip.start_with?("if ") + return false unless %w[body_statement block statements statement_list].include?(node.kind) + + first_token = node.children.first + first_token && !first_token.named? && %w[if unless].include?(first_token.kind.to_s) + end + + def modifier_if?(node) + return true if %w[if_modifier unless_modifier].include?(node.kind) + return false unless node.kind == "body_statement" + + seen_named = false + node.children.any? do |child| + seen_named ||= child.named? + seen_named && !child.named? && %w[if unless].include?(child.kind.to_s) + end + end + + def loop?(node) + return false unless tree_sitter_node?(node) + return true if %w[while until while_statement for for_statement for_in_statement do_block].include?(node.kind) + return true if hidden_loop?(node) + + (node.kind == "expression_statement" && node.text.to_s.lstrip.match?(/\A(?:for|while|loop)\b/)) || + (node.kind == "labeled_statement" && node.text.to_s.lstrip.start_with?("for ")) + end + + def hidden_loop?(node) + %w[body_statement block statements statement_list].include?(node.kind) && + node.children.first && + !node.children.first.named? 
&& + %w[for while loop].include?(node.children.first.kind.to_s) + end + + def case?(node) + tree_sitter_node?(node) && + (%w[case switch_statement switch_expression match_statement match_expression].include?(node.kind) || + (node.kind == "expression_statement" && node.text.to_s.lstrip.start_with?("match "))) + end + + def rescue?(node) + tree_sitter_node?(node) && %w[rescue rescue_modifier rescue_clause rescue_body].include?(node.kind) + end + + def early_exit?(node) + tree_sitter_node?(node) && + %w[return break next redo retry return_statement break_statement continue_statement].include?(node.kind) + end + + def skip_nested?(node) + %w[class module lambda].include?(node.kind) + end + + def tree_sitter_node?(node) + node.respond_to?(:kind) && node.respond_to?(:children) + end + + def branch_cost(nesting) + 1.1 + nesting + end + + def round(value) + (value * 10).round / 10.0 + end + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/contracts.rb b/gems/decomplex/lib/decomplex/syntax/contracts.rb new file mode 100644 index 000000000..be6200669 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/contracts.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + class Document + def local_contract_assignments(method) + adapter.local_contract_assignments(self, method) + end + end + + class TreeSitterLanguageAdapter + def local_contract_assignments(_document, method) + method.statements.each_with_object({}) do |statement, map| + next unless statement.writes.size == 1 + + name = statement.writes.first.to_s + map[name] ||= local_contract_source(name, statement.source) + end.compact + end + + private + + def local_contract_source(name, source) + match = source.to_s.match(/\b#{Regexp.escape(name)}\b\s*(?::=|=)\s*(.+?)\s*;?\z/m) + return nil unless match + + rhs = match[1].strip + return nil if rhs.match?(/\s(?:if|unless|rescue)\s|\?|:/) + + rhs + end + end + + class TreeSitterAdapter + def 
local_contract_assignments(document, method) + syntax_profile(document.language).local_contract_assignments(document, method) + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/dispatch.rb b/gems/decomplex/lib/decomplex/syntax/dispatch.rb new file mode 100644 index 000000000..d847e0a13 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/dispatch.rb @@ -0,0 +1,148 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + DispatchSite = Struct.new(:variant_set, :arm_members, :outside, :file, + :function, :line, :span, keyword_init: true) + + class Document + def dispatch_sites + @dispatch_sites ||= adapter.dispatch_sites(self) + end + end + + class TreeSitterAdapter + def dispatch_sites(document) + syntax_profile(document.language).dispatch_sites(document) + end + end + + class TreeSitterLanguageAdapter + DISPATCH_CONSTANT_PATTERN = /\A[A-Z]\w*(?:(?:::|\.|_)[A-Z]\w*)*\z/ + IF_DISPATCH_PATTERN = /\A(?.+?)\s*(?:==|===)\s*(?[A-Z]\w*(?:(?:::|\.|_)[A-Z]\w*)*)\z/ + + def dispatch_sites(document) + arms = document.branch_arms + case_dispatch_sites(document, arms) + if_dispatch_sites(document, arms) + end + + private + + def case_dispatch_sites(document, arms) + arms.select { |arm| arm.kind == :case } + .group_by { |arm| [arm.file, arm.function, arm.decision_span, arm.predicate] } + .filter_map { |_key, case_arms| record_case_dispatch_site(document, case_arms) } + end + + def record_case_dispatch_site(document, arms) + predicate = arms.first.predicate.to_s + return nil if predicate.empty? + + arm_members = {} + arms.each do |arm| + variants = dispatch_constant_patterns(arm.member) + next if variants.empty? 
+ + members = dispatch_members_inside(document, predicate, arm.function, arm.span) + variants.each { |variant| (arm_members[variant] ||= []).concat(members) } + end + return nil if arm_members.size < 2 + + arm_members.transform_values!(&:uniq) + DispatchSite.new( + variant_set: arm_members.keys.sort, + arm_members: arm_members, + outside: dispatch_members_outside(document, predicate, arms.first.function, arms.first.decision_span), + file: arms.first.file, + function: arms.first.function, + line: arms.first.decision_line, + span: arms.first.decision_span + ) + end + + def if_dispatch_sites(document, arms) + arms.select { |arm| arm.kind == :if && arm.member == "then" } + .filter_map { |arm| [arm, if_dispatch_match(arm.predicate)] } + .reject { |_arm, match| match.nil? } + .group_by { |arm, match| [arm.file, arm.function, match[:subject]] } + .filter_map { |_key, matched| record_if_dispatch_site(document, matched) } + end + + def record_if_dispatch_site(document, matched) + predicate = matched.first[1][:subject] + arm_members = {} + matched.each do |arm, match| + members = dispatch_members_inside(document, predicate, arm.function, arm.span) + (arm_members[match[:variant]] ||= []).concat(members) + end + return nil if arm_members.size < 2 + + arm_members.transform_values!(&:uniq) + DispatchSite.new( + variant_set: arm_members.keys.sort, + arm_members: arm_members, + outside: dispatch_members_outside_spans(document, predicate, matched.first[0].function, matched.map { |arm, _match| arm.span }), + file: matched.first[0].file, + function: matched.first[0].function, + line: matched.first[0].decision_line, + span: matched.first[0].decision_span + ) + end + + def if_dispatch_match(predicate) + source = predicate.to_s.strip + source = source[1...-1].strip if source.start_with?("(") && source.end_with?(")") + match = source.match(IF_DISPATCH_PATTERN) + return nil unless match + + { subject: match[:subject].strip, variant: match[:variant].strip } + end + + def 
dispatch_members_inside(document, predicate, function, span) + dispatch_member_calls(document, predicate, function) + .select { |call| dispatch_inside_span?(call.span, span) } + .map { |call| dispatch_member_name(call) } + .uniq + end + + def dispatch_members_outside(document, predicate, function, decision_span) + dispatch_member_calls(document, predicate, function) + .reject { |call| dispatch_inside_span?(call.span, decision_span) } + .map { |call| dispatch_member_name(call) } + .uniq + end + + def dispatch_members_outside_spans(document, predicate, function, spans) + dispatch_member_calls(document, predicate, function) + .reject { |call| spans.any? { |span| dispatch_inside_span?(call.span, span) } } + .map { |call| dispatch_member_name(call) } + .uniq + end + + def dispatch_member_calls(document, predicate, function) + document.call_sites.select do |call| + call.function == function && + call.receiver.to_s == predicate && + !call.message.to_s.empty? + end + end + + def dispatch_member_name(call) + call.message.to_s.sub(/=\z/, "") + end + + def dispatch_constant_patterns(member) + member.to_s.split(/\s*,\s*/).map { |pattern| pattern.sub(/\Acase\s+/, "") } + .select { |pattern| pattern.match?(DISPATCH_CONSTANT_PATTERN) } + end + + def dispatch_inside_span?(inner, outer) + return false unless inner && outer + + starts_after_or_at = (inner[0] > outer[0]) || (inner[0] == outer[0] && inner[1] >= outer[1]) + ends_before_or_at = (inner[2] < outer[2]) || (inner[2] == outer[2] && inner[3] <= outer[3]) + starts_after_or_at && ends_before_or_at + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/nil_guards.rb b/gems/decomplex/lib/decomplex/syntax/nil_guards.rb new file mode 100644 index 000000000..05599793b --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/nil_guards.rb @@ -0,0 +1,537 @@ +# frozen_string_literal: true + +require "set" + +module Decomplex + module Syntax + NilGuardFinding = Struct.new(:file, :defn, :line, :span, :local, :guard, + 
:proof, keyword_init: true) do + def to_h + loc = "#{file}:#{defn}:#{line}" + super.merge(at: loc, spans: { loc => span }) + end + end + + class Document + def redundant_nil_guard_findings + @redundant_nil_guard_findings ||= NilGuardAnalyzer.new(self).scan + end + end + + class NilGuardAnalyzer + Flow = Struct.new(:known, :terminated, keyword_init: true) + NilFact = Struct.new(:local, :non_nil_when_true, keyword_init: true) + + TERMINATING_CALLS = %w[raise fail abort exit exit!].freeze + + attr_reader :document, :findings + + def initialize(document) + @document = document + @findings = [] + end + + def scan + document.function_defs.each do |function| + process_block(method_statements(function.body), function.name, Set.new) + end + findings + end + + private + + def process_block(stmts, function, known) + current = known.dup + stmts.each do |stmt| + flow = process_stmt(stmt, function, current) + current = flow.known + return flow if flow.terminated + end + Flow.new(known: current, terminated: false) + end + + def process_stmt(node, function, known) + return Flow.new(known: known.dup, terminated: false) unless ts_node?(node) + + if if_node?(node) + process_branch(node, function, known) + elsif assignment_node?(node) + inspect_node(assignment_rhs(node), function, known) + next_known = known.dup + next_known.delete(assignment_lhs_name(node).to_s) + Flow.new(known: next_known, terminated: false) + else + inspect_node(node, function, known) + Flow.new(known: known.dup, terminated: terminating?(node)) + end + end + + def process_branch(node, function, known) + cond = branch_condition(node) + inspect_node(cond, function, known) + + then_known = known_for_branch(node, true, cond, known) + else_known = known_for_branch(node, false, cond, known) + then_flow = process_block(stmts_for(branch_then_body(node)), function, then_known) + else_flow = process_block(stmts_for(branch_else_body(node)), function, else_known) + + if then_flow.terminated && else_flow.terminated + 
Flow.new(known: Set.new, terminated: true) + elsif then_flow.terminated + Flow.new(known: else_flow.known, terminated: false) + elsif else_flow.terminated + Flow.new(known: then_flow.known, terminated: false) + else + Flow.new(known: then_flow.known & else_flow.known, terminated: false) + end + end + + def known_for_branch(node, body_branch, cond, known) + next_known = known.dup + cond_true_branch = unless_node?(node) ? !body_branch : body_branch + branch_nil_facts(cond, cond_true_branch).each { |fact| next_known.add(fact.local) } + next_known + end + + def inspect_node(node, function, known) + return unless ts_node?(node) + + recorded = record_redundant(node, function, known) + return if recorded && safe_navigation_call?(node) + return if method_like_node?(node) + + node.children.each { |child| inspect_node(child, function, known) } + end + + def record_redundant(node, function, known) + local = redundant_nil_subject(node, known) + return false unless local + + @findings << NilGuardFinding.new( + file: document.file, + defn: function, + line: line(node), + span: span(node), + local: local, + guard: normalize_text(node.text), + proof: "#{local} is already proven non-nil on this path" + ) + true + end + + def redundant_nil_subject(node, known) + subject = safe_navigation_subject(node) + return subject if subject && known.include?(subject) + + fact = nil_fact(node) + return nil unless fact && known.include?(fact.local) + + fact.local + end + + def nil_fact(node) + return nil unless ts_node?(node) + return nil_fact(node.named_children.first) if parenthesized_wrapper?(node) + + if nil_predicate_call?(node) + subject = subject_key(call_receiver_node(node)) + return subject ? NilFact.new(local: subject, non_nil_when_true: false) : nil + end + if non_nil_predicate_call?(node) + subject = subject_key(call_receiver_node(node)) + return subject ? 
NilFact.new(local: subject, non_nil_when_true: true) : nil + end + + return negated_nil_fact(node.named_children.first) if unary_not?(node) + + comparison_nil_fact(node) + end + + def branch_nil_facts(node, cond_truth) + return [] unless ts_node?(node) + return branch_nil_facts(node.named_children.first, cond_truth) if parenthesized_wrapper?(node) + + if boolean_and?(node) + return [] unless cond_truth + + return flatten_boolean_and(node).flat_map { |child| branch_nil_facts(child, true) } + end + + return branch_nil_facts(node.named_children.first, !cond_truth) if unary_not?(node) + + safe_receiver = safe_nav_receiver_fact(node) + return [safe_receiver] if safe_receiver && cond_truth + + fact = nil_fact(node) + return [fact] if fact && cond_truth == fact.non_nil_when_true + + truthy = truthy_subject_fact(node) + truthy && cond_truth ? [truthy] : [] + end + + def safe_nav_receiver_fact(node) + subject = safe_navigation_subject(node) + subject ? NilFact.new(local: subject, non_nil_when_true: true) : nil + end + + def truthy_subject_fact(node) + subject = subject_key(node) + return nil unless subject + + NilFact.new(local: subject, non_nil_when_true: true) + end + + def negated_nil_fact(node) + fact = nil_fact(node) + return nil unless fact + + NilFact.new(local: fact.local, + non_nil_when_true: !fact.non_nil_when_true) + end + + def comparison_nil_fact(node) + return nil unless ts_node?(node) && node.kind == "binary" + + operator = direct_operator(node) + return nil unless %w[== !=].include?(operator) + + left, right = node.named_children + subject = nil + if nil_literal?(right) + subject = subject_key(left) + elsif nil_literal?(left) + subject = subject_key(right) + end + return nil unless subject + + NilFact.new(local: subject, non_nil_when_true: operator == "!=") + end + + def method_statements(node) + body = method_body_node(node) + return [] unless body + + stmts_for(body) + end + + def method_body_node(node) + return nil unless ts_node?(node) + + case node.kind 
+ when "method", "singleton_method", "argument_list", "function_definition", "function_item", + "function_declaration", "method_declaration" + node.named_children.reverse.find do |child| + %w[body_statement block compound_statement function_body statement_block].include?(child.kind) + end + when "body_statement", "block", "compound_statement", "function_body", "statement_block" + if method_like_node?(node) + node.named_children.reverse.find do |child| + %w[body_statement block compound_statement function_body statement_block].include?(child.kind) + end + else + node + end + end + end + + def stmts_for(node) + return [] unless ts_node?(node) + return [node] if if_node?(node) + return [node] if assignment_node?(node) + return [node] if call_node?(node) + + named = node.named_children.reject { |child| child.kind == "comment" } + if named.size == 1 && %w[statements statement_list].include?(named.first.kind) + return [named.first] if if_node?(named.first) + + named = named.first.named_children.reject { |child| child.kind == "comment" } + end + return [node] if named.empty? && !node.text.to_s.strip.empty? + + named + end + + def if_node?(node) + return false unless ts_node?(node) + return true if %w[if unless if_statement if_expression if_modifier unless_modifier].include?(node.kind) && node.named_children.any? + return true if node.kind == "expression_statement" && node.text.to_s.lstrip.start_with?("if ") + return false unless %w[body_statement block statements statement_list].include?(node.kind) + + first_token = node.children.first + return true if first_token && !first_token.named? && %w[if unless].include?(first_token.kind.to_s) + + seen_named = false + node.children.any? do |child| + seen_named ||= child.named? + seen_named && !child.named? 
&& %w[if unless].include?(child.kind.to_s) + end + end + + def unless_node?(node) + node.kind.to_s.include?("unless") || first_token_kind(node) == "unless" + end + + def modifier_if_node?(node) + return true if %w[if_modifier unless_modifier].include?(node.kind) + return false unless %w[body_statement block statements statement_list].include?(node.kind) + + seen_named = false + node.children.any? do |child| + seen_named ||= child.named? + seen_named && !child.named? && %w[if unless].include?(child.kind.to_s) + end + end + + def branch_condition(node) + modifier_if_node?(node) ? node.named_children.last : node.named_children.first + end + + def branch_then_body(node) + if modifier_if_node?(node) + node.named_children.first + else + node.named_children.find { |child| child.kind == "then" } || node.named_children[1] + end + end + + def branch_else_body(node) + return nil if modifier_if_node?(node) + + node.named_children.find { |child| %w[else elsif].include?(child.kind) } || node.named_children[2] + end + + def assignment_node?(node) + ts_node?(node) && (%w[assignment assignment_expression assignment_statement].include?(node.kind) || flat_assignment_statement?(node)) + end + + def assignment_lhs_name(node) + assignment_lhs(node)&.text + end + + def assignment_lhs(node) + node.named_children.first if assignment_node?(node) + end + + def assignment_rhs(node) + node.named_children[1] if assignment_node?(node) + end + + def flat_assignment_statement?(node) + return false unless ts_node?(node) && node.kind == "body_statement" + + node.children.count { |child| !child.named? && child.text == "=" } == 1 && + node.named_children.size >= 2 + end + + def nil_predicate_call?(node) + call_node?(node) && %w[nil? 
is_none is_null isNull].include?(call_message(node).to_s) + end + + def non_nil_predicate_call?(node) + call_node?(node) && %w[is_some isSome present?].include?(call_message(node).to_s) + end + + def safe_navigation_call?(node) + ts_node?(node) && node.kind == "call" && + node.children.any? { |child| !child.named? && child.text == "&." } + end + + def safe_navigation_subject(node) + return nil unless safe_navigation_call?(node) + + subject_key(call_receiver_node(node)) + end + + def call_receiver_node(node) + return nil unless call_node?(node) + + if adjacent_field_call?(node) + return named_field(node, "object") || named_field(node, "receiver") || + named_field(node, "expression") || named_field(node, "operand") || + node.named_children.first + end + + if %w[call call_expression function_call invocation_expression method_invocation method_call].include?(node.kind) + if node.kind == "call" + names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } + return names.first if names.size >= 2 + end + + if %w[invocation_expression method_invocation].include?(node.kind) + names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } + return names.first if names.size >= 2 + end + + callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first + if field_like_node?(callee) + return named_field(callee, "object") || named_field(callee, "receiver") || + named_field(callee, "expression") || named_field(callee, "operand") || + callee.named_children.first + end + end + + node.named_children.first + end + + def call_message(node) + return nil unless call_node?(node) + + if adjacent_field_call?(node) + field = named_field(node, "field") || named_field(node, "property") || + named_field(node, "name") || named_field(node, "suffix") || + node.named_children.last + return field&.text.to_s.sub(/\A[.?]+/, "") + end + + if %w[call call_expression function_call 
invocation_expression method_invocation method_call].include?(node.kind) + if node.kind == "call" + names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } + return names.last.text if names.size >= 2 + end + + if %w[invocation_expression method_invocation].include?(node.kind) + names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } + return names[1].text if names.size >= 2 + end + + callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first + if field_like_node?(callee) + field = named_field(callee, "field") || named_field(callee, "property") || + named_field(callee, "name") || named_field(callee, "suffix") || + callee.named_children.last + return field&.text.to_s.sub(/\A[.?]+/, "") + end + return callee.text if %w[identifier simple_identifier].include?(callee&.kind) + end + + node.named_children.reverse.find { |child| %w[identifier simple_identifier].include?(child.kind) }&.text + end + + def call_has_arguments?(node) + ts_node?(node) && + (node.named_children.any? { |child| %w[argument_list arguments call_suffix].include?(child.kind) } || + %w[argument_list arguments call_suffix].include?(next_sibling(node)&.kind)) + end + + def subject_key(node) + return nil unless ts_node?(node) + + case node.kind + when "identifier", "simple_identifier" + node.text + when "self", "this" + "self" + when "call", "call_expression", "function_call", "method_invocation", "invocation_expression", "method_call" + return nil if call_has_arguments?(node) + + receiver = call_receiver_node(node) + message = call_message(node) + return nil unless message && stable_reader_name?(message) + return "self.#{message}" if receiver&.kind == "self" + + recv_key = subject_key(receiver) + recv_key ? 
"#{recv_key}.#{message}" : nil + else + nil + end + end + + def stable_reader_name?(name) + text = name.to_s + !(text.end_with?("=", "!") || text == "[]") + end + + def nil_literal?(node) + ts_node?(node) && node.kind == "nil" + end + + def unary_not?(node) + ts_node?(node) && node.kind == "unary" && + node.children.any? { |child| !child.named? && child.text == "!" } + end + + def parenthesized_wrapper?(node) + ts_node?(node) && %w[condition_clause parenthesized_expression parenthesized_statements].include?(node.kind) && + node.named_children.size == 1 + end + + def boolean_and?(node) + ts_node?(node) && node.kind == "binary" && direct_operator(node) == "&&" + end + + def flatten_boolean_and(node) + return [node] unless boolean_and?(node) + + node.named_children.flat_map { |child| flatten_boolean_and(child) } + end + + def direct_operator(node) + node.children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) }&.text.to_s + end + + def terminating?(node) + return false unless ts_node?(node) + return true if %w[return break next].include?(node.kind) + return true if node.text.to_s.strip.match?(/\A(?:return|break|next)\b/) + return true if node.kind == "identifier" && TERMINATING_CALLS.include?(node.text.to_s) + + call_node?(node) && TERMINATING_CALLS.include?(call_message(node).to_s) + end + + def method_like_node?(node) + ts_node?(node) && %w[method singleton_method function_definition function_item function_declaration method_declaration].include?(node.kind) + end + + def call_node?(node) + ts_node?(node) && + (%w[call argument_list call_expression function_call invocation_expression method_invocation method_call].include?(node.kind) || + adjacent_field_call?(node)) + end + + def adjacent_field_call?(node) + field_like_node?(node) && %w[argument_list arguments call_suffix].include?(next_sibling(node)&.kind) + end + + def next_sibling(node) + node.next_sibling + rescue StandardError + nil + end + + def first_token_kind(node) + node.children.find { 
|child| !child.named? }&.kind.to_s + end + + def line(node) + node.start_point.row + 1 + end + + def span(node) + [node.start_point.row + 1, node.start_point.column, node.end_point.row + 1, node.end_point.column] + end + + def normalize_text(text) + text.to_s.lines.map(&:strip).reject(&:empty?).join(" ") + end + + def named_field(node, name) + node.child_by_field_name(name) + rescue StandardError + nil + end + + def field_like_node?(node) + ts_node?(node) && + %w[ + attribute directly_assignable_expression dot_index_expression expression_list field field_access + field_expression member_access_expression member_expression navigation_expression scoped_identifier + selector_expression variable_list + ].include?(node.kind) + end + + def ts_node?(node) + node && node.respond_to?(:kind) && node.respond_to?(:children) + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/ruby.rb b/gems/decomplex/lib/decomplex/syntax/ruby.rb index bb6e6f165..73c5fc544 100644 --- a/gems/decomplex/lib/decomplex/syntax/ruby.rb +++ b/gems/decomplex/lib/decomplex/syntax/ruby.rb @@ -856,7 +856,7 @@ def apply_ruby_visibility!(out) events.each do |event| if event.is_a?(FunctionDef) - event.visibility ||= event.name.to_s.start_with?("self.") ? :public : visibility + event.visibility ||= event.name.to_s.include?(".") ? :public : visibility elsif event.arguments.to_a.empty? 
visibility = event.message.to_sym else diff --git a/gems/decomplex/lib/decomplex/weighted_inlined_cognitive_complexity.rb b/gems/decomplex/lib/decomplex/weighted_inlined_cognitive_complexity.rb index cc1d0c3a7..f438bbfd3 100644 --- a/gems/decomplex/lib/decomplex/weighted_inlined_cognitive_complexity.rb +++ b/gems/decomplex/lib/decomplex/weighted_inlined_cognitive_complexity.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true require "set" -require_relative "local_flow" +require_relative "syntax" require_relative "structural_topology" module Decomplex @@ -9,7 +9,8 @@ module Decomplex # same-owner bare/self helper calls. This catches "small" orchestration # methods whose complexity was moved into private/single-use helpers. class WeightedInlinedCognitiveComplexity - MethodBody = Struct.new(:id, :owner, :name, :file, :line, :span, :node, keyword_init: true) + MethodBody = Struct.new(:id, :owner, :name, :file, :line, :span, :node, + :complexity, keyword_init: true) LocalScore = Struct.new(:id, :owner, :name, :file, :line, :span, :score, :signals, keyword_init: true) Contribution = Struct.new(:callee_id, :callee_name, :score, :weight, :depth, :chain, keyword_init: true) @@ -31,9 +32,9 @@ def initialize(files, min_score:, min_hidden:, max_depth:) def scan topology = StructuralTopology.scan(@files) - bodies = LocalFlow.scan(@files).map { |summary| method_body(summary) } + bodies = syntax_method_bodies scores = bodies.to_h do |body| - score = LocalScorer.new.score(body.node) + score = body.complexity [body.id, LocalScore.new( id: body.id, owner: body.owner, @@ -51,7 +52,17 @@ def scan private - def method_body(summary) + def syntax_method_bodies + @files.flat_map do |file| + document = Syntax.parse(file, parser: "tree_sitter") + score_by_id = document.local_complexity_scores + document.local_methods.map do |method| + method_body(method, complexity: score_by_id.fetch(method.id, { score: 0.0, signals: {} })) + end + end + end + + def method_body(summary, complexity:) owner = 
summary.owner == "(top-level)" ? "(top-level:#{summary.file})" : summary.owner MethodBody.new( id: "#{owner}##{summary.name}", @@ -60,181 +71,11 @@ def method_body(summary) file: summary.file, line: summary.line, span: summary.span, - node: summary.node + node: summary.node, + complexity: complexity ) end - class LocalScorer - def score(method_node) - signals = Hash.new(0) - { - score: round(score_node(method_node, nesting: 0, signals: signals)), - signals: signals.to_h - } - end - - private - - def score_node(node, nesting:, signals:) - return 0.0 unless tree_sitter_node?(node) - - score_tree_sitter_node(node, nesting: nesting, signals: signals) - end - - def boolean_count(node) - tree_sitter_boolean_count(node) - end - - def score_tree_sitter_node(node, nesting:, signals:) - return 0.0 if skip_tree_sitter_nested?(node) - - if tree_sitter_branch?(node) - signals[:branches] += 1 - signals[:nested] += 1 if nesting.positive? - return branch_cost(nesting) + - tree_sitter_predicate_cost(node, signals) + - score_tree_sitter_children(node, nesting: nesting + 1, signals: signals) - end - - if tree_sitter_loop?(node) - signals[:loops] += 1 - signals[:nested] += 1 if nesting.positive? - return branch_cost(nesting) + - score_tree_sitter_children(node, nesting: nesting + 1, signals: signals) - end - - if tree_sitter_case?(node) - signals[:cases] += 1 - return 0.5 + score_tree_sitter_children(node, nesting: nesting + 1, signals: signals) - end - - if tree_sitter_rescue?(node) - signals[:rescues] += 1 - return branch_cost(nesting) + - score_tree_sitter_children(node, nesting: nesting + 1, signals: signals) - end - - if tree_sitter_early_exit?(node) - signals[:early_exits] += 1 - exit_cost = nesting.positive? ? 
0.5 + (nesting * 0.25) : 0.0 - return exit_cost + score_tree_sitter_children(node, nesting: nesting, signals: signals) - end - - if tree_sitter_boolean_node?(node) - signals[:boolean_ops] += 1 - return 0.25 + score_tree_sitter_children(node, nesting: nesting, signals: signals) - end - - score_tree_sitter_children(node, nesting: nesting, signals: signals) - end - - def score_tree_sitter_children(node, nesting:, signals:) - node.children.sum { |child| score_node(child, nesting: nesting, signals: signals) } - end - - def tree_sitter_predicate_cost(node, signals) - predicate = tree_sitter_condition_node(node) - bools = tree_sitter_boolean_count(predicate) - signals[:boolean_ops] += bools - bools * 0.5 - end - - def tree_sitter_condition_node(node) - return node.named_children.last if tree_sitter_modifier_if?(node) - return node.named_children.first if node.kind == "body_statement" - - node.named_children.first - end - - def tree_sitter_boolean_count(node) - return 0 unless tree_sitter_node?(node) - - own = tree_sitter_boolean_node?(node) ? 1 : 0 - own + node.children.sum { |child| tree_sitter_boolean_count(child) } - end - - def tree_sitter_boolean_node?(node) - tree_sitter_node?(node) && - %w[binary binary_expression boolean_operator conjunction_expression disjunction_expression].include?(node.kind) && - node.children.any? { |child| !child.named? && %w[&& || and or].include?(child.text.to_s) } - end - - def tree_sitter_branch?(node) - return false unless tree_sitter_node?(node) - return true if %w[if unless if_statement if_expression if_modifier unless_modifier].include?(node.kind) && - node.named_children.any? - - tree_sitter_hidden_if?(node) || tree_sitter_modifier_if?(node) - end - - def tree_sitter_hidden_if?(node) - return true if node.kind == "expression_statement" && node.text.to_s.lstrip.start_with?("if ") - - %w[body_statement block statements statement_list].include?(node.kind) && - node.children.first && - !node.children.first.named? 
&& - %w[if unless].include?(node.children.first.kind.to_s) - end - - def tree_sitter_modifier_if?(node) - return true if %w[if_modifier unless_modifier].include?(node.kind) - return false unless node.kind == "body_statement" - - seen_named = false - node.children.any? do |child| - seen_named ||= child.named? - seen_named && !child.named? && %w[if unless].include?(child.kind.to_s) - end - end - - def tree_sitter_loop?(node) - return false unless tree_sitter_node?(node) - return true if %w[while until while_statement for for_statement for_in_statement do_block].include?(node.kind) - return true if tree_sitter_hidden_loop?(node) - - (node.kind == "expression_statement" && node.text.to_s.lstrip.match?(/\A(?:for|while|loop)\b/)) || - (node.kind == "labeled_statement" && node.text.to_s.lstrip.start_with?("for ")) - end - - def tree_sitter_hidden_loop?(node) - %w[body_statement block statements statement_list].include?(node.kind) && - node.children.first && - !node.children.first.named? && - %w[for while loop].include?(node.children.first.kind.to_s) - end - - def tree_sitter_case?(node) - tree_sitter_node?(node) && - (%w[case switch_statement switch_expression match_statement match_expression].include?(node.kind) || - (node.kind == "expression_statement" && node.text.to_s.lstrip.start_with?("match "))) - end - - def tree_sitter_rescue?(node) - tree_sitter_node?(node) && %w[rescue rescue_modifier rescue_clause rescue_body].include?(node.kind) - end - - def tree_sitter_early_exit?(node) - tree_sitter_node?(node) && - %w[return break next redo retry return_statement break_statement continue_statement].include?(node.kind) - end - - def skip_tree_sitter_nested?(node) - %w[class module lambda].include?(node.kind) - end - - def tree_sitter_node?(node) - node.respond_to?(:kind) && node.respond_to?(:children) - end - - def branch_cost(nesting) - 1.1 + nesting - end - - def round(value) - (value * 10).round / 10.0 - end - end - class Analyzer def initialize(topology, scores, 
min_score, min_hidden, max_depth) @topology = topology diff --git a/gems/decomplex/test/architecture_invariants_test.rb b/gems/decomplex/test/architecture_invariants_test.rb new file mode 100644 index 000000000..617a779df --- /dev/null +++ b/gems/decomplex/test/architecture_invariants_test.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +require "minitest/autorun" + +class DecomplexArchitectureInvariantsTest < Minitest::Test + ROOT = File.expand_path("..", __dir__) + LIB = File.join(ROOT, "lib", "decomplex") + DETECTOR_BASENAMES = %w[ + co_update decision_pressure derived_state false_simplicity fat_union + flay_similarity function_lcom inconsistent_rename_clone local_flow + locality_drag miner mutability_pressure operational_discontinuity + ordered_protocol_mine oversized_predicate path_condition predicate_alias + redundant_nil_guard semantic_alias sequence_mine site_extractor + state_branch_density state_mesh structural_topology superfluous_state + temporal_ordering_pressure weighted_inlined_cognitive_complexity + ].freeze + DETECTOR_FILES = DETECTOR_BASENAMES.map { |name| File.join(LIB, "#{name}.rb") }.freeze + + RAW_TREE_SITTER_PATTERNS = { + "raw child traversal" => /(? 
/\bchild_by_field_name\b/, + "raw byte offsets" => /\b(?:start_byte|end_byte)\b/, + "raw point offsets" => /\b(?:start_point|end_point)\b/, + "Tree-sitter classes" => /\bTreeSitter(?:Adapter|LanguageAdapter|Normalizer|NodeFacade|FacadeContext)?\b/, + "raw node predicate helpers" => /\b(?:ts_node\?|tree_sitter_node\?)\b/, + "raw node duck typing" => /respond_to\?\s*\(\s*:children\s*\)/ + }.freeze + + SYNTAX_RB_EXTENSION_HOST_PATTERNS = { + "clone similarity belongs in syntax/clone_similarity.rb" => /\b(?:CloneCandidate|clone_candidates|CLONE_)/, + "dispatch facts belong in syntax/dispatch.rb" => /\b(?:DispatchSite|dispatch_sites|DISPATCH_)/, + "nil guard facts belong in syntax/nil_guards.rb" => /\b(?:NilGuard|redundant_nil_guard_findings)/, + "local complexity facts belong in syntax/complexity.rb" => /\b(?:LocalComplexity|local_complexity_scores)/ + }.freeze + SYNTAX_RB_ADAPTER_IMPLEMENTATION_PATTERNS = { + "concrete language adapters belong under lib/decomplex/syntax/" => + /^\s*class\s+(?!TreeSitterLanguageAdapter\b)\w+SyntaxAdapter\b/, + "language profiles must instantiate concrete adapters, not the base adapter" => + /:\s*TreeSitterLanguageAdapter\.new\(/ + }.freeze + + def test_detectors_do_not_talk_to_tree_sitter_nodes_directly + offenders = scan_files(DETECTOR_FILES, RAW_TREE_SITTER_PATTERNS) + + assert_empty offenders, format_offenders( + "Detectors must consume Syntax facts instead of raw Tree-sitter nodes", + offenders + ) + end + + def test_detector_specific_syntax_extensions_do_not_live_in_syntax_rb + syntax_rb = File.join(LIB, "syntax.rb") + offenders = scan_files([syntax_rb], SYNTAX_RB_EXTENSION_HOST_PATTERNS) + + assert_empty offenders, format_offenders( + "Detector-facing parser extensions must live under lib/decomplex/syntax/", + offenders + ) + end + + def test_language_adapter_implementations_do_not_live_in_syntax_rb + syntax_rb = File.join(LIB, "syntax.rb") + offenders = scan_files([syntax_rb], SYNTAX_RB_ADAPTER_IMPLEMENTATION_PATTERNS) + + 
assert_empty offenders, format_offenders( + "Core syntax.rb must not absorb concrete language adapter implementation", + offenders + ) + end + + private + + def scan_files(files, patterns) + files.sort.flat_map do |path| + rel = path.delete_prefix("#{ROOT}/") + File.readlines(path, chomp: true).each_with_index.flat_map do |line, index| + next if line.strip.start_with?("#") + + patterns.filter_map do |name, pattern| + next unless line.match?(pattern) + + "#{rel}:#{index + 1}: #{name}: #{line.strip}" + end + end.compact + end + end + + def format_offenders(message, offenders) + ([message] + offenders.map { |offender| " #{offender}" }).join("\n") + end +end diff --git a/gems/decomplex/test/examples_oracle_test.rb b/gems/decomplex/test/examples_oracle_test.rb index 038f77902..b176ed9e3 100644 --- a/gems/decomplex/test/examples_oracle_test.rb +++ b/gems/decomplex/test/examples_oracle_test.rb @@ -85,7 +85,7 @@ def project_detector_output(detector, output) "neglected_updates" => rows(output["neglected_updates"], %w[pair support has missing]) } when "decision-pressure" - present_rows(output) + rows(output, %w[contract decisions essential methods]) when "predicate-alias" { "alias_clusters" => Array(output["alias_clusters"]).map do |row| @@ -94,29 +94,38 @@ def project_detector_output(detector, output) } when "miner" { - "missing_abstractions" => present_rows(output["missing_abstractions"]) + "missing_abstractions" => Array(output["missing_abstractions"]).map do |row| + pick(row, %w[kind members support scatter]) + end, + "neglected_conditions" => rows(output["neglected_conditions"], %w[pattern support missing]) } when "semantic-alias" { "alias_clusters" => Array(output["alias_clusters"]).map do |row| - { "name_count" => Array(row["names"]).size } - end + { "canon" => canonical_predicate(row["canon"]), "name_count" => Array(row["names"]).size } + end, + "reification_miss_count" => Array(output["reification_misses"]).size } when "flay-similarity" - findings = 
Array(output["findings"]) - defn_findings = findings.select { |row| row["node"].to_s == "defn" } - findings = defn_findings unless defn_findings.empty? - findings.map do |row| + Array(output["findings"]).map do |row| pick(row, %w[clone_type node]).merge("site_count" => Array(row["sites"]).size) - end.uniq + end when "temporal-ordering-pressure" - Array(output).empty? ? [] : [{ "present" => true }] + Array(output).map do |row| + pick(row, %w[owner public_methods state_methods writers orderings]).merge( + "state_fields" => canonical_state_refs(row["state_fields"]), + "shared_fields" => canonical_state_refs(row["shared_fields"]) + ) + end when "state-branch-density" Array(output).map do |row| - { "present" => !row.empty? } + pick(row, %w[decisions]).merge( + "method" => canonical_method_name(row["method"]), + "state_refs" => canonical_state_refs(row["state_refs"]) + ) end when "redundant-nil-guard" - rows(output, %w[local]).uniq + rows(output, %w[local]) when "state-mesh" project_state_mesh(output) when "inconsistent-rename-clone" @@ -127,8 +136,8 @@ def project_detector_output(detector, output) rows(output, %w[derived source]) when "implicit-control-flow" { - "ordered_protocols" => present_rows(output["ordered_protocols"]), - "order_drift" => present_rows(output["order_drift"]) + "ordered_protocols" => project_protocols(output["ordered_protocols"]), + "order_drift" => project_protocols(output["order_drift"]) } when "weighted-inlined-complexity" Array(output).map do |row| @@ -143,15 +152,26 @@ def project_detector_output(detector, output) pick(row, %w[count]).merge("atom_count" => Array(row["atoms"]).size) end when "path-condition" - present_rows(output["neglected"]) + Array(output["neglected"]).map do |row| + { + "pattern" => canonical_predicate_atoms(row["pattern"]), + "support" => row["support"], + "missing" => canonical_predicate(row["missing"]), + "action" => canonical_action(row["action"]) + } + end when "sequence-mine" rows(output["broken"], %w[pair support has 
missing]) when "function-lcom" - present_rows(output) + rows(output, %w[mode components locals statements terminal_join]) when "false-simplicity" rows(output, %w[kind]) when "fat-union" - present_rows(output["fat_unions"]) + Array(output["fat_unions"]).map do |row| + pick(row, %w[common variant degenerate support scatter]).merge( + "variant_set" => canonical_variants(row["variant_set"]) + ) + end when "local-flow" Array(output).map do |method| { @@ -160,18 +180,69 @@ def project_detector_output(detector, output) } end when "structural-topology" - { "present" => !Array(output["methods"]).empty? || !Array(output["edges"]).empty? } + { + "method_count" => Array(output["methods"]).size, + "edges" => rows(output["edges"], %w[caller_name callee_name type]) + } else scrub_locations(output) end end def project_state_mesh(output) - { "state_mesh" => { "present" => meaningful?(output.fetch("state_mesh", {})) } } + state_mesh = output.fetch("state_mesh", {}) + fields = output.fetch("fields", {}) + { + "state_mesh" => pick(state_mesh, %w[total_fields total_writes total_reads total_re_derivations]), + "field_names" => canonical_state_refs(fields.keys) + } end def project_protocols(rows) - rows(rows, %w[protocol dependency states support observed missing]) + Array(rows).map do |row| + pick(row, %w[protocol dependency support observed missing]).merge( + "states" => canonical_state_refs(row["states"]) + ) + end + end + + def canonical_variants(value) + Array(value).map do |item| + item.to_s + .sub(/\A([A-Z][A-Za-z0-9]*)_([A-Z][A-Za-z0-9]*)\z/, '\1.\2') + .tr(":", ".") + .gsub(/\.+/, ".") + end.sort + end + + def canonical_state_refs(value) + Array(value).map do |item| + text = item.to_s + text = text.sub(/\A@/, "") + text = text.sub(/\A(?:self|this)\./, "") + text + end.uniq.sort + end + + def canonical_method_name(value) + value.to_s.split(/[.:#]/).last.to_s + end + + def canonical_predicate_atoms(value) + Array(value).map { |item| canonical_predicate(item) }.sort + end + + def 
canonical_predicate(value) + text = value.to_s.strip + text = text.delete_suffix(";").strip + text = text.gsub(/:([A-Za-z_]\w*)/) { Regexp.last_match(1).upcase } + text = text.gsub(/\b([A-Za-z_]\w*(?:\.[A-Za-z_]\w*)*)\.(\w+)\?/, '\1.\2') + text = text.gsub(/\b([A-Za-z_]\w*(?:\.[A-Za-z_]\w*)*)\.(\w+)\(\)/, '\1.\2') + text + end + + def canonical_action(value) + canonical_predicate(value).sub(/\A([A-Za-z_]\w*)\((.*)\)\z/, '\1(\2)') end def present_rows(value) diff --git a/gems/decomplex/test/flay_similarity_test.rb b/gems/decomplex/test/flay_similarity_test.rb index 374e903cd..a0793690e 100644 --- a/gems/decomplex/test/flay_similarity_test.rb +++ b/gems/decomplex/test/flay_similarity_test.rb @@ -7,7 +7,10 @@ class FlaySimilarityTest < Minitest::Test def grammar_available?(language) env = "DECOMPLEX_TS_#{language.to_s.upcase}_PATH" - ENV[env] && File.file?(ENV[env]) + return true if ENV[env] && File.file?(ENV[env]) + + adapter = Decomplex::Syntax::TreeSitterAdapter.new + adapter.send(:grammar_candidates, language).any? 
{ |path| File.file?(path) } end def scan(source, ext: ".rb", mass: 8, fuzzy: 1) diff --git a/gems/decomplex/test/syntax_test.rb b/gems/decomplex/test/syntax_test.rb index ca8c730c7..90962f6b0 100644 --- a/gems/decomplex/test/syntax_test.rb +++ b/gems/decomplex/test/syntax_test.rb @@ -6,6 +6,19 @@ require_relative "../lib/decomplex/report" class SyntaxTest < Minitest::Test + def self.populate_tree_sitter_env_defaults + adapter = Decomplex::Syntax::TreeSitterAdapter.new + Decomplex::Syntax::LANGUAGE_PROFILES.each_key do |language| + env = "DECOMPLEX_TS_#{language.to_s.upcase}_PATH" + next if ENV[env] && File.file?(ENV[env]) + + candidate = adapter.send(:grammar_candidates, language).find { |path| File.file?(path) } + ENV[env] = candidate if candidate + end + end + + populate_tree_sitter_env_defaults + def with_file(source, ext = ".rb") file = Tempfile.new(["syntax", ext]) file.write(source) @@ -438,8 +451,8 @@ def run(items): assert_includes doc.state_writes.map { |write| [write.receiver, write.field] }, ["self", "items"] assert_includes doc.state_param_origins.map { |origin| [origin.owner, origin.function, origin.receiver, origin.field, origin.param] }, ["Worker", "__init__", "self", "items", "items"] - assert_includes doc.call_sites.map { |call| [call.owner, call.function, call.receiver, call.message] }, - ["Worker", "call", "self.items", "append"] + assert_includes doc.call_sites.map { |call| [call.function, call.receiver, call.message] }, + ["call", "self.items", "append"] assert_includes doc.call_sites.map { |call| [call.function, call.receiver, call.message, call.arguments] }, ["run", "self", "prepare", ["items"]] end @@ -461,7 +474,7 @@ def test_tree_sitter_c_adapter_extracts_functions_branches_and_pointer_state assert_includes doc.function_defs.map(&:name), "classify" assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, - ["node", "storage", "classify"] + ["self", "storage", "classify"] assert_includes 
doc.decision_sites.map(&:kind), :conjunction assert_includes doc.decision_sites.map(&:kind), :case_dispatch end @@ -510,7 +523,7 @@ def test_tree_sitter_csharp_adapter_extracts_class_methods_and_member_state assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Parser", :class] assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Parser", "Parse"] assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, - ["this", "_storage", "Parse"] + ["self", "_storage", "Parse"] assert_includes doc.decision_sites.map(&:kind), :case_dispatch end end @@ -534,7 +547,7 @@ def test_tree_sitter_java_adapter_extracts_class_methods_and_member_state assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Parser", :class] assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Parser", "parse"] assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, - ["this", "storage", "parse"] + ["self", "storage", "parse"] assert_includes doc.decision_sites.map(&:kind), :case_dispatch end end @@ -590,7 +603,7 @@ class Parser { assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Parser", :class] assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Parser", "parse"] assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, - ["this", "storage", "parse"] + ["self", "storage", "parse"] assert_includes doc.decision_sites.map(&:kind), :case_dispatch end end diff --git a/spec/decomplex_architecture_invariants_spec.rb b/spec/decomplex_architecture_invariants_spec.rb new file mode 100644 index 000000000..e0bdfd10f --- /dev/null +++ b/spec/decomplex_architecture_invariants_spec.rb @@ -0,0 +1,80 @@ +require "rspec" + +RSpec.describe "architecture invariants: decomplex syntax boundaries" do + ROOT = File.expand_path("..", __dir__) + DECOMPLEX_LIB = File.join(ROOT, "gems", "decomplex", 
"lib", "decomplex") + DETECTOR_BASENAMES = %w[ + co_update decision_pressure derived_state false_simplicity fat_union + flay_similarity function_lcom inconsistent_rename_clone local_flow + locality_drag miner mutability_pressure operational_discontinuity + ordered_protocol_mine oversized_predicate path_condition predicate_alias + redundant_nil_guard semantic_alias sequence_mine site_extractor + state_branch_density state_mesh structural_topology superfluous_state + temporal_ordering_pressure weighted_inlined_cognitive_complexity + ].freeze + DETECTOR_FILES = DETECTOR_BASENAMES.map { |name| File.join(DECOMPLEX_LIB, "#{name}.rb") }.freeze + + RAW_TREE_SITTER_PATTERNS = { + "raw child traversal" => /(? /\bchild_by_field_name\b/, + "raw byte offsets" => /\b(?:start_byte|end_byte)\b/, + "raw point offsets" => /\b(?:start_point|end_point)\b/, + "Tree-sitter classes" => /\bTreeSitter(?:Adapter|LanguageAdapter|Normalizer|NodeFacade|FacadeContext)?\b/, + "raw node predicate helpers" => /\b(?:ts_node\?|tree_sitter_node\?)\b/, + "raw node duck typing" => /respond_to\?\s*\(\s*:children\s*\)/ + }.freeze + + SYNTAX_RB_EXTENSION_HOST_PATTERNS = { + "clone similarity belongs in syntax/clone_similarity.rb" => /\b(?:CloneCandidate|clone_candidates|CLONE_)/, + "dispatch facts belong in syntax/dispatch.rb" => /\b(?:DispatchSite|dispatch_sites|DISPATCH_)/, + "nil guard facts belong in syntax/nil_guards.rb" => /\b(?:NilGuard|redundant_nil_guard_findings)/, + "local complexity facts belong in syntax/complexity.rb" => /\b(?:LocalComplexity|local_complexity_scores)/ + }.freeze + + SYNTAX_RB_ADAPTER_IMPLEMENTATION_PATTERNS = { + "concrete language adapters belong under lib/decomplex/syntax/" => + /^\s*class\s+(?!TreeSitterLanguageAdapter\b)\w+SyntaxAdapter\b/, + "language profiles must instantiate concrete adapters, not the base adapter" => + /:\s*TreeSitterLanguageAdapter\.new\(/ + }.freeze + + def scan_files(files, patterns) + files.sort.flat_map do |path| + rel = 
path.delete_prefix("#{ROOT}/") + File.readlines(path, chomp: true).each_with_index.flat_map do |line, index| + next if line.strip.start_with?("#") + + patterns.filter_map do |name, pattern| + "#{rel}:#{index + 1}: #{name}: #{line.strip}" if line.match?(pattern) + end + end.compact + end + end + + def format_offenders(message, offenders) + ([message] + offenders.map { |offender| " #{offender}" }).join("\n") + end + + it "keeps detectors behind Syntax facts instead of raw Tree-sitter nodes" do + offenders = scan_files(DETECTOR_FILES, RAW_TREE_SITTER_PATTERNS) + + expect(offenders).to be_empty, + format_offenders("Detectors must consume Syntax facts instead of raw Tree-sitter nodes", offenders) + end + + it "keeps detector-facing syntax extensions out of syntax.rb" do + syntax_rb = File.join(DECOMPLEX_LIB, "syntax.rb") + offenders = scan_files([syntax_rb], SYNTAX_RB_EXTENSION_HOST_PATTERNS) + + expect(offenders).to be_empty, + format_offenders("Detector-facing parser extensions must live under lib/decomplex/syntax/", offenders) + end + + it "keeps concrete language adapter implementation out of syntax.rb" do + syntax_rb = File.join(DECOMPLEX_LIB, "syntax.rb") + offenders = scan_files([syntax_rb], SYNTAX_RB_ADAPTER_IMPLEMENTATION_PATTERNS) + + expect(offenders).to be_empty, + format_offenders("Core syntax.rb must not absorb concrete language adapter implementation", offenders) + end +end From 35d7da46e0d6637e9aefc7ec83c2b392f9a97a02 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Fri, 19 Jun 2026 18:14:58 +0000 Subject: [PATCH 28/52] Refactor decomplex syntax adapters --- .gitignore | 3 + .npmrc | 1 + .../syntax-adapter-decomposition-design.md | 96 +++ gems/decomplex/examples/php/co-update.php | 19 + .../examples/php/decision-pressure.php | 5 + gems/decomplex/examples/php/derived-state.php | 6 + .../examples/php/false-simplicity.php | 6 + gems/decomplex/examples/php/fat-union.php | 29 + .../examples/php/flay-similarity.php | 42 + 
gems/decomplex/examples/php/function-lcom.php | 13 + .../examples/php/implicit-control-flow.php | 12 + .../php/inconsistent-rename-clone.php | 14 + gems/decomplex/examples/php/local-flow.php | 10 + gems/decomplex/examples/php/locality-drag.php | 28 + gems/decomplex/examples/php/miner.php | 5 + .../php/operational-discontinuity.php | 11 + .../examples/php/oversized-predicate.php | 6 + .../decomplex/examples/php/path-condition.php | 16 + .../examples/php/predicate-alias.php | 4 + .../examples/php/redundant-nil-guard.php | 6 + .../decomplex/examples/php/semantic-alias.php | 8 + gems/decomplex/examples/php/sequence-mine.php | 6 + .../examples/php/state-branch-density.php | 12 + gems/decomplex/examples/php/state-mesh.php | 19 + .../examples/php/structural-topology.php | 17 + .../php/temporal-ordering-pressure.php | 19 + .../php/weighted-inlined-complexity.php | 45 + gems/decomplex/lib/decomplex/syntax.rb | 783 +++++++----------- .../lib/decomplex/syntax/adapters.rb | 344 +------- gems/decomplex/lib/decomplex/syntax/c.rb | 97 +++ gems/decomplex/lib/decomplex/syntax/cpp.rb | 123 +++ gems/decomplex/lib/decomplex/syntax/csharp.rb | 84 ++ gems/decomplex/lib/decomplex/syntax/go.rb | 93 +++ gems/decomplex/lib/decomplex/syntax/java.rb | 86 ++ .../lib/decomplex/syntax/javascript.rb | 82 ++ gems/decomplex/lib/decomplex/syntax/kotlin.rb | 74 ++ gems/decomplex/lib/decomplex/syntax/lua.rb | 162 ++++ .../lib/decomplex/syntax/nil_guards.rb | 14 +- gems/decomplex/lib/decomplex/syntax/php.rb | 496 +++++++++++ gems/decomplex/lib/decomplex/syntax/python.rb | 123 +++ gems/decomplex/lib/decomplex/syntax/ruby.rb | 113 +++ gems/decomplex/lib/decomplex/syntax/rust.rb | 78 ++ gems/decomplex/lib/decomplex/syntax/swift.rb | 74 ++ .../lib/decomplex/syntax/typescript.rb | 10 + gems/decomplex/lib/decomplex/syntax/zig.rb | 88 ++ .../test/architecture_invariants_test.rb | 50 ++ gems/decomplex/test/examples_oracle_test.rb | 4 +- package-lock.json | 413 +++++++++ package.json | 20 + 
.../decomplex_architecture_invariants_spec.rb | 48 ++ 50 files changed, 3114 insertions(+), 803 deletions(-) create mode 100644 .npmrc create mode 100644 gems/decomplex/docs/agents/syntax-adapter-decomposition-design.md create mode 100644 gems/decomplex/examples/php/co-update.php create mode 100644 gems/decomplex/examples/php/decision-pressure.php create mode 100644 gems/decomplex/examples/php/derived-state.php create mode 100644 gems/decomplex/examples/php/false-simplicity.php create mode 100644 gems/decomplex/examples/php/fat-union.php create mode 100644 gems/decomplex/examples/php/flay-similarity.php create mode 100644 gems/decomplex/examples/php/function-lcom.php create mode 100644 gems/decomplex/examples/php/implicit-control-flow.php create mode 100644 gems/decomplex/examples/php/inconsistent-rename-clone.php create mode 100644 gems/decomplex/examples/php/local-flow.php create mode 100644 gems/decomplex/examples/php/locality-drag.php create mode 100644 gems/decomplex/examples/php/miner.php create mode 100644 gems/decomplex/examples/php/operational-discontinuity.php create mode 100644 gems/decomplex/examples/php/oversized-predicate.php create mode 100644 gems/decomplex/examples/php/path-condition.php create mode 100644 gems/decomplex/examples/php/predicate-alias.php create mode 100644 gems/decomplex/examples/php/redundant-nil-guard.php create mode 100644 gems/decomplex/examples/php/semantic-alias.php create mode 100644 gems/decomplex/examples/php/sequence-mine.php create mode 100644 gems/decomplex/examples/php/state-branch-density.php create mode 100644 gems/decomplex/examples/php/state-mesh.php create mode 100644 gems/decomplex/examples/php/structural-topology.php create mode 100644 gems/decomplex/examples/php/temporal-ordering-pressure.php create mode 100644 gems/decomplex/examples/php/weighted-inlined-complexity.php create mode 100644 gems/decomplex/lib/decomplex/syntax/c.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/cpp.rb create mode 100644 
gems/decomplex/lib/decomplex/syntax/csharp.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/go.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/java.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/javascript.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/kotlin.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/lua.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/php.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/python.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/rust.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/swift.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/typescript.rb create mode 100644 gems/decomplex/lib/decomplex/syntax/zig.rb create mode 100644 package-lock.json create mode 100644 package.json diff --git a/.gitignore b/.gitignore index 8ae93f4e8..1c6ec4b11 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ !/.devcontainer/ !/.github/ !/.gitignore +!/.npmrc !/.rspec !/.vscode/ !/CLAUDE.md @@ -17,6 +18,8 @@ !/gems/** !/LICENSE !/ONE-PAGER.md +!/package-lock.json +!/package.json !/README.md !/TODO.md !/clear diff --git a/.npmrc b/.npmrc new file mode 100644 index 000000000..521a9f7c0 --- /dev/null +++ b/.npmrc @@ -0,0 +1 @@ +legacy-peer-deps=true diff --git a/gems/decomplex/docs/agents/syntax-adapter-decomposition-design.md b/gems/decomplex/docs/agents/syntax-adapter-decomposition-design.md new file mode 100644 index 000000000..4b30104d8 --- /dev/null +++ b/gems/decomplex/docs/agents/syntax-adapter-decomposition-design.md @@ -0,0 +1,96 @@ +# Syntax Adapter Decomposition Design + +## Goal + +`Decomplex::Syntax` should be a cross-language fact model and Tree-sitter facade. It should not know Ruby, PHP, Java, Rust, Zig, or any other concrete grammar beyond the registry that maps a language key to an adapter. 
+ +Language adapters own: + +- parser package metadata and extensions +- lexicon regexes +- concrete Tree-sitter node kind names +- grammar-specific hidden constructs +- source-text conventions that cannot be represented generically + +The base `TreeSitterLanguageAdapter` owns: + +- traversal +- shared fact emission +- generic algorithms over adapter-provided grammar shapes +- empty defaults for optional language-specific fact providers + +## Current Issues + +`syntax.rb` still contains a large union of concrete grammar node names. That makes new language support look easy until a language differs, then the generic code grows another special case. The result is brittle cross-language support because unrelated languages inherit grammar assumptions they do not share. + +The main areas are: + +- function and owner detection +- parameter, body, and local-flow discovery +- assignment and declaration recognition +- branch, case, loop, and hidden branch detection +- call, member-access, and state-target discovery + +## Target Shape + +Each adapter exposes declarative grammar-shape methods. The base adapter uses those methods instead of hard-coded language unions: + +- `function_node_kinds` +- `method_node_kinds` +- `owner_node_kinds` +- `loop_node_kinds` +- `if_node_kinds` +- `case_node_kinds` +- `hidden_if_wrapper_kinds` +- `hidden_case_wrapper_kinds` +- `case_arm_node_kinds` +- `function_body_node_kinds` +- `parameter_list_node_kinds` +- `assignment_node_kinds` +- `declaration_node_kinds` +- `field_declaration_node_kinds` +- `identifier_node_kinds` +- `field_like_node_kinds` +- `call_node_kinds` +- `adjacent_call_node_kinds` +- `argument_list_node_kinds` +- `comment_prefixes` + +Adapters override only the shapes they need. When a language needs real logic instead of vocabulary, it overrides the semantic method directly, such as `function_name`, `call_target`, `state_target`, or `case_arm_patterns`. + +## Migration Plan + +1. 
Move source-text language quirks out of `syntax.rb`. + - Generic defaults return empty facts. + - Ruby owns Sorbet `T::Struct`, `const`, and `T.type_alias` parsing. + +2. Move lexicons beside adapters. + - `LanguageLexicon` remains shared. + - Concrete `*_LEXICON` constants live in the files that define their adapters. + +3. Introduce grammar-shape methods in the base adapter. + - Start with one area at a time. + - Replace each concrete node-kind union with an adapter method call. + - Keep behavior stable while moving the data boundary. + +4. Push concrete node kinds down into adapters. + - The base adapter may retain only truly generic names if they are part of a documented normalized adapter contract. + - Otherwise, adapters provide the language-specific kind sets. + +5. Add architecture invariants. + - `syntax.rb` must not define language lexicons. + - `syntax.rb` must not contain Sorbet or Ruby source-text patterns. + - Detectors must not call Tree-sitter APIs directly. + - New concrete grammar kind lists in `syntax.rb` should fail review unless backed by a documented generic adapter contract. + +## Verification + +For each migration step: + +- run the examples oracle tests +- run the full Decomplex Ruby test suite +- run architecture invariant tests +- run `decomplex report` on `gems/decomplex/lib/decomplex` before and after +- compare whether reported issues are stable or whether differences reflect reduced self-findings in `syntax.rb` + +The expected direction is a smaller `syntax.rb`, fewer language names and source-level quirks in shared code, and no loss of detector oracle specificity. 
diff --git a/gems/decomplex/examples/php/co-update.php b/gems/decomplex/examples/php/co-update.php new file mode 100644 index 000000000..24674bb9b --- /dev/null +++ b/gems/decomplex/examples/php/co-update.php @@ -0,0 +1,19 @@ +storage = HEAP; + $node->provenance = HEAP; +} + +function stable_two($node) { + $node->storage = HEAP; + $node->provenance = HEAP; +} + +function stable_three($node) { + $node->storage = HEAP; + $node->provenance = HEAP; +} + +function misses_provenance($node) { + $node->storage = HEAP; +} diff --git a/gems/decomplex/examples/php/decision-pressure.php b/gems/decomplex/examples/php/decision-pressure.php new file mode 100644 index 000000000..6fa0480b7 --- /dev/null +++ b/gems/decomplex/examples/php/decision-pressure.php @@ -0,0 +1,5 @@ +symbol; + return $value->isNull(); +} diff --git a/gems/decomplex/examples/php/derived-state.php b/gems/decomplex/examples/php/derived-state.php new file mode 100644 index 000000000..bb81ac362 --- /dev/null +++ b/gems/decomplex/examples/php/derived-state.php @@ -0,0 +1,6 @@ +line(); + $node->col(); + $node->ty(); + $node->span(); + $node->parent(); + $node->recv(); + break; + case AST::Func: + $node->line(); + $node->col(); + $node->ty(); + $node->span(); + $node->parent(); + $node->name(); + break; + case AST::Lit: + $node->line(); + $node->col(); + $node->ty(); + $node->span(); + $node->parent(); + $node->value(); + break; + } +} diff --git a/gems/decomplex/examples/php/flay-similarity.php b/gems/decomplex/examples/php/flay-similarity.php new file mode 100644 index 000000000..0a97d128c --- /dev/null +++ b/gems/decomplex/examples/php/flay-similarity.php @@ -0,0 +1,42 @@ +part1; + if ($value1->ready() && $value1->enabled()) { $total += $value1->amount; } + $value2 = $node->part2; + if ($value2->ready() && $value2->enabled()) { $total += $value2->amount; } + $value3 = $node->part3; + if ($value3->ready() && $value3->enabled()) { $total += $value3->amount; } + $value4 = $node->part4; + if ($value4->ready() && 
$value4->enabled()) { $total += $value4->amount; } + $value5 = $node->part5; + if ($value5->ready() && $value5->enabled()) { $total += $value5->amount; } + $value6 = $node->part6; + if ($value6->ready() && $value6->enabled()) { $total += $value6->amount; } + $value7 = $node->part7; + if ($value7->ready() && $value7->enabled()) { $total += $value7->amount; } + $value8 = $node->part8; + if ($value8->ready() && $value8->enabled()) { $total += $value8->amount; } + return $total; +} + +function second_clone($entry) { + $total = 0; + $item1 = $entry->part1; + if ($item1->ready() && $item1->enabled()) { $total += $item1->amount; } + $item2 = $entry->part2; + if ($item2->ready() && $item2->enabled()) { $total += $item2->amount; } + $item3 = $entry->part3; + if ($item3->ready() && $item3->enabled()) { $total += $item3->amount; } + $item4 = $entry->part4; + if ($item4->ready() && $item4->enabled()) { $total += $item4->amount; } + $item5 = $entry->part5; + if ($item5->ready() && $item5->enabled()) { $total += $item5->amount; } + $item6 = $entry->part6; + if ($item6->ready() && $item6->enabled()) { $total += $item6->amount; } + $item7 = $entry->part7; + if ($item7->ready() && $item7->enabled()) { $total += $item7->amount; } + $item8 = $entry->part8; + if ($item8->ready() && $item8->enabled()) { $total += $item8->amount; } + return $total; +} diff --git a/gems/decomplex/examples/php/function-lcom.php b/gems/decomplex/examples/php/function-lcom.php new file mode 100644 index 000000000..30cf1ee1e --- /dev/null +++ b/gems/decomplex/examples/php/function-lcom.php @@ -0,0 +1,13 @@ +round(); + + $timestamp = now(); + $buffer = Buffer::init(); + $buffer->push($timestamp); + $logger->info($buffer); + + return Result::init($rounded, $buffer); +} diff --git a/gems/decomplex/examples/php/implicit-control-flow.php b/gems/decomplex/examples/php/implicit-control-flow.php new file mode 100644 index 000000000..b3c0ebaeb --- /dev/null +++ b/gems/decomplex/examples/php/implicit-control-flow.php 
@@ -0,0 +1,12 @@ +status = READY; } + public function validate() { $this->valid = $this->status == READY; } + public function commit() { $this->done = $this->valid; } + + public function ok1() { $this->prepare(); $this->validate(); $this->commit(); } + public function ok2() { $this->prepare(); $this->validate(); $this->commit(); } + public function ok3() { $this->prepare(); $this->validate(); $this->commit(); } + public function ok4() { $this->prepare(); $this->validate(); $this->commit(); } + public function drift() { $this->validate(); $this->prepare(); $this->commit(); } +} diff --git a/gems/decomplex/examples/php/inconsistent-rename-clone.php b/gems/decomplex/examples/php/inconsistent-rename-clone.php new file mode 100644 index 000000000..cf838cd66 --- /dev/null +++ b/gems/decomplex/examples/php/inconsistent-rename-clone.php @@ -0,0 +1,14 @@ +round(); + + $timestamp = now(); + $buffer = Buffer::init(); + $buffer->push($timestamp); + return Result::init($total, $buffer); +} diff --git a/gems/decomplex/examples/php/locality-drag.php b/gems/decomplex/examples/php/locality-drag.php new file mode 100644 index 000000000..8e11e0d0f --- /dev/null +++ b/gems/decomplex/examples/php/locality-drag.php @@ -0,0 +1,28 @@ +id; + + $total = $cart->total; + if ($total > 100) { + if ($cart->discountable()) { + $discount = 10; + } + } + if ($cart->taxable()) { + if ($cart->region) { + $tax = $total * 0.2; + } + } + if ($logger->enabled()) { + if ($logger->debug()) { + $logger->info($total); + } + } + if ($cart->valid()) { + if ($cart->ready()) { + $status = READY; + } + } + + emit($receipt_id); +} diff --git a/gems/decomplex/examples/php/miner.php b/gems/decomplex/examples/php/miner.php new file mode 100644 index 000000000..da59083db --- /dev/null +++ b/gems/decomplex/examples/php/miner.php @@ -0,0 +1,5 @@ +p() && $y->q() && $z->r()) { go($x); } +} + +function two($x, $y, $z) { + if ($x->p() && $y->q() && $z->r()) { go($x); } +} + +function three($x, $y, $z) { + if ($x->p() && 
$y->q() && $z->r()) { go($x); } +} + +function bug($x, $y, $z) { + if ($x->p() && $y->q()) { go($x); } +} diff --git a/gems/decomplex/examples/php/predicate-alias.php b/gems/decomplex/examples/php/predicate-alias.php new file mode 100644 index 000000000..e139bbd5b --- /dev/null +++ b/gems/decomplex/examples/php/predicate-alias.php @@ -0,0 +1,4 @@ +isSome()) { + $value->isNull(); + } +} diff --git a/gems/decomplex/examples/php/semantic-alias.php b/gems/decomplex/examples/php/semantic-alias.php new file mode 100644 index 000000000..0ff8fcc64 --- /dev/null +++ b/gems/decomplex/examples/php/semantic-alias.php @@ -0,0 +1,8 @@ +provenance == FRAME; } +function is_frame($node) { return $node->provenance == FRAME; } +function heap_pred($node) { return $node->provenance == HEAP; } + +function somewhere($node) { + if ($node->provenance == FRAME) { return 1; } +} diff --git a/gems/decomplex/examples/php/sequence-mine.php b/gems/decomplex/examples/php/sequence-mine.php new file mode 100644 index 000000000..e11013b62 --- /dev/null +++ b/gems/decomplex/examples/php/sequence-mine.php @@ -0,0 +1,6 @@ +checked = true; + } + + if ($this->checked && $name == "admin") { + print("hello"); + } + } +} diff --git a/gems/decomplex/examples/php/state-mesh.php b/gems/decomplex/examples/php/state-mesh.php new file mode 100644 index 000000000..27a5d3101 --- /dev/null +++ b/gems/decomplex/examples/php/state-mesh.php @@ -0,0 +1,19 @@ +a = 1; + $this->b = 2; + } + + public function writer() { + $this->a = 3; + } + + public function reader() { + return $this->a + $this->b; + } + + public function a_alias() { + return $this->a; + } +} diff --git a/gems/decomplex/examples/php/structural-topology.php b/gems/decomplex/examples/php/structural-topology.php new file mode 100644 index 000000000..e23623abb --- /dev/null +++ b/gems/decomplex/examples/php/structural-topology.php @@ -0,0 +1,17 @@ +prepare(); + if ($this->ready()) { + $this->validate(); + } + foreach ($items as $item) { + $this->helper($item); 
+ } + } + + private function prepare() {} + private function ready() { return true; } + public function validate() {} + private function helper($item) { return $item; } +} diff --git a/gems/decomplex/examples/php/temporal-ordering-pressure.php b/gems/decomplex/examples/php/temporal-ordering-pressure.php new file mode 100644 index 000000000..e3456b4d9 --- /dev/null +++ b/gems/decomplex/examples/php/temporal-ordering-pressure.php @@ -0,0 +1,19 @@ +a = 1; + } + + public function two() { + $this->a = 2; + $this->b = 3; + } + + public function three() { + $this->b = 4; + } + + public function reader() { + return $this->a; + } +} diff --git a/gems/decomplex/examples/php/weighted-inlined-complexity.php b/gems/decomplex/examples/php/weighted-inlined-complexity.php new file mode 100644 index 000000000..e8dfba599 --- /dev/null +++ b/gems/decomplex/examples/php/weighted-inlined-complexity.php @@ -0,0 +1,45 @@ +validate_user($user); + $this->apply_discount($cart); + $this->process_payment($user, $cart); + $this->audit_cart($cart); + } + + private function validate_user($user) { + if (!$user) { return false; } + if ($user->active() && !$user->suspended()) { + if ($user->profile->complete()) { return true; } + return false; + } + return false; + } + + private function apply_discount($cart) { + if ($cart->total > 100 && $this->eligible()) { + if ($this->holiday()) { return 20; } + if ($this->loyalty_month()) { return 15; } + return 10; + } + } + + private function process_payment($user, $cart) { + if ($this->gateway->ready()) { + if ($cart->total > 0 && $user->active()) { + if ($this->fraud_check($user)) { $this->charge($user, $cart); } + else { $this->decline($user); } + } + } + } + + private function audit_cart($cart) { + foreach ($cart->items as $item) { + if ($item->taxable()) { + if ($item->region && $item->amount > 0) { + $this->record_tax($item); + } + } + } + } +} diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 
a79ba7615..8f3056211 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -66,218 +66,95 @@ def call_name?(source, names) end end - RUBY_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bnil\b/].freeze, - type_guard_patterns: [ - /(?:\A|[^\w!?])(?:nil\?|is_a\?|kind_of\?|instance_of\?|respond_to\?)(?:\s*\(|\b)/, - /&\./ - ].freeze, - diagnostic_patterns: [ - /(?:\A|[^\w!?])(?:raise|fail|abort)[!?]?(?:\s*\(|\b)/ - ].freeze, - trivial_patterns: [ - /\A(?:nil|true|false|0|1|break|next)\s*;?\z/, - /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - PYTHON_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bNone\b/].freeze, - type_guard_patterns: [ - /\b(?:isinstance|issubclass|hasattr)\s*\(/, - /\bis\s+(?:not\s+)?None\b/, - /\btype\s*\([^)]*\)\s*(?:==|is)\s*/ - ].freeze, - diagnostic_patterns: [ - /\braise\b/, - /\bassert\b/, - /\bsys\.exit\s*\(/ - ].freeze, - trivial_patterns: [ - /\A(?:None|True|False|0|1|break|continue|pass)\s*;?\z/, - /\Areturn\s+(?:None|True|False|0|1)\s*;?\z/ - ].freeze - ).freeze - JAVASCRIPT_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\b(?:null|undefined)\b/].freeze, - type_guard_patterns: [ - /\btypeof\b/, - /\binstanceof\b/, - /(?:\?\.|\b(?:==|!=|===|!==)\s*(?:null|undefined)\b)/ - ].freeze, - diagnostic_patterns: [ - /\bthrow\b/, - /\bprocess\.exit\s*\(/ - ].freeze, - trivial_patterns: [ - /\A(?:null|undefined|true|false|0|1|break|continue)\s*;?\z/, - /\Areturn\s+(?:null|undefined|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - GO_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bnil\b/].freeze, - type_guard_patterns: [ - /\bnil\b/, - /\.\(type\)/, - /\.\([A-Za-z_]\w*(?:\.[A-Za-z_]\w*)*\)/ - ].freeze, - diagnostic_patterns: [ - /\bpanic\s*\(/, - /\breturn\s+error[.\w]*/ - ].freeze, - trivial_patterns: [ - /\A(?:nil|true|false|0|1|break|continue|fallthrough)\s*;?\z/, - /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - 
RUST_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bNone\b/].freeze, - type_guard_patterns: [ - /\b(?:is_some|is_none)\s*\(/, - /\b(?:Some|None)\b/, - /\bmatches!\s*\(/ - ].freeze, - diagnostic_patterns: [ - /\b(?:panic|unreachable|todo|unimplemented)!\s*\(/, - /\breturn\s+Err\s*\(/ - ].freeze, - trivial_patterns: [ - /\A(?:None|true|false|0|1|break|continue|unreachable!)\s*;?\z/, - /\Areturn\s+(?:None|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - ZIG_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bnull\b/].freeze, - type_guard_patterns: [ - /\bnull\b/, - /@typeInfo\b/, - /\bif\s*\([^)]*\)\s*\|/ - ].freeze, - diagnostic_patterns: [ - /@panic\s*\(/, - /\bunreachable\b/, - /\breturn\s+error[.\w]*/ - ].freeze, - trivial_patterns: [ - /\A(?:null|true|false|0|1|break|continue|unreachable)\s*;?\z/, - /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - LUA_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bnil\b/].freeze, - type_guard_patterns: [ - /\btype\s*\(/, - /\bnil\b/, - /\b(?:pcall|xpcall)\s*\(/ - ].freeze, - diagnostic_patterns: [ - /\berror\s*\(/, - /\bassert\s*\(/ - ].freeze, - trivial_patterns: [ - /\A(?:nil|true|false|0|1|break)\s*;?\z/, - /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - C_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bNULL\b/].freeze, - type_guard_patterns: [ - /\bNULL\b/, - /\bsizeof\s*\(/, - /\b_Generic\s*\(/ - ].freeze, - diagnostic_patterns: [ - /\b(?:assert|abort|exit)\s*\(/, - /\breturn\s+errno\b/ - ].freeze, - trivial_patterns: [ - /\A(?:NULL|true|false|0|1|break|continue)\s*;?\z/, - /\Areturn\s+(?:NULL|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - CPP_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\b(?:nullptr|NULL)\b/].freeze, - type_guard_patterns: [ - /\b(?:nullptr|NULL)\b/, - /\b(?:dynamic_cast|typeid)\s*[<(]/, - /\bstd::(?:get_if|holds_alternative)\s*[<(]/ - ].freeze, - diagnostic_patterns: [ - /\bthrow\b/, - 
/\b(?:assert|abort|exit)\s*\(/, - /\bstd::terminate\s*\(/ - ].freeze, - trivial_patterns: [ - /\A(?:nullptr|NULL|true|false|0|1|break|continue)\s*;?\z/, - /\Areturn\s+(?:nullptr|NULL|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - CSHARP_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bnull\b/].freeze, - type_guard_patterns: [ - /\bnull\b/, - /(?:\?\.|\?\?)/, - /\b(?:is|as|typeof)\b/ - ].freeze, - diagnostic_patterns: [ - /\bthrow\b/, - /\b(?:Debug\.Assert|Trace\.Assert|Environment\.Exit)\s*\(/ - ].freeze, - trivial_patterns: [ - /\A(?:null|true|false|0|1|break|continue)\s*;?\z/, - /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - JAVA_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bnull\b/].freeze, - type_guard_patterns: [ - /\bnull\b/, - /\binstanceof\b/, - /\bObjects\.(?:isNull|nonNull|requireNonNull)\s*\(/ - ].freeze, - diagnostic_patterns: [ - /\bthrow\b/, - /\bassert\b/, - /\bSystem\.exit\s*\(/ - ].freeze, - trivial_patterns: [ - /\A(?:null|true|false|0|1|break|continue)\s*;?\z/, - /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - SWIFT_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bnil\b/].freeze, - type_guard_patterns: [ - /\bnil\b/, - /(?:\?\.|\?\?)/, - /\b(?:if|guard)\s+let\b/, - /\b(?:as\?|is)(?:\s|$)/ - ].freeze, - diagnostic_patterns: [ - /\bthrow\b/, - /\b(?:fatalError|preconditionFailure|assertionFailure|assert|precondition)\s*\(/ - ].freeze, - trivial_patterns: [ - /\A(?:nil|true|false|0|1|break|continue)\s*;?\z/, - /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - KOTLIN_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bnull\b/].freeze, - type_guard_patterns: [ - /\bnull\b/, - /(?:\?\.|\?\?)/, - /\b(?:is|as\?)(?:\s|$)/ - ].freeze, - diagnostic_patterns: [ - /\bthrow\b/, - /\b(?:error|require|check|assert|TODO)\s*\(/ - ].freeze, - trivial_patterns: [ - /\A(?:null|true|false|0|1|break|continue)\s*;?\z/, - /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ - 
].freeze - ).freeze class TreeSitterLanguageAdapter + EMPTY_NODE_KINDS = [].freeze + ADAPTER_KIND_METHODS = { + function_node_kinds: :FUNCTION_NODE_KINDS, + class_owner_node_kinds: :CLASS_OWNER_NODE_KINDS, + module_owner_node_kinds: :MODULE_OWNER_NODE_KINDS, + generic_owner_node_kinds: :GENERIC_OWNER_NODE_KINDS, + impl_owner_node_kinds: :IMPL_OWNER_NODE_KINDS, + struct_owner_node_kinds: :STRUCT_OWNER_NODE_KINDS, + union_owner_node_kinds: :UNION_OWNER_NODE_KINDS, + enum_owner_node_kinds: :ENUM_OWNER_NODE_KINDS, + anonymous_owner_node_kinds: :ANONYMOUS_OWNER_NODE_KINDS, + call_node_kinds: :CALL_NODE_KINDS, + adjacent_call_node_kinds: :ADJACENT_CALL_NODE_KINDS, + parameter_list_node_kinds: :PARAMETER_LIST_NODE_KINDS, + method_parameter_list_node_kinds: :METHOD_PARAMETER_LIST_NODE_KINDS, + inline_parameter_node_kinds: :INLINE_PARAMETER_NODE_KINDS, + function_body_node_kinds: :FUNCTION_BODY_NODE_KINDS, + nested_statement_wrapper_node_kinds: :NESTED_STATEMENT_WRAPPER_NODE_KINDS, + identifier_node_kinds: :IDENTIFIER_NODE_KINDS, + local_identifier_wrapper_node_kinds: :LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS, + assignment_node_kinds: :ASSIGNMENT_NODE_KINDS, + assignment_operator_tokens: :ASSIGNMENT_OPERATOR_TOKENS, + local_declaration_node_kinds: :LOCAL_DECLARATION_NODE_KINDS, + short_variable_declaration_node_kinds: :SHORT_VARIABLE_DECLARATION_NODE_KINDS, + variable_declaration_node_kinds: :VARIABLE_DECLARATION_NODE_KINDS, + declaration_assignment_node_kinds: :DECLARATION_ASSIGNMENT_NODE_KINDS, + path_action_node_kinds: :PATH_ACTION_NODE_KINDS, + simple_action_wrapper_node_kinds: :SIMPLE_ACTION_WRAPPER_NODE_KINDS, + comparison_node_kinds: :COMPARISON_NODE_KINDS, + branch_node_kinds: :BRANCH_NODE_KINDS, + loop_node_kinds: :LOOP_NODE_KINDS, + text_loop_node_kinds: :TEXT_LOOP_NODE_KINDS, + labeled_loop_node_kinds: :LABELED_LOOP_NODE_KINDS, + case_node_kinds: :CASE_NODE_KINDS, + hidden_case_wrapper_node_kinds: :HIDDEN_CASE_WRAPPER_NODE_KINDS, + hidden_match_node_kinds: 
:HIDDEN_MATCH_NODE_KINDS, + branch_loop_node_kinds: :BRANCH_LOOP_NODE_KINDS, + branch_case_node_kinds: :BRANCH_CASE_NODE_KINDS, + if_node_kinds: :IF_NODE_KINDS, + hidden_if_token_kinds: :HIDDEN_IF_TOKEN_KINDS, + hidden_case_token_kinds: :HIDDEN_CASE_TOKEN_KINDS, + case_arm_node_kinds: :CASE_ARM_NODE_KINDS, + when_case_arm_node_kinds: :WHEN_CASE_ARM_NODE_KINDS, + switch_case_arm_node_kinds: :SWITCH_CASE_ARM_NODE_KINDS, + case_pattern_node_kinds: :CASE_PATTERN_NODE_KINDS, + case_subject_node_kinds: :CASE_SUBJECT_NODE_KINDS, + case_container_stop_node_kinds: :CASE_CONTAINER_STOP_NODE_KINDS, + case_subject_skip_node_kinds: :CASE_SUBJECT_SKIP_NODE_KINDS, + default_case_patterns: :DEFAULT_CASE_PATTERNS, + boolean_and_operators: :BOOLEAN_AND_OPERATORS, + boolean_container_node_kinds: :BOOLEAN_CONTAINER_NODE_KINDS, + boolean_wrapper_node_kinds: :BOOLEAN_WRAPPER_NODE_KINDS, + parenthesized_wrapper_node_kinds: :PARENTHESIZED_WRAPPER_NODE_KINDS, + parenthesized_pattern_node_kinds: :PARENTHESIZED_PATTERN_NODE_KINDS, + hidden_if_wrapper_node_kinds: :HIDDEN_IF_WRAPPER_NODE_KINDS, + local_variable_declarator_node_kinds: :LOCAL_VARIABLE_DECLARATOR_NODE_KINDS, + field_declaration_node_kinds: :FIELD_DECLARATION_NODE_KINDS, + declaration_site_parent_node_kinds: :DECLARATION_SITE_PARENT_NODE_KINDS, + receiver_type_node_kinds: :RECEIVER_TYPE_NODE_KINDS, + method_receiver_node_kinds: :METHOD_RECEIVER_NODE_KINDS, + receiver_parameter_node_kinds: :RECEIVER_PARAMETER_NODE_KINDS, + first_argument_receiver_type_node_kinds: :FIRST_ARGUMENT_RECEIVER_TYPE_NODE_KINDS, + first_argument_receiver_name_node_kinds: :FIRST_ARGUMENT_RECEIVER_NAME_NODE_KINDS, + bound_container_wrapper_node_kinds: :BOUND_CONTAINER_WRAPPER_NODE_KINDS, + bound_container_parent_node_kinds: :BOUND_CONTAINER_PARENT_NODE_KINDS, + bound_container_name_node_kinds: :BOUND_CONTAINER_NAME_NODE_KINDS, + adjacent_method_invocation_node_kinds: :ADJACENT_METHOD_INVOCATION_NODE_KINDS, + argument_list_node_kinds: 
:ARGUMENT_LIST_NODE_KINDS, + self_call_identifier_node_kinds: :SELF_CALL_IDENTIFIER_NODE_KINDS, + self_receiver_names: :SELF_RECEIVER_NAMES, + field_identifier_node_kinds: :FIELD_IDENTIFIER_NODE_KINDS, + declarator_node_kinds: :DECLARATOR_NODE_KINDS, + assignment_state_declaration_node_kinds: :ASSIGNMENT_STATE_DECLARATION_NODE_KINDS, + accessor_call_node_kinds: :ACCESSOR_CALL_NODE_KINDS, + expression_list_node_kinds: :EXPRESSION_LIST_NODE_KINDS, + navigation_suffix_node_kinds: :NAVIGATION_SUFFIX_NODE_KINDS, + literal_field_expression_node_kinds: :LITERAL_FIELD_EXPRESSION_NODE_KINDS, + block_argument_node_kinds: :BLOCK_ARGUMENT_NODE_KINDS, + parameter_identifier_node_kinds: :PARAMETER_IDENTIFIER_NODE_KINDS, + member_access_operator_tokens: :MEMBER_ACCESS_OPERATOR_TOKENS, + public_visibility_tokens: :PUBLIC_VISIBILITY_TOKENS, + field_like_node_kinds: :FIELD_LIKE_NODE_KINDS + }.freeze + + ADAPTER_KIND_METHODS.each do |method_name, constant_name| + define_method(method_name) { adapter_node_kinds(constant_name) } + end + attr_reader :language, :extensions, :lexicon, :package, :grammar_names, :tree_sitter_language_name @@ -296,17 +173,16 @@ def first_argument_receiver? @first_argument_receiver end + def adapter_node_kinds(constant_name) + self.class.const_defined?(constant_name) ? 
self.class.const_get(constant_name) : EMPTY_NODE_KINDS + end + def function_name(node) - case node.kind - when "method", "function_definition", "function_declaration", - "method_definition", "function_item" - named_field(node, "name")&.text || - declarator_name(named_field(node, "declarator")) || - first_named_text(node, %w[identifier constant property_identifier]) - when "method_declaration" - named_field(node, "name")&.text || - first_named_text(node, %w[field_identifier identifier]) - end + return nil unless function_node_kinds.include?(node.kind) + + named_field(node, "name")&.text || + declarator_name(named_field(node, "declarator")) || + first_named_text(node, identifier_node_kinds + field_identifier_node_kinds) end def function_kind(_document, node, stack) @@ -318,16 +194,18 @@ def visibility(_document, node) end def owner_name_from_declaration(document, node) - case node.kind - when "class", "class_definition", "class_declaration", "class_specifier", "module" - named_field(node, "name")&.text || - first_named_text(node, %w[constant identifier type_identifier]) - when "impl_item", "impl_block" + if (class_owner_node_kinds + module_owner_node_kinds).include?(node.kind) + named_field(node, "name")&.text || + first_named_text(node, identifier_node_kinds + field_identifier_node_kinds) + elsif generic_owner_node_kinds.include?(node.kind) + named_field(node, "name")&.text || + first_named_text(node, identifier_node_kinds + field_identifier_node_kinds) + elsif impl_owner_node_kinds.include?(node.kind) impl_owner_name(node) - when "struct_item", "struct_spec", "struct_specifier", "type_spec", "type_declaration" + elsif struct_owner_node_kinds.include?(node.kind) named_field(node, "name")&.text || - first_named_text(node, %w[type_identifier identifier]) - when "struct_declaration", "union_declaration", "enum_declaration" + first_named_text(node, identifier_node_kinds + field_identifier_node_kinds) + elsif anonymous_owner_node_kinds.include?(node.kind) 
bound_container_name(node) || returned_container_owner(document, node) || anonymous_owner_name(document, node) @@ -335,13 +213,18 @@ def owner_name_from_declaration(document, node) end def owner_kind(node) - case node.kind - when "class", "class_definition", "class_declaration", "class_specifier" then :class - when "module" then :module - when "impl_item", "impl_block" then :impl - when "struct_declaration", "struct_item", "struct_spec", "struct_specifier" then :struct - when "union_declaration" then :union - when "enum_declaration" then :enum + if class_owner_node_kinds.include?(node.kind) + :class + elsif module_owner_node_kinds.include?(node.kind) + :module + elsif impl_owner_node_kinds.include?(node.kind) + :impl + elsif union_owner_node_kinds.include?(node.kind) + :union + elsif enum_owner_node_kinds.include?(node.kind) + :enum + elsif (struct_owner_node_kinds + anonymous_owner_node_kinds).include?(node.kind) + :struct else :owner end end @@ -354,7 +237,7 @@ def function_receiver_name(node, stack) def receiver_convention_owner_name(node, **_context) return nil unless first_argument_receiver? 
- return nil unless node.kind == "function_definition" + return nil unless function_node_kinds.include?(node.kind) receiver = first_argument_receiver_parameter(node) return nil unless receiver @@ -378,12 +261,9 @@ def generated_prelude?(_document, _node) end def call_target(document, node) - case node.kind - when "call_expression", "method_invocation", "invocation_expression", "function_call", "method_call" + if call_node_kinds.include?(node.kind) generic_call_target(document, node) - when "attribute", "selector_expression", "field", "field_access", "member_expression", - "member_access_expression", "field_expression", "expression_list", - "navigation_expression", "dot_index_expression", "variable_list", "identifier", "simple_identifier" + elsif adjacent_call_node_kinds.include?(node.kind) adjacent_argument_call_target(node) end end @@ -402,10 +282,6 @@ def state_target(lhs) end class TreeSitterLanguageAdapter - BRANCH_KINDS = %w[if unless if_statement if_modifier unless_modifier if_expression - while until while_statement for for_statement - case switch_statement expression_switch_statement switch_expression - match_statement match_expression when_expression].freeze COMPARISON_OPERATORS = %w[== !=].freeze NOISE_MESSAGES = %w[! != == === < <= > >= [] []= to_s inspect class].freeze @@ -493,16 +369,16 @@ def implicit_state_accesses? end def function_params(node) - params = if node.kind == "method_declaration" - lists = node.named_children.select { |child| child.kind == "parameter_list" } + params = if method_parameter_list_node_kinds.any? && function_node_kinds.include?(node.kind) + lists = node.named_children.select { |child| method_parameter_list_node_kinds.include?(child.kind) } lists.size > 1 ? 
lists[1] : lists.first else named_field(node, "parameters") || node.named_children.find do |child| - %w[parameters formal_parameters function_value_parameters parameter_list].include?(child.kind) + parameter_list_node_kinds.include?(child.kind) end end - params ||= node.named_children.select { |child| child.kind == "parameter" } if node.kind == "function_declaration" + params ||= node.named_children.select { |child| inline_parameter_node_kinds.include?(child.kind) } return [] unless params Array(params.respond_to?(:named_children) ? params.named_children : params).filter_map do |param| @@ -527,6 +403,18 @@ def method_param_types(_document) {} end + def immutable_struct_readers(_document) + {} + end + + def immutable_struct_reader_types(_document) + {} + end + + def type_aliases(_document) + {} + end + def predicate_def(_document, function_def) body = generic_predicate_body(function_def.body) return nil unless body @@ -594,7 +482,7 @@ def generic_function_body_node(node) named_field(node, "body") || node.named_children.reverse.find do |child| - %w[block body body_statement function_body statement_block compound_statement declaration_list].include?(child.kind) + function_body_node_kinds.include?(child.kind) end end @@ -603,7 +491,7 @@ def generic_function_body_statements(node) return [] unless body named = body.named_children.reject { |child| comment_node?(child) } - if named.size == 1 && %w[statements statement_list].include?(named.first.kind) + if named.size == 1 && nested_statement_wrapper_node_kinds.include?(named.first.kind) return [named.first] if branch_node?(named.first) named = named.first.named_children.reject { |child| comment_node?(child) } @@ -728,13 +616,13 @@ def generic_nested_local_scope?(node) end def generic_identifier?(node) - ts_node?(node) && %w[identifier simple_identifier field_identifier property_identifier].include?(node.kind) + ts_node?(node) && identifier_node_kinds.include?(node.kind) end def generic_local_identifier_text(node) return 
node.text.to_s if generic_identifier?(node) return nil unless ts_node?(node) - return nil unless %w[argument pattern directly_assignable_expression value_argument].include?(node.kind) + return nil unless local_identifier_wrapper_node_kinds.include?(node.kind) return nil unless node.named_children.empty? text = node.text.to_s @@ -743,8 +631,8 @@ def generic_local_identifier_text(node) def generic_assignment_statement?(node) ts_node?(node) && - (%w[assignment assignment_expression augmented_assignment assignment_statement operator_assignment].include?(node.kind) || - node.children.any? { |child| !child.named? && %w[= += -= *= /= %=].include?(child.text.to_s) }) + (assignment_node_kinds.include?(node.kind) || + node.children.any? { |child| !child.named? && assignment_operator_tokens.include?(child.text.to_s) }) end def generic_local_write_node?(node) @@ -755,7 +643,7 @@ def generic_local_write_node?(node) return false if generic_member_name?(node) return true if generic_declaration_name?(node) - if %w[assignment assignment_expression augmented_assignment assignment_statement operator_assignment].include?(parent.kind) + if assignment_node_kinds.include?(parent.kind) lhs = named_field(parent, "left") || parent.named_children.first return lhs == node end @@ -776,13 +664,10 @@ def generic_local_declaration_name(node) def generic_local_declaration_name_node(node) return nil unless ts_node?(node) - return nil unless %w[ - declaration init_declarator let_declaration lexical_declaration local_variable_declaration - property_declaration short_var_declaration variable_declaration variable_declarator - ].include?(node.kind) + return nil unless local_declaration_node_kinds.include?(node.kind) - if node.kind == "short_var_declaration" - left = node.named_children.find { |child| child.kind == "expression_list" } + if short_variable_declaration_node_kinds.include?(node.kind) + left = node.named_children.find { |child| variable_declaration_node_kinds.include?(child.kind) } if left 
identifier = left.named_children.find { |child| generic_identifier?(child) } return identifier if identifier @@ -790,10 +675,10 @@ def generic_local_declaration_name_node(node) return left if simple_identifier_text?(left&.text) end - variable = node.named_children.find { |child| child.kind == "variable_declaration" } + variable = node.named_children.find { |child| variable_declaration_node_kinds.include?(child.kind) } return variable if simple_identifier_text?(variable&.text) - declaration_assignment = node.named_children.find { |child| child.kind == "assignment_statement" } + declaration_assignment = node.named_children.find { |child| declaration_assignment_node_kinds.include?(child.kind) } if declaration_assignment lhs = declaration_assignment.named_children.first identifier = lhs&.named_children&.find { |child| generic_identifier?(child) } @@ -803,15 +688,14 @@ def generic_local_declaration_name_node(node) named_field(node, "pattern") || named_field(node, "name") || - node.named_children.find { |child| child.kind == "pattern" } || - node.named_children.find { |child| child.kind == "variable_declaration" }&.named_children&.find { |child| generic_identifier?(child) } || - node.named_children.find { |child| child.kind == "expression_list" }&.named_children&.find { |child| generic_identifier?(child) } || + node.named_children.find { |child| local_identifier_wrapper_node_kinds.include?(child.kind) } || + node.named_children.find { |child| variable_declaration_node_kinds.include?(child.kind) }&.named_children&.find { |child| generic_identifier?(child) } || node.named_children.find { |child| generic_identifier?(child) } end def generic_assignment_lhs_names(node) return [] unless ts_node?(node) - return [] unless %w[assignment assignment_expression assignment_statement augmented_assignment operator_assignment].include?(node.kind) + return [] unless assignment_node_kinds.include?(node.kind) lhs = named_field(node, "left") || node.named_children.first return [] unless 
ts_node?(lhs) @@ -831,7 +715,7 @@ def generic_member_name?(node) owner = parent_node(parent) return true if owner && field_like_node?(owner) end - return false if parent&.kind == "expression_list" && !member_expression_list?(parent) + return false if parent && expression_list_node_kinds.include?(parent.kind) && !member_expression_list?(parent) return false unless parent && field_like_node?(parent) field = named_field(parent, "field") || named_field(parent, "property") || @@ -843,13 +727,14 @@ def generic_member_name?(node) def generic_call_name?(node) parent = parent_node(node) return false unless parent + return false if field_like_node?(parent) - if %w[method_invocation invocation_expression].include?(parent.kind) + if adjacent_method_invocation_node_kinds.include?(parent.kind) names = parent.named_children.select { |child| generic_identifier?(child) } return names.size >= 2 ? names.last == node : parent.named_children.first == node end - %w[call_expression function_call method_call].include?(parent.kind) && + call_node_kinds.include?(parent.kind) && (named_field(parent, "function") == node || parent.named_children.first == node) end @@ -922,17 +807,17 @@ def generic_path_action_node?(node) return true if simple_action_wrapper?(node) generic_assignment_statement?(node) || - %w[call call_expression expression_statement return_statement].include?(node.kind) + path_action_node_kinds.include?(node.kind) end def simple_action_wrapper?(node) - return false unless %w[block statement_list statements control_structure_body].include?(node.kind) + return false unless simple_action_wrapper_node_kinds.include?(node.kind) normalize_text(node.text).match?(/\A[A-Za-z_]\w*(?:\.[A-Za-z_]\w*)?\s*\([^{};]*\)\s*;?\z/) end def comparison_target(node) - return nil unless %w[binary binary_expression].include?(node.kind) + return nil unless comparison_node_kinds.include?(node.kind) operator = direct_operator(node) return nil unless COMPARISON_OPERATORS.include?(operator) @@ -1001,10 
+886,9 @@ def line_text(document, node) end def control_context(node) - return :iterates if %w[while until while_statement for for_statement for_in_statement - loop_expression do_block].include?(node.kind) - return :iterates if node.kind == "expression_statement" && node.text.to_s.lstrip.match?(/\A(?:for|while|loop)\b/) - return :iterates if node.kind == "labeled_statement" && node.text.to_s.lstrip.start_with?("for ") + return :iterates if loop_node_kinds.include?(node.kind) + return :iterates if text_loop_node_kinds.include?(node.kind) && node.text.to_s.lstrip.match?(/\A(?:for|while|loop)\b/) + return :iterates if labeled_loop_node_kinds.include?(node.kind) && node.text.to_s.lstrip.start_with?("for ") return :conditional if branch_node?(node) nil @@ -1018,9 +902,7 @@ def record_decision_site(document, node, stack, out) return end - case node.kind - when "case", "switch_statement", "expression_switch_statement", "switch_expression", - "match_statement", "match_expression", "when_expression" + if case_node_kinds.include?(node.kind) return if predicate_less_case?(node) patterns = case_patterns(node) @@ -1036,9 +918,9 @@ def record_decision_site(document, node, stack, out) predicate: decision_predicate(node), enclosing_span: span(node) ) - when "body_statement", "block", "block_body", "argument_list", "statements" + elsif hidden_case_wrapper_node_kinds.include?(node.kind) return unless hidden_case?(node) - return if node.named_children.any? { |child| child.kind == "case" } + return if node.named_children.any? 
{ |child| case_node_kinds.include?(child.kind) } return if predicate_less_case?(node) patterns = case_patterns(node) @@ -1054,7 +936,7 @@ def record_decision_site(document, node, stack, out) predicate: decision_predicate(node), enclosing_span: span(node) ) - when "expression_statement" + elsif hidden_match_node_kinds.include?(node.kind) return unless hidden_match?(node) patterns = case_patterns(node) @@ -1107,7 +989,7 @@ def decision_enclosing_span(node) seen = Set.new while ts_node?(parent) && !seen.include?(node_key(parent)) seen << node_key(parent) - return span(parent) if branch_node?(parent) || %w[while until].include?(parent.kind) + return span(parent) if branch_node?(parent) || loop_node_kinds.include?(parent.kind) parent = parent_node(parent) end @@ -1187,7 +1069,7 @@ def record_state_write(document, node, stack, out) return if skip_state_write_node?(node) lhs = - if %w[assignment assignment_expression augmented_assignment assignment_statement operator_assignment].include?(node.kind) + if assignment_node_kinds.include?(node.kind) named_field(node, "left") || node.named_children.first elsif assignment_lhs?(node) node @@ -1216,7 +1098,7 @@ def skip_state_write_node?(node) return false unless parent assignment_lhs?(node) && - %w[assignment assignment_expression augmented_assignment assignment_statement operator_assignment].include?(parent.kind) + assignment_node_kinds.include?(parent.kind) end def skip_state_write_target?(target) @@ -1246,7 +1128,7 @@ def record_state_read(document, node, stack, out) def record_state_param_origin(document, node, stack, out) lhs = nil rhs = nil - if %w[assignment assignment_expression augmented_assignment assignment_statement].include?(node.kind) + if assignment_node_kinds.include?(node.kind) lhs = named_field(node, "left") || node.named_children.first rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] elsif assignment_lhs?(node) @@ -1320,15 +1202,11 @@ def record_branch_arm(document, node, 
stack, out) return end - case node.kind - when "while", "until", "while_statement", "for", "for_statement" + if branch_loop_node_kinds.include?(node.kind) record_loop_arm(document, node, stack, out) - when "case", "body_statement", "block", "expression_statement", "statements", "switch_statement", "expression_switch_statement", "switch_expression", - "match_statement", "match_expression", "when_expression" - return if node.kind == "body_statement" && !hidden_case?(node) - return if node.kind == "block" && !hidden_case?(node) - return if node.kind == "statements" && !hidden_case?(node) - return if node.kind == "expression_statement" && !hidden_match?(node) + elsif branch_case_node_kinds.include?(node.kind) + return if hidden_case_wrapper_node_kinds.include?(node.kind) && !hidden_case?(node) + return if hidden_match_node_kinds.include?(node.kind) && !hidden_match?(node) record_case_arms(document, node, stack, out) end @@ -1454,13 +1332,11 @@ def case_patterns(node) end def case_arm_patterns(child) - case child.kind - when "when", "match_arm" - patterns = child.named_children.select { |node| %w[pattern case_pattern match_pattern].include?(node.kind) } + if when_case_arm_node_kinds.include?(child.kind) + patterns = child.named_children.select { |node| case_pattern_node_kinds.include?(node.kind) } patterns = [named_field(child, "pattern") || child.named_children.first].compact if patterns.empty? 
case_pattern_texts(patterns) - when "switch_case", "case_clause", "expression_case", "case_statement", "switch_section", - "switch_block_statement_group", "switch_entry", "when_entry" + elsif switch_case_arm_node_kinds.include?(child.kind) return [] if child.text.to_s.lstrip.start_with?("else") value = named_field(child, "value") || named_field(child, "pattern") || @@ -1501,11 +1377,9 @@ def case_arms(node) child = stack.shift next unless ts_node?(child) - if %w[when switch_case case_clause expression_case case_statement switch_section switch_block_statement_group switch_entry when_entry match_arm].include?(child.kind) + if case_arm_node_kinds.include?(child.kind) arms << child - elsif !%w[method function_definition function_declaration method_definition - method_declaration function_item class class_definition - class_declaration].include?(child.kind) + elsif !case_container_stop_node_kinds.include?(child.kind) stack.concat(child.named_children) end end @@ -1521,19 +1395,19 @@ def decision_predicate(node) def decision_subject(node) named_field(node, "value") || named_field(node, "subject") || - node.named_children.find { |child| child.kind == "when_subject" } || + node.named_children.find { |child| case_subject_node_kinds.include?(child.kind) } || named_field(node, "condition") || node.named_children.find do |child| - !%w[when switch_case case_clause expression_case case_statement switch_section switch_block_statement_group switch_entry when_entry match_arm else then comment].include?(child.kind) + !case_subject_skip_node_kinds.include?(child.kind) end end def predicate_less_case?(node) - (node.kind == "case" || hidden_case?(node)) && !decision_subject(node) + (case_node_kinds.include?(node.kind) || hidden_case?(node)) && !decision_subject(node) end def default_case_pattern?(text) - text.nil? || %w[_ default].include?(text) + text.nil? 
|| default_case_patterns.include?(text) end def boolean_and?(node) @@ -1542,7 +1416,7 @@ def boolean_and?(node) return boolean_and?(child) end - %w[&& and].include?(direct_operator(node)) + boolean_and_operators.include?(direct_operator(node)) end def flatten_boolean_and(node) @@ -1556,14 +1430,14 @@ def flatten_boolean_and(node) def boolean_container?(node) return false unless ts_node?(node) - return true if %w[binary binary_expression boolean_operator conjunction_expression disjunction_expression].include?(node.kind) + return true if boolean_container_node_kinds.include?(node.kind) return boolean_container?(node.named_children.first) if parenthesized_wrapper?(node) - return false unless %w[body_statement block_body statement pattern argument_list].include?(node.kind) - return false unless %w[&& and].include?(direct_operator(node)) + return false unless boolean_wrapper_node_kinds.include?(node.kind) + return false unless boolean_and_operators.include?(direct_operator(node)) return false if node.named_children.size < 2 node.children.all? do |child| - child.named? || %w[&& and ( )].include?(child.text.to_s) + child.named? 
|| (boolean_and_operators + %w[( )]).include?(child.text.to_s) end end @@ -1573,7 +1447,7 @@ def same_span?(left, right) def conjunction_span(node) base = span(node) - if node.kind == "pattern" && node.text.to_s.lstrip.start_with?("(") + if parenthesized_pattern_node_kinds.include?(node.kind) && node.text.to_s.lstrip.start_with?("(") base = base.dup base[1] += 1 end @@ -1581,7 +1455,7 @@ def conjunction_span(node) end def parenthesized_wrapper?(node) - ts_node?(node) && %w[condition_clause parenthesized_statements parenthesized_expression].include?(node.kind) && + ts_node?(node) && parenthesized_wrapper_node_kinds.include?(node.kind) && node.named_children.size == 1 end @@ -1618,22 +1492,22 @@ def direct_operator(node) end def branch_node?(node) - BRANCH_KINDS.include?(node.kind) || hidden_match?(node) || hidden_if?(node) || + branch_node_kinds.include?(node.kind) || hidden_match?(node) || hidden_if?(node) || hidden_modifier_if?(node) || hidden_case?(node) end def if_node?(node) - %w[if unless if_statement if_expression if_modifier unless_modifier].include?(node.kind) || + if_node_kinds.include?(node.kind) || hidden_if?(node) || hidden_modifier_if?(node) end def hidden_if?(node) return false unless ts_node?(node) - return true if node.kind == "expression_statement" && node.text.to_s.lstrip.start_with?("if ") - return false unless %w[block body_statement statements statement_list].include?(node.kind) + return true if hidden_match_node_kinds.include?(node.kind) && node.text.to_s.lstrip.start_with?("if ") + return false unless hidden_if_wrapper_node_kinds.include?(node.kind) first_token = node.children.first - first_token && !first_token.named? && %w[if unless].include?(first_token.kind.to_s) + first_token && !first_token.named? 
&& hidden_if_token_kinds.include?(first_token.kind.to_s) end def hidden_modifier_if?(node) @@ -1646,15 +1520,15 @@ def modifier_condition(node) def hidden_case?(node) return false unless ts_node?(node) - return false unless %w[body_statement block statements statement_list].include?(node.kind) + return false unless hidden_case_wrapper_node_kinds.include?(node.kind) first_token = node.children.first - first_token && !first_token.named? && %w[case match switch when].include?(first_token.kind.to_s) + first_token && !first_token.named? && hidden_case_token_kinds.include?(first_token.kind.to_s) end def hidden_match?(node) ts_node?(node) && - node.kind == "expression_statement" && + hidden_match_node_kinds.include?(node.kind) && node.text.to_s.lstrip.start_with?("match ") end @@ -1795,26 +1669,25 @@ def local_declaration_index(document) def local_variable_declarator?(node) return false unless ts_node?(node) - return false unless %w[variable_declarator init_declarator].include?(node.kind) + return false unless local_variable_declarator_node_kinds.include?(node.kind) - !inside_kind?(node, %w[field_declaration property_declaration public_field_definition]) + !inside_kind?(node, field_declaration_node_kinds) end def local_name_node(node) named_field(node, "name") || - node.named_children.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) } + node.named_children.find { |child| (identifier_node_kinds + field_identifier_node_kinds).include?(child.kind) } end def implicit_state_identifier?(node) - ts_node?(node) && %w[identifier field_identifier property_identifier].include?(node.kind) + ts_node?(node) && (identifier_node_kinds + field_identifier_node_kinds).include?(node.kind) end def identifier_declaration_site?(node) parent = parent_node(node) return false unless parent - return true if %w[parameter_declaration parameter variable_declarator init_declarator function_declarator - method_declaration function_definition class_specifier 
class].include?(parent.kind) - return true if inside_kind?(node, %w[field_declaration property_declaration public_field_definition]) + return true if declaration_site_parent_node_kinds.include?(parent.kind) + return true if inside_kind?(node, field_declaration_node_kinds) false end @@ -1832,7 +1705,7 @@ def implicit_assignment_lhs?(node) parent = parent_node(node) return false unless parent - if %w[assignment_expression assignment assignment_statement augmented_assignment operator_assignment].include?(parent.kind) + if assignment_node_kinds.include?(parent.kind) lhs = named_field(parent, "left") || parent.named_children.first return lhs == node end @@ -1903,7 +1776,7 @@ def method_receiver_type_node(node) return nil unless declaration declaration.named_children.reverse.find do |child| - %w[pointer_type type_identifier qualified_type generic_type scoped_type_identifier].include?(child.kind) + receiver_type_node_kinds.include?(child.kind) end end @@ -1911,30 +1784,31 @@ def method_receiver_param_node(node) declaration = method_receiver_declaration(node) return nil unless declaration - declaration.named_children.find { |child| child.kind == "identifier" } + declaration.named_children.find { |child| identifier_node_kinds.include?(child.kind) } end def method_receiver_declaration(node) - return nil unless ts_node?(node) && node.kind == "method_declaration" + return nil unless ts_node?(node) && method_receiver_node_kinds.include?(node.kind) - receiver_params = node.named_children.find { |child| child.kind == "parameter_list" } - receiver_params&.named_children&.find { |child| child.kind == "parameter_declaration" } + receiver_params = node.named_children.find { |child| method_parameter_list_node_kinds.include?(child.kind) } + receiver_params&.named_children&.find { |child| receiver_parameter_node_kinds.include?(child.kind) } end def first_argument_receiver_parameter(node) params = named_field(named_field(node, "declarator"), "parameters") || named_field(node, 
"parameters") || - node.named_children.find { |child| child.kind == "parameter_list" } || - named_field(node, "declarator")&.named_children&.find { |child| child.kind == "parameter_list" } - first = params&.named_children&.find { |child| child.kind == "parameter_declaration" } + node.named_children.find { |child| parameter_list_node_kinds.include?(child.kind) } || + named_field(node, "declarator")&.named_children&.find { |child| parameter_list_node_kinds.include?(child.kind) } + first = params&.named_children&.find { |child| receiver_parameter_node_kinds.include?(child.kind) } return nil unless first type_node = first.named_children.find do |child| - %w[type_identifier primitive_type qualified_identifier scoped_type_identifier].include?(child.kind) + first_argument_receiver_type_node_kinds.include?(child.kind) end name_node = first.named_children.reverse.find do |child| - %w[identifier field_identifier].include?(child.kind) + first_argument_receiver_name_node_kinds.include?(child.kind) end + name_node ||= nested_receiver_name_node(first) name_node ||= declarator_name(first) return nil unless type_node && name_node @@ -1942,6 +1816,18 @@ def first_argument_receiver_parameter(node) { type: type_node.text, name: name } end + def nested_receiver_name_node(node) + node.named_children.reverse_each do |child| + next unless ts_node?(child) + + direct = child.named_children.reverse.find do |grandchild| + first_argument_receiver_name_node_kinds.include?(grandchild.kind) + end + return direct if direct + end + nil + end + def snake_case_type_name(type) type.to_s .split("::").last @@ -1954,16 +1840,15 @@ def bound_container_name(node) parent = parent_node(node) seen_nodes = Set.new while parent && !seen_nodes.include?(node_key(parent)) && - %w[ERROR expression_statement return_expression].include?(parent.kind) + bound_container_wrapper_node_kinds.include?(parent.kind) seen_nodes << node_key(parent) parent = parent_node(parent) end return nil unless parent - if 
%w[variable_declaration const_declaration lexical_declaration public_field_definition - field_declaration property_declaration].include?(parent.kind) + if bound_container_parent_node_kinds.include?(parent.kind) name = named_field(parent, "name") || - parent.named_children.find { |child| %w[identifier field_identifier property_identifier type_identifier].include?(child.kind) } + parent.named_children.find { |child| bound_container_name_node_kinds.include?(child.kind) } return name.text if name end nil @@ -1990,7 +1875,7 @@ def anonymous_owner_name(document, node) end def generic_call_target(document, node) - if %w[method_invocation invocation_expression].include?(node.kind) + if adjacent_method_invocation_node_kinds.include?(node.kind) adjacent = generic_adjacent_method_invocation_target(node) return adjacent if adjacent end @@ -2008,10 +1893,10 @@ def generic_call_target(document, node) end def generic_adjacent_method_invocation_target(node) - names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } + names = node.named_children.select { |child| identifier_node_kinds.include?(child.kind) } return nil unless names.size >= 2 - args = node.named_children.find { |child| %w[argument_list arguments call_suffix].include?(child.kind) } + args = node.named_children.find { |child| argument_list_node_kinds.include?(child.kind) } { receiver: normalize_text(names.first.text), message: names[1].text, @@ -2038,22 +1923,37 @@ def first_argument_receiver_call_target(_document, node, target) def call_argument_nodes(node) args = named_field(node, "arguments") || - node.named_children.find { |child| %w[argument_list arguments].include?(child.kind) } + node.named_children.find { |child| argument_list_node_kinds.include?(child.kind) } return Array(args&.named_children) if args - return [] unless node.kind == "call_expression" + return [] unless call_node_kinds.include?(node.kind) callee = named_field(node, "function") || named_field(node, 
"callee") || node.named_children.first node.named_children.reject { |child| child == callee } end def adjacent_argument_call_target(node) - return nil if generic_member_name?(node) - return nil if %w[call_expression method_invocation invocation_expression function_call method_call].include?(parent_node(node)&.kind) - - args = next_sibling(node) - return nil unless %w[argument_list arguments call_suffix].include?(args&.kind) + return nil if generic_member_name?(node) && !member_message_identifier?(node) + return nil if call_node_kinds.include?(parent_node(node)&.kind) + + callee = node + args = nil + if member_message_identifier?(node) + parent = parent_node(node) + if parent && field_like_node?(parent) + parent_args = next_sibling(parent) + if argument_list_node_kinds.include?(parent_args&.kind) + callee = parent + args = parent_args + elsif argument_list_node_kinds.include?(next_sibling(node)&.kind) + callee = parent + args = next_sibling(node) + end + end + end + args ||= next_sibling(callee) + return nil unless argument_list_node_kinds.include?(args&.kind) - target_from_callee(node).merge(arguments: args.named_children.map { |child| normalize_text(child.text) }) + target_from_callee(callee).merge(arguments: args.named_children.map { |child| normalize_text(child.text) }) rescue NoMethodError nil end @@ -2066,7 +1966,7 @@ def target_from_callee(callee) callee.named_children.find { |child| child.kind != "navigation_suffix" } field = named_field(callee, "field") || named_field(callee, "property") || named_field(callee, "suffix") || - callee.named_children.find { |child| child.kind == "navigation_suffix" } || + callee.named_children.find { |child| navigation_suffix_node_kinds.include?(child.kind) } || callee.named_children.last field_text = member_field_text(field) return nil unless object && field_text @@ -2075,7 +1975,7 @@ def target_from_callee(callee) receiver: normalize_text(object.text).sub(/\A\*/, ""), message: field_text } - elsif %w[identifier 
field_identifier property_identifier constant type_identifier].include?(callee.kind) + elsif self_call_identifier_node_kinds.include?(callee.kind) { receiver: "self", message: callee.text @@ -2111,13 +2011,10 @@ def noise_call?(target) end def generic_state_declaration(node) - case node.kind - when "assignment", "assignment_expression", "assignment_statement" + if assignment_state_declaration_node_kinds.include?(node.kind) assignment_state_declaration(node) - when "property_declaration", "public_field_definition", "field_definition", "field_declaration" + elsif field_declaration_node_kinds.include?(node.kind) generic_field_declaration(node) - else - nil end end @@ -2131,8 +2028,8 @@ def generic_field_declaration(node) def field_declaration_name_node(node) named_field(node, "name") || variable_declarator_name(node) || - node.named_children.find { |child| %w[field_identifier property_identifier].include?(child.kind) } || - node.named_children.reverse.find { |child| child.kind == "identifier" } + node.named_children.find { |child| field_identifier_node_kinds.include?(child.kind) } || + node.named_children.reverse.find { |child| identifier_node_kinds.include?(child.kind) } end def variable_declarator_name(node) @@ -2145,17 +2042,17 @@ def variable_declarator_name(node) next if seen.include?(key) seen << key - if %w[variable_declarator pointer_declarator declarator].include?(current.kind) + if declarator_node_kinds.include?(current.kind) direct_name = named_field(current, "name") || current.named_children.find do |child| - %w[identifier field_identifier property_identifier].include?(child.kind) + (identifier_node_kinds + field_identifier_node_kinds).include?(child.kind) end return direct_name if direct_name - return current if current.kind == "variable_declarator" && current.text.match?(/\A[A-Za-z_]\w*\z/) - elsif current.kind == "init_declarator" + return current if local_variable_declarator_node_kinds.include?(current.kind) && current.text.match?(/\A[A-Za-z_]\w*\z/) + 
elsif local_variable_declarator_node_kinds.include?(current.kind) return named_field(current, "name") || current.named_children.find do |child| - %w[identifier field_identifier property_identifier].include?(child.kind) + (identifier_node_kinds + field_identifier_node_kinds).include?(child.kind) end end pending.concat(current.named_children) @@ -2198,7 +2095,7 @@ def assignment_state_declaration(node) rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] target = state_target(lhs) return nil unless target - return nil unless %w[self this].include?(target[:receiver].to_s) + return nil unless self_receiver_names.include?(target[:receiver].to_s) type = inferred_assignment_type(rhs) return nil unless type @@ -2219,8 +2116,7 @@ def inferred_assignment_type(node) end def generic_state_read_target(node) - case node.kind - when "call" + if accessor_call_node_kinds.include?(node.kind) receiver = named_field(node, "receiver") method = named_field(node, "method") return nil unless receiver && method @@ -2229,10 +2125,8 @@ def generic_state_read_target(node) return nil if named_field(node, "arguments") { receiver: normalize_text(receiver.text), field: method.text } - when "field", "field_access", "selector_expression", "member_expression", "member_access_expression", "attribute", - "field_expression", "navigation_expression", "directly_assignable_expression", "expression_list", - "dot_index_expression", "variable_list" - return nil if node.kind == "expression_list" && !(named_field(node, "operand") && named_field(node, "field")) + elsif field_like_node?(node) + return nil if expression_list_node_kinds.include?(node.kind) && !(named_field(node, "operand") && named_field(node, "field")) object = named_field(node, "object") || named_field(node, "receiver") || named_field(node, "expression") || @@ -2241,10 +2135,10 @@ def generic_state_read_target(node) node.named_children.find { |child| child.kind != "navigation_suffix" } field = 
named_field(node, "field") || named_field(node, "property") || named_field(node, "name") || named_field(node, "suffix") || - node.named_children.find { |child| child.kind == "navigation_suffix" } || + node.named_children.find { |child| navigation_suffix_node_kinds.include?(child.kind) } || node.named_children.last - if node.kind == "field_expression" && node.text.to_s.start_with?(".") - field = node.named_children.find { |child| child.kind == "identifier" } || field + if literal_field_expression_node_kinds.include?(node.kind) && node.text.to_s.start_with?(".") + field = node.named_children.find { |child| identifier_node_kinds.include?(child.kind) } || field return { receiver: ".literal", field: field.text } if field end field_text = member_field_text(field) @@ -2260,17 +2154,14 @@ def generic_state_target(lhs) return nil unless ts_node?(lhs) return nil if prev_sibling(lhs)&.text == ":" - case lhs.kind - when "call" + if accessor_call_node_kinds.include?(lhs.kind) receiver = named_field(lhs, "receiver") method = named_field(lhs, "method") return nil unless receiver && method { receiver: normalize_text(receiver.text), field: method.text.sub(/=\z/, "") } - when "field", "field_access", "selector_expression", "member_expression", "member_access_expression", "attribute", - "field_expression", "navigation_expression", "directly_assignable_expression", "expression_list", - "dot_index_expression", "variable_list" - if lhs.kind == "expression_list" && !(named_field(lhs, "operand") && named_field(lhs, "field")) + elsif field_like_node?(lhs) + if expression_list_node_kinds.include?(lhs.kind) && !(named_field(lhs, "operand") && named_field(lhs, "field")) return generic_state_target(lhs.named_children.first) end @@ -2281,10 +2172,10 @@ def generic_state_target(lhs) lhs.named_children.find { |child| child.kind != "navigation_suffix" } field = named_field(lhs, "field") || named_field(lhs, "property") || named_field(lhs, "name") || named_field(lhs, "suffix") || - 
lhs.named_children.find { |child| child.kind == "navigation_suffix" } || + lhs.named_children.find { |child| navigation_suffix_node_kinds.include?(child.kind) } || lhs.named_children.last - if lhs.kind == "field_expression" && lhs.text.to_s.start_with?(".") - field = lhs.named_children.find { |child| child.kind == "identifier" } || field + if literal_field_expression_node_kinds.include?(lhs.kind) && lhs.text.to_s.start_with?(".") + field = lhs.named_children.find { |child| identifier_node_kinds.include?(child.kind) } || field return { receiver: ".literal", field: field.text.sub(/=\z/, "") } if field end field_text = member_field_text(field) @@ -2298,7 +2189,7 @@ def assignment_lhs?(node) return false if prev_sibling(node)&.text == ":" sibling = next_sibling(node) - sibling && %w[= += -= *= /= %= &&= ||=].include?(sibling.text.to_s) + sibling && assignment_operator_tokens.include?(sibling.text.to_s) end def direct_state_ref(_node) @@ -2307,7 +2198,7 @@ def direct_state_ref(_node) def call_has_block?(node) ts_node?(node) && - node.named_children.any? { |child| %w[block do_block lambda].include?(child.kind) } + node.named_children.any? { |child| block_argument_node_kinds.include?(child.kind) } end def next_sibling(node) @@ -2343,28 +2234,24 @@ def parent_node(node) end def field_like_node?(node) - %w[ - attribute directly_assignable_expression dot_index_expression expression_list field field_access - field_expression member_access_expression member_expression navigation_expression scoped_identifier - selector_expression variable_list - ].include?(node.kind) + field_like_node_kinds.include?(node.kind) end def member_expression_list?(node) - return false unless node.kind == "expression_list" + return false unless expression_list_node_kinds.include?(node.kind) return true if named_field(node, "operand") && named_field(node, "field") node.children.any? do |child| - !child.named? && %w[. -> :].include?(child.text.to_s) + !child.named? 
&& member_access_operator_tokens.include?(child.text.to_s) end end def member_field_text(field) return nil unless ts_node?(field) - if field.kind == "navigation_suffix" + if navigation_suffix_node_kinds.include?(field.kind) suffix = named_field(field, "suffix") || - field.named_children.find { |child| %w[identifier simple_identifier field_identifier property_identifier].include?(child.kind) } || + field.named_children.find { |child| (identifier_node_kinds + field_identifier_node_kinds).include?(child.kind) } || field.named_children.last text = suffix&.text.to_s return nil if text.empty? @@ -2383,8 +2270,7 @@ def normalize_type_owner(text) end def first_named_text(node, kinds) - expanded = kinds.include?("identifier") ? kinds + %w[simple_identifier] : kinds - child = node.named_children.find { |c| expanded.include?(c.kind) } + child = node.named_children.find { |c| kinds.include?(c.kind) } child&.text end @@ -2400,7 +2286,7 @@ def declarator_name(node) next if seen.include?(key) seen << key - return current.text if %w[identifier simple_identifier field_identifier property_identifier].include?(current.kind) + return current.text if (identifier_node_kinds + field_identifier_node_kinds).include?(current.kind) current.named_children.reverse_each { |child| pending << child } end @@ -2417,18 +2303,18 @@ def exported_name_visibility(name) def modifier_visibility(node) return :private if node.children.any? { |child| child.text == "private" } return :protected if node.children.any? { |child| child.text == "protected" } - return :public if node.children.any? { |child| %w[public pub].include?(child.text) } + return :public if node.children.any? 
{ |child| public_visibility_tokens.include?(child.text) } nil end def parameter_name(param) return nil unless ts_node?(param) - return param.text if %w[identifier simple_identifier shorthand_property_identifier_pattern].include?(param.kind) + return param.text if parameter_identifier_node_kinds.include?(param.kind) name = named_field(param, "name") || param.named_children.select do |child| - %w[identifier simple_identifier field_identifier property_identifier].include?(child.kind) + parameter_identifier_node_kinds.include?(child.kind) end.last text = name&.text.to_s return nil if text.empty? || text == "_" @@ -2438,7 +2324,7 @@ def parameter_name(param) def normalize_target_receiver(target, stack) receiver = target[:receiver].to_s - return target.merge(receiver: "self") if %w[self this].include?(receiver) + return target.merge(receiver: "self") if self_receiver_names.include?(receiver) current_receiver = current_receiver_name(stack) return target unless current_receiver @@ -2485,7 +2371,6 @@ def normalize_text(text) end end - require_relative "syntax/ruby" require_relative "syntax/adapters" LanguageProfile = TreeSitterLanguageAdapter @@ -2509,10 +2394,10 @@ def normalize_text(text) lexicon: JAVASCRIPT_LEXICON, package: "tree-sitter-javascript" ), - typescript: JavaScriptSyntaxAdapter.new( + typescript: TypeScriptSyntaxAdapter.new( language: :typescript, extensions: %w[.ts .tsx], - lexicon: JAVASCRIPT_LEXICON, + lexicon: TYPESCRIPT_LEXICON, package: "tree-sitter-typescript" ), go: GoSyntaxAdapter.new( @@ -2577,6 +2462,12 @@ def normalize_text(text) extensions: %w[.kt .kts], lexicon: KOTLIN_LEXICON, package: "tree-sitter-kotlin" + ), + php: PhpSyntaxAdapter.new( + language: :php, + extensions: %w[.php], + lexicon: PHP_LEXICON, + package: "tree-sitter-php" ) }.freeze @@ -2735,65 +2626,15 @@ def path_condition_sites end def immutable_struct_readers - adapter.immutable_struct_readers(lines) + adapter.immutable_struct_readers(self) end def immutable_struct_reader_types - 
adapter.immutable_struct_reader_types(lines) + adapter.immutable_struct_reader_types(self) end def type_aliases - adapter.type_aliases(lines) - end - end - - module SourceTextHelpers - module_function - - def immutable_struct_readers(lines) - readers = Hash.new { |h, k| h[k] = Set.new } - class_stack = [] - lines.each do |line| - if (match = line.match(/\A\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b/)) - class_stack << match[1] - next - end - if class_stack.any? && (match = line.match(/\A\s*const\s+:([A-Za-z_]\w*)\b/)) - readers[class_stack.last].add(match[1].to_sym) - next - end - class_stack.pop if class_stack.any? && line.match?(/\A\s*end\s*(?:#.*)?\z/) - end - readers - end - - def immutable_struct_reader_types(lines) - reader_types = Hash.new { |h, k| h[k] = {} } - class_stack = [] - lines.each do |line| - if (match = line.match(/\A\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b/)) - class_stack << match[1] - next - end - if class_stack.any? && (match = line.match(/\A\s*const\s+:([A-Za-z_]\w*)\s*,\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\b/)) - reader_types[class_stack.last][match[1].to_sym] = match[2] - next - end - class_stack.pop if class_stack.any? 
&& line.match?(/\A\s*end\s*(?:#.*)?\z/) - end - reader_types - end - - def type_aliases(lines) - aliases = {} - lines.each do |line| - if (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*([A-Z]\w*(?:::[A-Z]\w*)*)\s*\}/)) - aliases[match[1]] = match[2] - elsif (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*([A-Z]\w*(?:::[A-Z]\w*)*)\b/)) - aliases[match[1]] = match[2] - end - end - aliases + adapter.type_aliases(self) end end @@ -3168,16 +3009,16 @@ def path_condition_sites(document) syntax_profile(document.language).path_condition_sites(document) end - def immutable_struct_readers(lines) - SourceTextHelpers.immutable_struct_readers(lines) + def immutable_struct_readers(document) + syntax_profile(document.language).immutable_struct_readers(document) end - def immutable_struct_reader_types(lines) - SourceTextHelpers.immutable_struct_reader_types(lines) + def immutable_struct_reader_types(document) + syntax_profile(document.language).immutable_struct_reader_types(document) end - def type_aliases(lines) - SourceTextHelpers.type_aliases(lines) + def type_aliases(document) + syntax_profile(document.language).type_aliases(document) end private diff --git a/gems/decomplex/lib/decomplex/syntax/adapters.rb b/gems/decomplex/lib/decomplex/syntax/adapters.rb index 6e779b6eb..124452f83 100644 --- a/gems/decomplex/lib/decomplex/syntax/adapters.rb +++ b/gems/decomplex/lib/decomplex/syntax/adapters.rb @@ -2,334 +2,6 @@ module Decomplex module Syntax - class PythonSyntaxAdapter < TreeSitterLanguageAdapter - def function_name(node) - hidden_python_function_name(node) || super - end - - def visibility(_document, node) - name = function_name(node).to_s - return :private if name.start_with?("_") && !name.start_with?("__") - - :public - end - - def call_target(document, node) - python_adjacent_call_target(node) || super - end - - def local_methods(document) - super - end - - private - - def hidden_python_function_name(node) - return nil unless node.kind == "block" - return nil 
unless node.children.first&.kind.to_s == "def" - - node.named_children.find { |child| child.kind == "identifier" }&.text - end - - def python_function_body_statements(node) - body = named_field(node, "body") || - node.named_children.find { |child| child.kind == "block" } - return [] unless body - - body.named_children.reject { |child| child.kind == "comment" } - end - - def python_adjacent_call_target(node) - return nil unless %w[identifier].include?(node.kind) - - args = next_sibling(node) - return nil unless args&.kind == "argument_list" - - { - receiver: "self", - message: node.text, - arguments: args.named_children.map { |child| normalize_text(child.text) } - } - rescue StandardError - nil - end - end - - class GoSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - exported_name_visibility(function_name(node)) - end - - private - - def boolean_container?(node) - return true if boolean_expression_list?(node, "&&") - - super - end - end - - class RustSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - modifier_visibility(node) || :private - end - end - - class JavaScriptSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - modifier_visibility(node) || private_name_visibility(node) - end - - private - - def private_name_visibility(node) - function_name(node).to_s.start_with?("#") ? :private : :public - end - end - - class CppSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - modifier_visibility(node) || cpp_visibility(node) - end - - def function_params(node) - c_family_function_params(node) || super - end - - def implicit_state_accesses? 
- true - end - - private - - def control_context(node) - return :iterates if node.kind == "for_range_loop" - - super - end - - def cpp_visibility(node) - visibility = previous_cpp_access_specifier(node) - return visibility if visibility - - owner = nearest_owner_declaration(node) - return :public if owner&.kind == "struct_specifier" - - :private - end - - def previous_cpp_access_specifier(node) - sibling = prev_sibling(node) - while sibling - return sibling.text.to_sym if sibling.kind == "access_specifier" && - %w[public private protected].include?(sibling.text) - - sibling = prev_sibling(sibling) - end - nil - end - - def nearest_owner_declaration(node) - parent = parent_node(node) - seen = Set.new - while parent && !seen.include?(node_key(parent)) - seen << node_key(parent) - return parent if %w[class_specifier struct_specifier class class_definition class_declaration].include?(parent.kind) - - parent = parent_node(parent) - end - nil - end - end - - class CSharpSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - modifier_visibility(node) || :private - end - - def implicit_state_accesses? - true - end - - private - - def control_context(node) - return :iterates if node.kind == "foreach_statement" - - super - end - end - - class CSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - c_visibility(node) - end - - def function_params(node) - c_family_function_params(node) || super - end - - private - - def receiver_convention_owner_name(node, **_context) - return nil unless first_argument_receiver? - return nil unless node.kind == "function_definition" - - receiver = first_argument_receiver_parameter(node) - return nil unless receiver && receiver[:name] == "self" - - normalize_type_owner(receiver[:type]) - end - - def c_visibility(node) - node.children.any? { |child| child.text == "static" } ? 
:private : :public - end - end - - class LuaSyntaxAdapter < TreeSitterLanguageAdapter - def function_name(node) - lua_method_name(node) || super - end - - def receiver_owner_name(node) - lua_method_owner_name(node) || super - end - - def call_target(document, node) - lua_expression_list_call_target(node) || - lua_adjacent_member_call_target(node) || - super - end - - def state_read_target(node) - lua_single_return_member_target(node) || super - end - - def generated_prelude?(document, node) - return false unless line(node) == 1 - - first_line = document.lines.first.to_s - first_line.include?("_tl_compat") && first_line.include?("compat53.module") - end - - private - - def boolean_container?(node) - return true if boolean_expression_list?(node, "and") - - super - end - - def lua_method_name(node) - method = lua_method_index_expression(node) - return nil unless method - - method.named_children.last&.text - end - - def lua_method_owner_name(node) - method = lua_method_index_expression(node) - return nil unless method - - method.named_children.first&.text - end - - def lua_method_index_expression(node) - return nil unless node.kind == "function_declaration" - - node.named_children.find { |child| child.kind == "method_index_expression" } - end - - def lua_expression_list_call_target(node) - return nil unless node.kind == "expression_list" - - callee = node.named_children.find { |child| field_like_node?(child) } - args = node.named_children.find { |child| child.kind == "arguments" } - return nil unless callee && args - - target_from_callee(callee).merge(arguments: args.named_children.map { |child| normalize_text(child.text) }) - rescue StandardError - nil - end - - def lua_adjacent_member_call_target(node) - return nil unless node.kind == "identifier" - - args = next_sibling(node) - return nil unless args&.kind == "arguments" - - parent = parent_node(node) - return nil unless parent && field_like_node?(parent) - - target_from_callee(parent).merge(arguments: 
args.named_children.map { |child| normalize_text(child.text) }) - rescue StandardError - nil - end - - def lua_single_return_member_target(node) - return nil unless node.kind == "expression_list" - - text = normalize_text(node.text) - if (match = text.match(/\A([A-Za-z_]\w*)\.([A-Za-z_]\w*)\z/)) - return { receiver: match[1], field: match[2] } - end - - parent = parent_node(node) - return nil unless parent&.kind == "block" - return nil unless prev_sibling(node)&.kind.to_s == "return" || - parent.children.first&.kind.to_s == "return" - - return nil unless node.named_children.size == 1 - child = node.named_children.first - return nil unless field_like_node?(child) - - generic_state_read_target(child) - end - end - - class ZigSyntaxAdapter < TreeSitterLanguageAdapter - def visibility(_document, node) - modifier_visibility(node) || :private - end - - def state_declaration(node) - return zig_container_field_declaration(node) if node.kind == "container_field" - - super - end - - private - - def zig_container_field_declaration(node) - name = node.named_children.find { |child| child.kind == "identifier" } - return nil unless name - - { field: name.text, type: declared_type_text(node, name) } - end - end - - class JavaSyntaxAdapter < TreeSitterLanguageAdapter - def function_params(node) - return super unless node.kind == "method_declaration" - - params = node.named_children.find { |child| child.kind == "formal_parameters" } - return super unless params - - params.named_children.filter_map { |param| parameter_name(param) }.uniq - end - end - class JavaSyntaxAdapter - private - - def control_context(node) - return :iterates if node.kind == "enhanced_for_statement" - - super - end - end - class SwiftSyntaxAdapter < TreeSitterLanguageAdapter; end - class KotlinSyntaxAdapter < TreeSitterLanguageAdapter; end - class TreeSitterLanguageAdapter private @@ -356,3 +28,19 @@ def boolean_expression_list?(node, operator) end end end + +require_relative "ruby" +require_relative "python" 
+require_relative "javascript" +require_relative "typescript" +require_relative "go" +require_relative "rust" +require_relative "zig" +require_relative "lua" +require_relative "c" +require_relative "cpp" +require_relative "csharp" +require_relative "java" +require_relative "swift" +require_relative "kotlin" +require_relative "php" diff --git a/gems/decomplex/lib/decomplex/syntax/c.rb b/gems/decomplex/lib/decomplex/syntax/c.rb new file mode 100644 index 000000000..5d0c51e0c --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/c.rb @@ -0,0 +1,97 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + C_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bNULL\b/].freeze, + type_guard_patterns: [ + /\bNULL\b/, + /\bsizeof\s*\(/, + /\b_Generic\s*\(/ + ].freeze, + diagnostic_patterns: [ + /\b(?:assert|abort|exit)\s*\(/, + /\breturn\s+errno\b/ + ].freeze, + trivial_patterns: [ + /\A(?:NULL|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:NULL|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class CSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_definition].freeze + CALL_NODE_KINDS = %w[call_expression].freeze + CLASS_OWNER_NODE_KINDS = [].freeze + STRUCT_OWNER_NODE_KINDS = %w[struct_specifier].freeze + UNION_OWNER_NODE_KINDS = %w[union_declaration].freeze + ENUM_OWNER_NODE_KINDS = %w[enum_declaration].freeze + ANONYMOUS_OWNER_NODE_KINDS = %w[struct_declaration union_declaration enum_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[parameter_list].freeze + FUNCTION_BODY_NODE_KINDS = %w[compound_statement].freeze + IDENTIFIER_NODE_KINDS = %w[identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = %w[field_identifier].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier field_identifier].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[declaration init_declarator].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = %w[init_declarator].freeze + FIELD_DECLARATION_NODE_KINDS = %w[field_declaration].freeze + 
DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter_declaration init_declarator function_declarator struct_specifier].freeze + FIRST_ARGUMENT_RECEIVER_TYPE_NODE_KINDS = %w[type_identifier primitive_type qualified_identifier scoped_type_identifier].freeze + FIRST_ARGUMENT_RECEIVER_NAME_NODE_KINDS = %w[identifier field_identifier].freeze + RECEIVER_PARAMETER_NODE_KINDS = %w[parameter_declaration].freeze + BOUND_CONTAINER_WRAPPER_NODE_KINDS = %w[ERROR expression_statement return_expression].freeze + BOUND_CONTAINER_PARENT_NODE_KINDS = %w[declaration field_declaration].freeze + BOUND_CONTAINER_NAME_NODE_KINDS = %w[identifier field_identifier type_identifier].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression expression_statement return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[compound_statement].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement for_statement switch_statement].freeze + LOOP_NODE_KINDS = %w[for_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[switch_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_statement].freeze + IF_NODE_KINDS = %w[if_statement].freeze + CASE_ARM_NODE_KINDS = %w[case_statement].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[case_statement].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_definition struct_specifier].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[case_statement else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression].freeze + ARGUMENT_LIST_NODE_KINDS = %w[argument_list].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier field_identifier 
type_identifier].freeze + SELF_RECEIVER_NAMES = %w[self].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + FIELD_LIKE_NODE_KINDS = %w[field_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def visibility(_document, node) + c_visibility(node) + end + + def function_params(node) + c_family_function_params(node) || super + end + + private + + def receiver_convention_owner_name(node, **_context) + return nil unless first_argument_receiver? + return nil unless node.kind == "function_definition" + + receiver = first_argument_receiver_parameter(node) + return nil unless receiver && receiver[:name] == "self" + + normalize_type_owner(receiver[:type]) + end + + def c_visibility(node) + node.children.any? { |child| child.text == "static" } ? :private : :public + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/cpp.rb b/gems/decomplex/lib/decomplex/syntax/cpp.rb new file mode 100644 index 000000000..96bb4629b --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/cpp.rb @@ -0,0 +1,123 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + CPP_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\b(?:nullptr|NULL)\b/].freeze, + type_guard_patterns: [ + /\b(?:nullptr|NULL)\b/, + /\b(?:dynamic_cast|typeid)\s*[<(]/, + /\bstd::(?:get_if|holds_alternative)\s*[<(]/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\b(?:assert|abort|exit)\s*\(/, + /\bstd::terminate\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:nullptr|NULL|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:nullptr|NULL|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class CppSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_definition].freeze + CALL_NODE_KINDS = %w[call_expression].freeze + CLASS_OWNER_NODE_KINDS = %w[class_specifier].freeze + STRUCT_OWNER_NODE_KINDS = %w[struct_specifier].freeze + PARAMETER_LIST_NODE_KINDS = %w[parameter_list].freeze + FUNCTION_BODY_NODE_KINDS = %w[compound_statement].freeze + 
IDENTIFIER_NODE_KINDS = %w[identifier type_identifier qualified_identifier namespace_identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = %w[field_identifier].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier field_identifier].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[declaration init_declarator].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = %w[init_declarator].freeze + FIELD_DECLARATION_NODE_KINDS = %w[field_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter_declaration init_declarator function_declarator class_specifier struct_specifier].freeze + RECEIVER_TYPE_NODE_KINDS = %w[type_identifier qualified_identifier scoped_type_identifier].freeze + FIRST_ARGUMENT_RECEIVER_TYPE_NODE_KINDS = %w[type_identifier primitive_type qualified_identifier scoped_type_identifier].freeze + FIRST_ARGUMENT_RECEIVER_NAME_NODE_KINDS = %w[identifier field_identifier].freeze + RECEIVER_PARAMETER_NODE_KINDS = %w[parameter_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression expression_statement return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[compound_statement].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement for_range_loop switch_statement].freeze + LOOP_NODE_KINDS = %w[for_range_loop].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[switch_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_statement].freeze + IF_NODE_KINDS = %w[if_statement].freeze + CASE_ARM_NODE_KINDS = %w[case_statement].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[case_statement].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_definition class_specifier struct_specifier].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[case_statement else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ 
default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[condition_clause parenthesized_expression].freeze + ARGUMENT_LIST_NODE_KINDS = %w[argument_list].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier type_identifier field_identifier qualified_identifier].freeze + SELF_RECEIVER_NAMES = %w[this self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[public pub].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + FIELD_LIKE_NODE_KINDS = %w[field_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def visibility(_document, node) + modifier_visibility(node) || cpp_visibility(node) + end + + def function_params(node) + c_family_function_params(node) || super + end + + def implicit_state_accesses? + true + end + + private + + def control_context(node) + return :iterates if node.kind == "for_range_loop" + + super + end + + def cpp_visibility(node) + visibility = previous_cpp_access_specifier(node) + return visibility if visibility + + owner = nearest_owner_declaration(node) + return :public if owner&.kind == "struct_specifier" + + :private + end + + def previous_cpp_access_specifier(node) + sibling = prev_sibling(node) + while sibling + return sibling.text.to_sym if sibling.kind == "access_specifier" && + %w[public private protected].include?(sibling.text) + + sibling = prev_sibling(sibling) + end + nil + end + + def nearest_owner_declaration(node) + parent = parent_node(node) + seen = Set.new + while parent && !seen.include?(node_key(parent)) + seen << node_key(parent) + return parent if %w[class_specifier struct_specifier class class_definition class_declaration].include?(parent.kind) + + parent = parent_node(parent) + end + nil + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/csharp.rb b/gems/decomplex/lib/decomplex/syntax/csharp.rb new file mode 100644 index 000000000..b697b5f8c --- /dev/null +++ 
b/gems/decomplex/lib/decomplex/syntax/csharp.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + CSHARP_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnull\b/].freeze, + type_guard_patterns: [ + /\bnull\b/, + /(?:\?\.|\?\?)/, + /\b(?:is|as|typeof)\b/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\b(?:Debug\.Assert|Trace\.Assert|Environment\.Exit)\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:null|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class CSharpSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[method_declaration].freeze + CALL_NODE_KINDS = %w[invocation_expression].freeze + CLASS_OWNER_NODE_KINDS = %w[class_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[parameter_list].freeze + FUNCTION_BODY_NODE_KINDS = %w[block declaration_list].freeze + IDENTIFIER_NODE_KINDS = %w[identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = %w[argument].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[local_declaration_statement variable_declaration variable_declarator].freeze + VARIABLE_DECLARATION_NODE_KINDS = %w[variable_declaration].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = %w[variable_declarator].freeze + DECLARATOR_NODE_KINDS = %w[variable_declaration variable_declarator].freeze + FIELD_DECLARATION_NODE_KINDS = %w[field_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter variable_declarator method_declaration class_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[invocation_expression expression_statement return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block].freeze + COMPARISON_NODE_KINDS = 
%w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement foreach_statement switch_statement].freeze + LOOP_NODE_KINDS = %w[foreach_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[switch_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_statement].freeze + IF_NODE_KINDS = %w[if_statement].freeze + CASE_ARM_NODE_KINDS = %w[switch_section].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[switch_section].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[method_declaration class_declaration].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[switch_section else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression].freeze + ADJACENT_METHOD_INVOCATION_NODE_KINDS = %w[invocation_expression].freeze + ARGUMENT_LIST_NODE_KINDS = %w[argument_list].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + SELF_RECEIVER_NAMES = %w[this self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[public pub].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + FIELD_LIKE_NODE_KINDS = %w[member_access_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def visibility(_document, node) + modifier_visibility(node) || :private + end + + def implicit_state_accesses? 
+ true + end + + private + + def control_context(node) + return :iterates if node.kind == "foreach_statement" + + super + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/go.rb b/gems/decomplex/lib/decomplex/syntax/go.rb new file mode 100644 index 000000000..5904153cf --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/go.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + GO_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnil\b/].freeze, + type_guard_patterns: [ + /\bnil\b/, + /\.\(type\)/, + /\.\([A-Za-z_]\w*(?:\.[A-Za-z_]\w*)*\)/ + ].freeze, + diagnostic_patterns: [ + /\bpanic\s*\(/, + /\breturn\s+error[.\w]*/ + ].freeze, + trivial_patterns: [ + /\A(?:nil|true|false|0|1|break|continue|fallthrough)\s*;?\z/, + /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class GoSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_declaration method_declaration].freeze + CALL_NODE_KINDS = %w[call_expression].freeze + ADJACENT_CALL_NODE_KINDS = %w[selector_expression identifier field_identifier].freeze + GENERIC_OWNER_NODE_KINDS = %w[type_spec].freeze + PARAMETER_LIST_NODE_KINDS = %w[parameter_list].freeze + METHOD_PARAMETER_LIST_NODE_KINDS = %w[parameter_list].freeze + METHOD_RECEIVER_NODE_KINDS = %w[method_declaration].freeze + FUNCTION_BODY_NODE_KINDS = %w[block statement_list].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = %w[statement_list].freeze + IDENTIFIER_NODE_KINDS = %w[identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = %w[field_identifier].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier field_identifier].freeze + LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = %w[expression_list].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[short_var_declaration variable_declaration].freeze + SHORT_VARIABLE_DECLARATION_NODE_KINDS = %w[short_var_declaration].freeze + VARIABLE_DECLARATION_NODE_KINDS = %w[expression_list variable_declaration].freeze + 
LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + FIELD_DECLARATION_NODE_KINDS = %w[field_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter_declaration function_declaration method_declaration type_spec].freeze + RECEIVER_TYPE_NODE_KINDS = %w[pointer_type type_identifier].freeze + RECEIVER_PARAMETER_NODE_KINDS = %w[parameter_declaration].freeze + FIRST_ARGUMENT_RECEIVER_TYPE_NODE_KINDS = %w[type_identifier pointer_type].freeze + FIRST_ARGUMENT_RECEIVER_NAME_NODE_KINDS = %w[identifier field_identifier].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_statement].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_statement].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression expression_statement return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block statement_list].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement for_statement expression_switch_statement].freeze + LOOP_NODE_KINDS = %w[for_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[expression_switch_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[expression_switch_statement].freeze + IF_NODE_KINDS = %w[if_statement].freeze + HIDDEN_IF_WRAPPER_NODE_KINDS = %w[block statement_list].freeze + HIDDEN_IF_TOKEN_KINDS = %w[if].freeze + CASE_ARM_NODE_KINDS = %w[expression_case].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[expression_case].freeze + CASE_PATTERN_NODE_KINDS = [].freeze + CASE_SUBJECT_NODE_KINDS = [].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_declaration method_declaration type_spec].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[expression_case else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + BOOLEAN_WRAPPER_NODE_KINDS = %w[expression_list].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = 
%w[parenthesized_expression].freeze + ARGUMENT_LIST_NODE_KINDS = %w[argument_list].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier field_identifier type_identifier].freeze + SELF_RECEIVER_NAMES = %w[self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[public pub].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + EXPRESSION_LIST_NODE_KINDS = %w[expression_list].freeze + FIELD_LIKE_NODE_KINDS = %w[selector_expression expression_list].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def visibility(_document, node) + exported_name_visibility(function_name(node)) + end + + private + + def boolean_container?(node) + return true if boolean_expression_list?(node, "&&") + + super + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/java.rb b/gems/decomplex/lib/decomplex/syntax/java.rb new file mode 100644 index 000000000..d63befb78 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/java.rb @@ -0,0 +1,86 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + JAVA_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnull\b/].freeze, + type_guard_patterns: [ + /\bnull\b/, + /\binstanceof\b/, + /\bObjects\.(?:isNull|nonNull|requireNonNull)\s*\(/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\bassert\b/, + /\bSystem\.exit\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:null|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class JavaSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[method_declaration].freeze + CALL_NODE_KINDS = %w[method_invocation].freeze + CLASS_OWNER_NODE_KINDS = %w[class_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[formal_parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[block].freeze + IDENTIFIER_NODE_KINDS = %w[identifier type_identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[local_variable_declaration 
variable_declarator].freeze + VARIABLE_DECLARATION_NODE_KINDS = %w[variable_declarator].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = %w[variable_declarator].freeze + FIELD_DECLARATION_NODE_KINDS = %w[field_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[formal_parameter variable_declarator method_declaration class_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[method_invocation expression_statement return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement enhanced_for_statement switch_expression].freeze + LOOP_NODE_KINDS = %w[enhanced_for_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[switch_expression].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_expression].freeze + IF_NODE_KINDS = %w[if_statement].freeze + CASE_ARM_NODE_KINDS = %w[switch_block_statement_group].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[switch_block_statement_group].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[method_declaration class_declaration].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[switch_block_statement_group else line_comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression].freeze + ADJACENT_METHOD_INVOCATION_NODE_KINDS = %w[method_invocation].freeze + ARGUMENT_LIST_NODE_KINDS = %w[argument_list].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier type_identifier].freeze + SELF_RECEIVER_NAMES = %w[this self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[public pub].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + FIELD_LIKE_NODE_KINDS = 
%w[field_access].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def function_params(node) + return super unless node.kind == "method_declaration" + + params = node.named_children.find { |child| child.kind == "formal_parameters" } + return super unless params + + params.named_children.filter_map { |param| parameter_name(param) }.uniq + end + end + + class JavaSyntaxAdapter + private + + def control_context(node) + return :iterates if node.kind == "enhanced_for_statement" + + super + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/javascript.rb b/gems/decomplex/lib/decomplex/syntax/javascript.rb new file mode 100644 index 000000000..d802dce93 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/javascript.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + JAVASCRIPT_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\b(?:null|undefined)\b/].freeze, + type_guard_patterns: [ + /\btypeof\b/, + /\binstanceof\b/, + /(?:\?\.|\b(?:==|!=|===|!==)\s*(?:null|undefined)\b)/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\bprocess\.exit\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:null|undefined|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:null|undefined|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class JavaScriptSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_declaration method_definition].freeze + CALL_NODE_KINDS = %w[call_expression].freeze + ADJACENT_CALL_NODE_KINDS = %w[member_expression identifier property_identifier].freeze + CLASS_OWNER_NODE_KINDS = %w[class_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[formal_parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[statement_block].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = [].freeze + IDENTIFIER_NODE_KINDS = %w[identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = %w[property_identifier].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + LOCAL_DECLARATION_NODE_KINDS = 
%w[lexical_declaration variable_declarator].freeze + VARIABLE_DECLARATION_NODE_KINDS = %w[variable_declarator].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = %w[variable_declarator].freeze + FIELD_DECLARATION_NODE_KINDS = %w[public_field_definition].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[formal_parameters variable_declarator method_definition function_declaration class_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression augmented_assignment_expression].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %= &&= ||=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression expression_statement return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[statement_block].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement for_in_statement switch_statement].freeze + LOOP_NODE_KINDS = %w[for_in_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[switch_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_statement].freeze + IF_NODE_KINDS = %w[if_statement].freeze + CASE_ARM_NODE_KINDS = %w[switch_case].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[switch_case].freeze + CASE_PATTERN_NODE_KINDS = [].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_declaration method_definition class_declaration].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[switch_case else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression].freeze + BOUND_CONTAINER_PARENT_NODE_KINDS = %w[lexical_declaration public_field_definition].freeze + BOUND_CONTAINER_NAME_NODE_KINDS = %w[identifier property_identifier].freeze + ADJACENT_METHOD_INVOCATION_NODE_KINDS = [].freeze + ARGUMENT_LIST_NODE_KINDS = %w[arguments].freeze + 
SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier property_identifier].freeze + SELF_RECEIVER_NAMES = %w[this self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[public pub].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + EXPRESSION_LIST_NODE_KINDS = [].freeze + FIELD_LIKE_NODE_KINDS = %w[member_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def visibility(_document, node) + modifier_visibility(node) || private_name_visibility(node) + end + + private + + def private_name_visibility(node) + function_name(node).to_s.start_with?("#") ? :private : :public + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/kotlin.rb b/gems/decomplex/lib/decomplex/syntax/kotlin.rb new file mode 100644 index 000000000..ce9414faa --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/kotlin.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + KOTLIN_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnull\b/].freeze, + type_guard_patterns: [ + /\bnull\b/, + /(?:\?\.|\?\?)/, + /\b(?:is|as\?)(?:\s|$)/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\b(?:error|require|check|assert|TODO)\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:null|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class KotlinSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_declaration].freeze + CALL_NODE_KINDS = %w[call_expression].freeze + ADJACENT_CALL_NODE_KINDS = %w[navigation_expression directly_assignable_expression simple_identifier].freeze + CLASS_OWNER_NODE_KINDS = %w[class_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[function_value_parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[function_body statements].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = %w[statements].freeze + IDENTIFIER_NODE_KINDS = %w[simple_identifier type_identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = 
%w[simple_identifier].freeze + LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = %w[directly_assignable_expression value_argument].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[property_declaration variable_declaration].freeze + VARIABLE_DECLARATION_NODE_KINDS = %w[variable_declaration directly_assignable_expression].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + FIELD_DECLARATION_NODE_KINDS = %w[property_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter variable_declaration property_declaration function_declaration class_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression jump_expression].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[statements control_structure_body function_body].freeze + COMPARISON_NODE_KINDS = %w[equality_expression comparison_expression conjunction_expression additive_expression multiplicative_expression].freeze + BRANCH_NODE_KINDS = %w[if_expression for_statement when_expression].freeze + LOOP_NODE_KINDS = %w[for_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[when_expression].freeze + HIDDEN_CASE_WRAPPER_NODE_KINDS = %w[statements].freeze + HIDDEN_CASE_TOKEN_KINDS = %w[when].freeze + BRANCH_CASE_NODE_KINDS = %w[when_expression statements].freeze + IF_NODE_KINDS = %w[if_expression].freeze + HIDDEN_IF_WRAPPER_NODE_KINDS = %w[statements].freeze + HIDDEN_IF_TOKEN_KINDS = %w[if].freeze + CASE_ARM_NODE_KINDS = %w[when_entry].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[when_entry].freeze + CASE_PATTERN_NODE_KINDS = %w[when_condition pattern].freeze + CASE_SUBJECT_NODE_KINDS = %w[when_subject].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_declaration class_declaration].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[when_entry else line_comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default else].freeze + 
BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[conjunction_expression equality_expression comparison_expression].freeze + BOOLEAN_WRAPPER_NODE_KINDS = %w[statements pattern].freeze + ARGUMENT_LIST_NODE_KINDS = %w[call_suffix value_argument].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[simple_identifier type_identifier].freeze + SELF_RECEIVER_NAMES = %w[this self].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + NAVIGATION_SUFFIX_NODE_KINDS = %w[navigation_suffix].freeze + FIELD_LIKE_NODE_KINDS = %w[navigation_expression directly_assignable_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/lua.rb b/gems/decomplex/lib/decomplex/syntax/lua.rb new file mode 100644 index 000000000..8cd0de951 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/lua.rb @@ -0,0 +1,162 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + LUA_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnil\b/].freeze, + type_guard_patterns: [ + /\btype\s*\(/, + /\bnil\b/, + /\b(?:pcall|xpcall)\s*\(/ + ].freeze, + diagnostic_patterns: [ + /\berror\s*\(/, + /\bassert\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:nil|true|false|0|1|break)\s*;?\z/, + /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class LuaSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_declaration].freeze + CALL_NODE_KINDS = %w[function_call method_call].freeze + ADJACENT_CALL_NODE_KINDS = %w[dot_index_expression identifier expression_list variable_list].freeze + PARAMETER_LIST_NODE_KINDS = %w[parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[block].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = %w[block].freeze + IDENTIFIER_NODE_KINDS = %w[identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[variable_declaration].freeze + 
VARIABLE_DECLARATION_NODE_KINDS = %w[variable_declaration variable_list].freeze + DECLARATION_ASSIGNMENT_NODE_KINDS = %w[assignment_statement].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameters variable_declaration function_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_statement].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_statement].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[function_call expression_list return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement for_statement].freeze + LOOP_NODE_KINDS = %w[for_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + IF_NODE_KINDS = %w[if_statement].freeze + HIDDEN_IF_WRAPPER_NODE_KINDS = %w[block].freeze + HIDDEN_IF_TOKEN_KINDS = %w[if].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[and &&].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + BOOLEAN_WRAPPER_NODE_KINDS = %w[expression_list].freeze + ARGUMENT_LIST_NODE_KINDS = %w[arguments].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + SELF_RECEIVER_NAMES = %w[self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[pub public].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + EXPRESSION_LIST_NODE_KINDS = %w[expression_list].freeze + FIELD_LIKE_NODE_KINDS = %w[dot_index_expression variable_list].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def function_name(node) + lua_method_name(node) || super + end + + def receiver_owner_name(node) + lua_method_owner_name(node) || super + end + + def call_target(document, node) + lua_expression_list_call_target(node) || + lua_adjacent_member_call_target(node) || + super + end + + def state_read_target(node) + lua_single_return_member_target(node) || super + end + + def generated_prelude?(document, 
node) + return false unless line(node) == 1 + + first_line = document.lines.first.to_s + first_line.include?("_tl_compat") && first_line.include?("compat53.module") + end + + private + + def boolean_container?(node) + return true if boolean_expression_list?(node, "and") + + super + end + + def lua_method_name(node) + method = lua_method_index_expression(node) + return nil unless method + + method.named_children.last&.text + end + + def lua_method_owner_name(node) + method = lua_method_index_expression(node) + return nil unless method + + method.named_children.first&.text + end + + def lua_method_index_expression(node) + return nil unless node.kind == "function_declaration" + + node.named_children.find { |child| child.kind == "method_index_expression" } + end + + def lua_expression_list_call_target(node) + return nil unless node.kind == "expression_list" + + callee = node.named_children.find { |child| field_like_node?(child) } + args = node.named_children.find { |child| child.kind == "arguments" } + return nil unless callee && args + + target_from_callee(callee).merge(arguments: args.named_children.map { |child| normalize_text(child.text) }) + rescue StandardError + nil + end + + def lua_adjacent_member_call_target(node) + return nil unless node.kind == "identifier" + + args = next_sibling(node) + return nil unless args&.kind == "arguments" + + parent = parent_node(node) + return nil unless parent && field_like_node?(parent) + + target_from_callee(parent).merge(arguments: args.named_children.map { |child| normalize_text(child.text) }) + rescue StandardError + nil + end + + def lua_single_return_member_target(node) + return nil unless node.kind == "expression_list" + + text = normalize_text(node.text) + if (match = text.match(/\A([A-Za-z_]\w*)\.([A-Za-z_]\w*)\z/)) + return { receiver: match[1], field: match[2] } + end + + parent = parent_node(node) + return nil unless parent&.kind == "block" + return nil unless prev_sibling(node)&.kind.to_s == "return" || + 
parent.children.first&.kind.to_s == "return" + + return nil unless node.named_children.size == 1 + child = node.named_children.first + return nil unless field_like_node?(child) + + generic_state_read_target(child) + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/nil_guards.rb b/gems/decomplex/lib/decomplex/syntax/nil_guards.rb index 05599793b..d61ac5835 100644 --- a/gems/decomplex/lib/decomplex/syntax/nil_guards.rb +++ b/gems/decomplex/lib/decomplex/syntax/nil_guards.rb @@ -14,7 +14,19 @@ def to_h class Document def redundant_nil_guard_findings - @redundant_nil_guard_findings ||= NilGuardAnalyzer.new(self).scan + @redundant_nil_guard_findings ||= adapter.redundant_nil_guard_findings(self) + end + end + + class TreeSitterLanguageAdapter + def redundant_nil_guard_findings(document) + NilGuardAnalyzer.new(document).scan + end + end + + class TreeSitterAdapter + def redundant_nil_guard_findings(document) + syntax_profile(document.language).redundant_nil_guard_findings(document) end end diff --git a/gems/decomplex/lib/decomplex/syntax/php.rb b/gems/decomplex/lib/decomplex/syntax/php.rb new file mode 100644 index 000000000..a6be7e5c8 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/php.rb @@ -0,0 +1,496 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + PHP_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnull\b/i].freeze, + type_guard_patterns: [ + /\bnull\b/i, + /\b(?:is_null|isset|empty|is_a|instanceof)\s*(?:\(|\b)/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\b(?:die|exit|trigger_error)\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:null|true|false|0|1|break|continue)\s*;?\z/i, + /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/i + ].freeze + ).freeze + + class PhpSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_definition method_declaration].freeze + CALL_NODE_KINDS = %w[function_call_expression member_call_expression scoped_call_expression print_intrinsic].freeze + 
CLASS_OWNER_NODE_KINDS = %w[class_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[formal_parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[compound_statement declaration_list].freeze + IDENTIFIER_NODE_KINDS = %w[name variable_name].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[name variable_name simple_parameter].freeze + LOCAL_DECLARATION_NODE_KINDS = [].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + FIELD_DECLARATION_NODE_KINDS = %w[property_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[simple_parameter method_declaration function_definition class_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression augmented_assignment_expression].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[function_call_expression member_call_expression scoped_call_expression expression_statement return_statement print_intrinsic].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[compound_statement declaration_list].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement foreach_statement switch_statement].freeze + LOOP_NODE_KINDS = %w[foreach_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[switch_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_statement].freeze + IF_NODE_KINDS = %w[if_statement].freeze + CASE_ARM_NODE_KINDS = %w[case_statement].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[case_statement].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_definition method_declaration class_declaration].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[case_statement else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = 
%w[parenthesized_expression].freeze + ARGUMENT_LIST_NODE_KINDS = %w[arguments argument].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[name variable_name].freeze + SELF_RECEIVER_NAMES = %w[$this this self].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + FIELD_LIKE_NODE_KINDS = %w[member_access_expression member_call_expression class_constant_access_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def function_name(node) + return php_name_text(named_field(node, "name") || node.named_children.find { |child| child.kind == "name" }) if %w[ + function_definition method_declaration + ].include?(node.kind) + + super + end + + def owner_name_from_declaration(document, node) + return php_name_text(named_field(node, "name") || node.named_children.find { |child| child.kind == "name" }) if node.kind == "class_declaration" + + super + end + + def visibility(_document, node) + modifier = node.named_children.find { |child| child.kind == "visibility_modifier" } + return modifier.text.to_sym if modifier && %w[public private protected].include?(modifier.text) + + :public + end + + def function_params(node) + params = named_field(node, "parameters") || + node.named_children.find { |child| child.kind == "formal_parameters" } + return super unless params + + params.named_children.filter_map { |param| php_parameter_name(param) }.uniq + end + + def call_target(document, node) + php_call_target(node) || super + end + + def state_read_target(node) + return nil if php_assignment_lhs?(node) + + super + end + + def state_declaration(node) + php_property_declaration(node) || super + end + + def predicate_def(document, function_def) + predicate = super + return nil unless predicate + + PredicateDef.new( + file: predicate.file, + name: predicate.name, + owner: predicate.owner, + body: php_normalize_source(predicate.body), + line: predicate.line, + span: predicate.span + ) + end + + def path_condition_sites(document) + super.map do |site| + PathConditionSite.new( + guards: 
site.guards.map { |guard| php_normalize_source(guard) }, + action: php_normalize_source(site.action), + file: site.file, + function: site.function, + line: site.line, + span: site.span + ) + end + end + + def local_contract_assignments(document, method) + super.transform_values { |source| php_normalize_source(source) } + end + + def redundant_nil_guard_findings(document) + findings = [] + document.function_defs.each do |function_def| + php_nil_guard_walk(document, function_def.body, function_def.name, Set.new, findings) + end + findings + end + + private + + def php_call_target(node) + return php_print_target(node) if node.kind == "print_intrinsic" + return nil unless %w[function_call_expression member_call_expression scoped_call_expression].include?(node.kind) + + names = node.named_children.select do |child| + php_name_node?(child) || child.kind == "variable_name" || child.kind == "member_access_expression" + end + args = node.named_children.find { |child| child.kind == "arguments" } + + case node.kind + when "member_call_expression" + access = names.find { |child| child.kind == "member_access_expression" } + receiver = access ? php_member_receiver(access) : names.first + message = access ? 
php_member_name(access) : names[1] + return nil unless receiver && message + + { + receiver: php_normalize_receiver(php_identifier_text(receiver)), + message: php_name_text(message), + arguments: php_argument_texts(args) + } + when "scoped_call_expression" + receiver = names.first + message = names[1] + return nil unless receiver && message + + { + receiver: php_name_text(receiver), + message: php_name_text(message), + arguments: php_argument_texts(args) + } + when "function_call_expression" + name = names.first + return nil unless name + + { + receiver: "self", + message: php_name_text(name), + arguments: php_argument_texts(args) + } + end + end + + def php_print_target(node) + { + receiver: "self", + message: "print", + arguments: node.named_children.map { |child| php_normalize_source(child.text) } + } + end + + def php_property_declaration(node) + return nil unless node.kind == "property_declaration" + + property = node.named_children.find { |child| child.kind == "property_element" } + name = property&.named_children&.find { |child| child.kind == "variable_name" } + return nil unless name + + { field: php_identifier_text(name), type: declared_type_text(node, name) } + end + + def php_parameter_name(param) + variable = param.named_children.find { |child| child.kind == "variable_name" } + php_identifier_text(variable) || php_identifier_text(param) + end + + def generic_identifier?(node) + super || (ts_node?(node) && %w[name variable_name].include?(node.kind)) + end + + def generic_local_identifier_text(node) + return php_identifier_text(node) if ts_node?(node) && node.kind == "variable_name" + + super + end + + def generic_member_name?(node) + return true if parent_node(node)&.kind == "variable_name" + return false if node.kind == "variable_name" + + super + end + + def generic_local_writes(node) + (super + php_local_write_names(node)).map { |name| php_identifier_text_value(name) }.uniq + end + + def generic_local_write_node?(node) + return true if ts_node?(node) 
&& node.kind == "variable_name" && php_assignment_lhs?(node) + + super + end + + def decision_member_text(node) + php_normalize_source(super) + end + + def decision_predicate(node) + php_normalize_source(super) + end + + def comparison_target(node) + target = super + return nil unless target + + target.merge(source: php_normalize_source(target[:source])) + end + + def control_context(node) + return :iterates if node.kind == "foreach_statement" + + super + end + + def target_from_callee(callee) + target = super + return target unless target + + target.merge(receiver: php_normalize_receiver(target[:receiver])) + end + + def generic_state_read_target(node) + target = super + return target unless target + + target.merge(receiver: php_normalize_receiver(target[:receiver])) + end + + def generic_state_target(lhs) + target = super + return target unless target + + target.merge(receiver: php_normalize_receiver(target[:receiver])) + end + + def member_field_text(field) + php_name_text(field) + end + + def simple_identifier_text?(text) + php_identifier_text_value(text).match?(/\A[A-Za-z_]\w*\z/) + end + + def php_name_node?(node) + ts_node?(node) && %w[name qualified_name].include?(node.kind) + end + + def php_assignment_lhs?(node) + parent = parent_node(node) + return false unless parent + + %w[assignment_expression augmented_assignment_expression].include?(parent.kind) && + parent.named_children.first == node + end + + def php_local_write_names(node) + writes = [] + generic_walk_local(node) do |child| + next unless ts_node?(child) && child.kind == "variable_name" + next unless php_assignment_lhs?(child) + + writes << php_identifier_text(child) + end + writes.compact + end + + def php_argument_texts(args) + Array(args&.named_children).map { |child| php_normalize_source(child.text) } + end + + def php_member_receiver(node) + return nil unless ts_node?(node) + + named_field(node, "object") || named_field(node, "receiver") || + named_field(node, "expression") || 
node.named_children.first + end + + def php_member_name(node) + return nil unless ts_node?(node) + + named_field(node, "name") || named_field(node, "field") || + node.named_children.reverse.find { |child| php_name_node?(child) } + end + + def php_identifier_text(node) + text = php_identifier_text_value(node&.text) + text.empty? ? nil : text + end + + def php_name_text(node) + text = php_identifier_text_value(node&.text) + text.empty? ? nil : text + end + + def php_identifier_text_value(text) + text.to_s.sub(/\A\$/, "") + end + + def php_normalize_receiver(receiver) + value = php_identifier_text_value(receiver) + value == "this" ? "self" : value + end + + def php_normalize_source(source) + source.to_s + .gsub(/\$([A-Za-z_]\w*)/, '\1') + .gsub(/->|::/, ".") + end + + def php_nil_guard_walk(document, node, function, known, findings) + return unless ts_node?(node) + return if generic_nested_local_scope?(node) && function_name(node) != function + + if node.kind == "if_statement" + php_process_nil_guard_if(document, node, function, known, findings) + return + end + + php_record_redundant_nil_guard(document, node, function, known, findings) + node.named_children.each do |child| + php_nil_guard_walk(document, child, function, known, findings) + end + end + + def php_process_nil_guard_if(document, node, function, known, findings) + condition = named_field(node, "condition") || node.named_children.first + body = named_field(node, "body") || node.named_children[1] + branch_known = known.dup + php_non_nil_facts(condition).each { |local| branch_known.add(local) } + php_nil_guard_walk(document, body, function, branch_known, findings) + end + + def php_record_redundant_nil_guard(document, node, function, known, findings) + subject = php_nil_guard_subject(node) + return unless subject && known.include?(subject) + + findings << NilGuardFinding.new( + file: document.file, + defn: function, + line: line(node), + span: span(node), + local: subject, + guard: 
php_normalize_source(node.text), + proof: "#{subject} is already proven non-nil on this path" + ) + end + + def php_non_nil_facts(node) + node = php_unwrap_parenthesized(node) + return [] unless ts_node?(node) + + subject = php_subject_key(node) + return [subject] if subject + + call = php_member_call_parts(node) + return [call[:receiver]] if call && %w[isSome is_some present].include?(call[:message]) + + comparison = php_nil_comparison(node) + return [comparison[:subject]] if comparison && %w[!== !=].include?(comparison[:operator]) + + [] + end + + def php_nil_guard_subject(node) + node = php_unwrap_parenthesized(node) + return nil unless ts_node?(node) + + call = php_member_call_parts(node) + return call[:receiver] if call && %w[isNull is_null nil is_none].include?(call[:message]) + + comparison = php_nil_comparison(node) + return comparison[:subject] if comparison && %w[=== ==].include?(comparison[:operator]) + + function_call = php_function_call_parts(node) + return function_call[:arguments].first if function_call && %w[is_null].include?(function_call[:message]) + + nil + end + + def php_nil_comparison(node) + return nil unless ts_node?(node) && node.kind == "binary_expression" + + operator = direct_operator(node) + return nil unless %w[=== !== == !=].include?(operator) + + left, right = node.named_children + if php_null_literal?(right) + subject = php_subject_key(left) + elsif php_null_literal?(left) + subject = php_subject_key(right) + end + subject ? { subject: subject, operator: operator } : nil + end + + def php_member_call_parts(node) + node = php_unwrap_parenthesized(node) + return nil unless ts_node?(node) && node.kind == "member_call_expression" + + access = node.named_children.find { |child| child.kind == "member_access_expression" } + receiver_node = access ? php_member_receiver(access) : node.named_children.find { |child| child.kind == "variable_name" } + message_node = access ? 
php_member_name(access) : node.named_children.find { |child| php_name_node?(child) } + receiver = php_subject_key(receiver_node) + message = php_name_text(message_node) + return nil unless receiver && message + + { receiver: receiver, message: message } + end + + def php_function_call_parts(node) + node = php_unwrap_parenthesized(node) + return nil unless ts_node?(node) && node.kind == "function_call_expression" + + name = node.named_children.find { |child| php_name_node?(child) } + args = node.named_children.find { |child| child.kind == "arguments" } + { + message: php_name_text(name), + arguments: Array(args&.named_children).filter_map { |child| php_subject_key(child) } + } + end + + def php_subject_key(node) + node = php_unwrap_parenthesized(node) + return nil unless ts_node?(node) + + case node.kind + when "variable_name", "name" + php_identifier_text(node) + when "member_access_expression" + receiver = php_subject_key(php_member_receiver(node)) + message = php_name_text(php_member_name(node)) + receiver && message ? 
"#{receiver}.#{message}" : nil + else + nil + end + end + + def php_unwrap_parenthesized(node) + current = node + while ts_node?(current) && + %w[parenthesized_expression parenthesized_statements].include?(current.kind) && + current.named_children.size == 1 + current = current.named_children.first + end + current + end + + def php_null_literal?(node) + ts_node?(node) && node.kind == "null" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/python.rb b/gems/decomplex/lib/decomplex/syntax/python.rb new file mode 100644 index 000000000..87508d55c --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/python.rb @@ -0,0 +1,123 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + PYTHON_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bNone\b/].freeze, + type_guard_patterns: [ + /\b(?:isinstance|issubclass|hasattr)\s*\(/, + /\bis\s+(?:not\s+)?None\b/, + /\btype\s*\([^)]*\)\s*(?:==|is)\s*/ + ].freeze, + diagnostic_patterns: [ + /\braise\b/, + /\bassert\b/, + /\bsys\.exit\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:None|True|False|0|1|break|continue|pass)\s*;?\z/, + /\Areturn\s+(?:None|True|False|0|1)\s*;?\z/ + ].freeze + ).freeze + + class PythonSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_definition].freeze + CALL_NODE_KINDS = %w[call].freeze + ADJACENT_CALL_NODE_KINDS = %w[attribute identifier].freeze + CLASS_OWNER_NODE_KINDS = %w[class_definition].freeze + PARAMETER_LIST_NODE_KINDS = %w[parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[block].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = %w[block].freeze + IDENTIFIER_NODE_KINDS = %w[identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment augmented_assignment].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call expression_statement return_statement].freeze + 
SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block].freeze + COMPARISON_NODE_KINDS = %w[comparison_operator binary_operator boolean_operator].freeze + BRANCH_NODE_KINDS = %w[if_statement for_statement match_statement].freeze + LOOP_NODE_KINDS = %w[for_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[match_statement].freeze + HIDDEN_CASE_WRAPPER_NODE_KINDS = %w[block].freeze + HIDDEN_CASE_TOKEN_KINDS = %w[match case].freeze + BRANCH_CASE_NODE_KINDS = %w[match_statement block].freeze + IF_NODE_KINDS = %w[if_statement].freeze + HIDDEN_IF_WRAPPER_NODE_KINDS = %w[block statement_list].freeze + HIDDEN_IF_TOKEN_KINDS = %w[if].freeze + CASE_ARM_NODE_KINDS = %w[case_clause].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[case_clause].freeze + CASE_PATTERN_NODE_KINDS = %w[case_pattern pattern].freeze + CASE_SUBJECT_NODE_KINDS = [].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_definition class_definition].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[case_clause else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[and &&].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_operator boolean_operator comparison_operator].freeze + BOOLEAN_WRAPPER_NODE_KINDS = %w[block].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + FIELD_DECLARATION_NODE_KINDS = [].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameters].freeze + ADJACENT_METHOD_INVOCATION_NODE_KINDS = [].freeze + ARGUMENT_LIST_NODE_KINDS = %w[argument_list].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + SELF_RECEIVER_NAMES = %w[self].freeze + ACCESSOR_CALL_NODE_KINDS = %w[call].freeze + FIELD_LIKE_NODE_KINDS = %w[attribute].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def function_name(node) + hidden_python_function_name(node) || super + end + + def visibility(_document, node) + name = function_name(node).to_s + return :private if 
name.start_with?("_") && !name.start_with?("__") + + :public + end + + def call_target(document, node) + python_adjacent_call_target(node) || super + end + + def local_methods(document) + super + end + + private + + def hidden_python_function_name(node) + return nil unless node.kind == "block" + return nil unless node.children.first&.kind.to_s == "def" + + node.named_children.find { |child| child.kind == "identifier" }&.text + end + + def python_function_body_statements(node) + body = named_field(node, "body") || + node.named_children.find { |child| child.kind == "block" } + return [] unless body + + body.named_children.reject { |child| child.kind == "comment" } + end + + def python_adjacent_call_target(node) + return nil unless %w[identifier].include?(node.kind) + + args = next_sibling(node) + return nil unless args&.kind == "argument_list" + + { + receiver: "self", + message: node.text, + arguments: args.named_children.map { |child| normalize_text(child.text) } + } + rescue StandardError + nil + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/ruby.rb b/gems/decomplex/lib/decomplex/syntax/ruby.rb index 73c5fc544..2cfdaf2a6 100644 --- a/gems/decomplex/lib/decomplex/syntax/ruby.rb +++ b/gems/decomplex/lib/decomplex/syntax/ruby.rb @@ -2,7 +2,62 @@ module Decomplex module Syntax + RUBY_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnil\b/].freeze, + type_guard_patterns: [ + /(?:\A|[^\w!?])(?:nil\?|is_a\?|kind_of\?|instance_of\?|respond_to\?)(?:\s*\(|\b)/, + /&\./ + ].freeze, + diagnostic_patterns: [ + /(?:\A|[^\w!?])(?:raise|fail|abort)[!?]?(?:\s*\(|\b)/ + ].freeze, + trivial_patterns: [ + /\A(?:nil|true|false|0|1|break|next)\s*;?\z/, + /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + class RubySyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[method].freeze + CALL_NODE_KINDS = %w[call].freeze + CLASS_OWNER_NODE_KINDS = %w[class].freeze + MODULE_OWNER_NODE_KINDS = %w[module].freeze + 
PARAMETER_LIST_NODE_KINDS = %w[method_parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[body_statement do_block].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = %w[body_statement].freeze + IDENTIFIER_NODE_KINDS = %w[identifier constant].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = %w[pattern].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment operator_assignment].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %= &&= ||=].freeze + PATH_ACTION_NODE_KINDS = %w[call return].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[body_statement].freeze + COMPARISON_NODE_KINDS = %w[binary].freeze + BRANCH_NODE_KINDS = %w[if unless if_modifier unless_modifier case while until for].freeze + LOOP_NODE_KINDS = %w[while until for do_block].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[case].freeze + BRANCH_CASE_NODE_KINDS = %w[case body_statement].freeze + IF_NODE_KINDS = %w[if unless if_modifier unless_modifier].freeze + HIDDEN_IF_WRAPPER_NODE_KINDS = %w[body_statement].freeze + HIDDEN_CASE_WRAPPER_NODE_KINDS = %w[body_statement].freeze + HIDDEN_IF_TOKEN_KINDS = %w[if unless].freeze + HIDDEN_CASE_TOKEN_KINDS = %w[case when].freeze + CASE_ARM_NODE_KINDS = %w[when].freeze + WHEN_CASE_ARM_NODE_KINDS = %w[when].freeze + CASE_PATTERN_NODE_KINDS = %w[pattern].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[method class module].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[when else then comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default else].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary].freeze + BOOLEAN_WRAPPER_NODE_KINDS = %w[body_statement pattern argument_list].freeze + PARENTHESIZED_PATTERN_NODE_KINDS = %w[pattern].freeze + ACCESSOR_CALL_NODE_KINDS = %w[call].freeze + BLOCK_ARGUMENT_NODE_KINDS = %w[block do_block lambda].freeze + SELF_RECEIVER_NAMES = %w[self].freeze + def function_name(node) case node.kind 
when "body_statement" @@ -148,6 +203,18 @@ def path_condition_sites(document) out end + def immutable_struct_readers(document) + ruby_immutable_struct_readers(document.lines) + end + + def immutable_struct_reader_types(document) + ruby_immutable_struct_reader_types(document.lines) + end + + def type_aliases(document) + ruby_type_aliases(document.lines) + end + private def comparison_target(node) @@ -843,6 +910,52 @@ def sig_param_types(sig_source) match[1].scan(/([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)/).to_h end + def ruby_immutable_struct_readers(lines) + readers = Hash.new { |h, k| h[k] = Set.new } + class_stack = [] + lines.each do |line| + if (match = line.match(/\A\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b/)) + class_stack << match[1] + next + end + if class_stack.any? && (match = line.match(/\A\s*const\s+:([A-Za-z_]\w*)\b/)) + readers[class_stack.last].add(match[1].to_sym) + next + end + class_stack.pop if class_stack.any? && line.match?(/\A\s*end\s*(?:#.*)?\z/) + end + readers + end + + def ruby_immutable_struct_reader_types(lines) + reader_types = Hash.new { |h, k| h[k] = {} } + class_stack = [] + lines.each do |line| + if (match = line.match(/\A\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b/)) + class_stack << match[1] + next + end + if class_stack.any? && (match = line.match(/\A\s*const\s+:([A-Za-z_]\w*)\s*,\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\b/)) + reader_types[class_stack.last][match[1].to_sym] = match[2] + next + end + class_stack.pop if class_stack.any? 
&& line.match?(/\A\s*end\s*(?:#.*)?\z/) + end + reader_types + end + + def ruby_type_aliases(lines) + aliases = {} + lines.each do |line| + if (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*([A-Z]\w*(?:::[A-Z]\w*)*)\s*\}/)) + aliases[match[1]] = match[2] + elsif (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*([A-Z]\w*(?:::[A-Z]\w*)*)\b/)) + aliases[match[1]] = match[2] + end + end + aliases + end + def apply_ruby_visibility!(out) functions_by_owner = out.fetch(:function_defs).group_by(&:owner) calls_by_owner = out.fetch(:call_sites).group_by(&:owner) diff --git a/gems/decomplex/lib/decomplex/syntax/rust.rb b/gems/decomplex/lib/decomplex/syntax/rust.rb new file mode 100644 index 000000000..e86d9cdfa --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/rust.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + RUST_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bNone\b/].freeze, + type_guard_patterns: [ + /\b(?:is_some|is_none)\s*\(/, + /\b(?:Some|None)\b/, + /\bmatches!\s*\(/ + ].freeze, + diagnostic_patterns: [ + /\b(?:panic|unreachable|todo|unimplemented)!\s*\(/, + /\breturn\s+Err\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:None|true|false|0|1|break|continue|unreachable!)\s*;?\z/, + /\Areturn\s+(?:None|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class RustSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_item].freeze + CALL_NODE_KINDS = %w[call_expression].freeze + IMPL_OWNER_NODE_KINDS = %w[impl_item].freeze + STRUCT_OWNER_NODE_KINDS = %w[struct_item].freeze + PARAMETER_LIST_NODE_KINDS = %w[parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[block declaration_list].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = [].freeze + IDENTIFIER_NODE_KINDS = %w[identifier type_identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = %w[field_identifier].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier self_parameter].freeze + LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = 
%w[pattern].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[let_declaration].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + FIELD_DECLARATION_NODE_KINDS = %w[field_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter let_declaration function_item struct_item impl_item].freeze + RECEIVER_TYPE_NODE_KINDS = %w[type_identifier generic_type scoped_type_identifier].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression compound_assignment_expr].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression expression_statement return_expression].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_expression match_expression for_expression].freeze + LOOP_NODE_KINDS = %w[for_expression].freeze + TEXT_LOOP_NODE_KINDS = %w[expression_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[match_expression].freeze + HIDDEN_MATCH_NODE_KINDS = %w[expression_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[match_expression expression_statement].freeze + IF_NODE_KINDS = %w[if_expression].freeze + CASE_ARM_NODE_KINDS = %w[match_arm].freeze + WHEN_CASE_ARM_NODE_KINDS = %w[match_arm].freeze + CASE_PATTERN_NODE_KINDS = %w[match_pattern pattern].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_item impl_item struct_item].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[match_arm else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression tuple_expression].freeze + ARGUMENT_LIST_NODE_KINDS = %w[arguments].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier type_identifier field_identifier].freeze + SELF_RECEIVER_NAMES = %w[self].freeze 
+ PUBLIC_VISIBILITY_TOKENS = %w[pub public].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + EXPRESSION_LIST_NODE_KINDS = [].freeze + NAVIGATION_SUFFIX_NODE_KINDS = [].freeze + LITERAL_FIELD_EXPRESSION_NODE_KINDS = %w[field_expression].freeze + FIELD_LIKE_NODE_KINDS = %w[field_expression scoped_identifier].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def visibility(_document, node) + modifier_visibility(node) || :private + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/swift.rb b/gems/decomplex/lib/decomplex/syntax/swift.rb new file mode 100644 index 000000000..2eb402927 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/swift.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + SWIFT_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnil\b/].freeze, + type_guard_patterns: [ + /\bnil\b/, + /(?:\?\.|\?\?)/, + /\b(?:if|guard)\s+let\b/, + /\b(?:as\?|is)(?:\s|$)/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\b(?:fatalError|preconditionFailure|assertionFailure|assert|precondition)\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:nil|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class SwiftSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_declaration].freeze + CALL_NODE_KINDS = %w[call_expression].freeze + ADJACENT_CALL_NODE_KINDS = %w[navigation_expression directly_assignable_expression simple_identifier].freeze + CLASS_OWNER_NODE_KINDS = %w[class_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[function_value_parameters].freeze + INLINE_PARAMETER_NODE_KINDS = %w[parameter].freeze + FUNCTION_BODY_NODE_KINDS = %w[function_body statements].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = %w[statements].freeze + IDENTIFIER_NODE_KINDS = %w[simple_identifier type_identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[simple_identifier].freeze + 
LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = %w[directly_assignable_expression value_argument pattern].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[property_declaration variable_declaration].freeze + VARIABLE_DECLARATION_NODE_KINDS = %w[variable_declaration directly_assignable_expression].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + FIELD_DECLARATION_NODE_KINDS = %w[property_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter variable_declaration property_declaration function_declaration class_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression control_transfer_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[statements control_structure_body function_body].freeze + COMPARISON_NODE_KINDS = %w[equality_expression comparison_expression conjunction_expression additive_expression multiplicative_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement for_statement switch_statement].freeze + LOOP_NODE_KINDS = %w[for_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[switch_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_statement].freeze + IF_NODE_KINDS = %w[if_statement].freeze + HIDDEN_IF_WRAPPER_NODE_KINDS = %w[statements].freeze + HIDDEN_IF_TOKEN_KINDS = %w[if].freeze + CASE_ARM_NODE_KINDS = %w[switch_entry].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[switch_entry].freeze + CASE_PATTERN_NODE_KINDS = %w[switch_pattern pattern].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_declaration class_declaration].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[switch_entry else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[conjunction_expression equality_expression comparison_expression].freeze + BOOLEAN_WRAPPER_NODE_KINDS = 
%w[statements pattern].freeze + ARGUMENT_LIST_NODE_KINDS = %w[call_suffix value_argument].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[simple_identifier type_identifier].freeze + SELF_RECEIVER_NAMES = %w[self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[public pub].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + NAVIGATION_SUFFIX_NODE_KINDS = %w[navigation_suffix].freeze + FIELD_LIKE_NODE_KINDS = %w[navigation_expression directly_assignable_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/typescript.rb b/gems/decomplex/lib/decomplex/syntax/typescript.rb new file mode 100644 index 000000000..6fce63161 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/typescript.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + TYPESCRIPT_LEXICON = JAVASCRIPT_LEXICON + + class TypeScriptSyntaxAdapter < JavaScriptSyntaxAdapter + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/zig.rb b/gems/decomplex/lib/decomplex/syntax/zig.rb new file mode 100644 index 000000000..fcf985953 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/zig.rb @@ -0,0 +1,88 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + ZIG_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnull\b/].freeze, + type_guard_patterns: [ + /\bnull\b/, + /@typeInfo\b/, + /\bif\s*\([^)]*\)\s*\|/ + ].freeze, + diagnostic_patterns: [ + /@panic\s*\(/, + /\bunreachable\b/, + /\breturn\s+error[.\w]*/ + ].freeze, + trivial_patterns: [ + /\A(?:null|true|false|0|1|break|continue|unreachable)\s*;?\z/, + /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class ZigSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_declaration].freeze + CALL_NODE_KINDS = %w[call_expression].freeze + ADJACENT_CALL_NODE_KINDS = %w[field_expression identifier].freeze + ANONYMOUS_OWNER_NODE_KINDS = %w[struct_declaration].freeze + PARAMETER_LIST_NODE_KINDS = 
%w[parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[block block_expression].freeze + IDENTIFIER_NODE_KINDS = %w[identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[variable_declaration].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + FIELD_DECLARATION_NODE_KINDS = %w[container_field].freeze + BOUND_CONTAINER_PARENT_NODE_KINDS = %w[variable_declaration].freeze + BOUND_CONTAINER_NAME_NODE_KINDS = %w[identifier].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter variable_declaration function_declaration struct_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression expression_statement return_expression].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement switch_expression for_statement labeled_statement].freeze + LOOP_NODE_KINDS = %w[for_statement].freeze + TEXT_LOOP_NODE_KINDS = %w[labeled_statement].freeze + BRANCH_LOOP_NODE_KINDS = %w[for_statement labeled_statement].freeze + CASE_NODE_KINDS = %w[switch_expression].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_expression].freeze + IF_NODE_KINDS = %w[if_statement].freeze + CASE_ARM_NODE_KINDS = %w[switch_case].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[switch_case].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_declaration struct_declaration].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[switch_case else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default else].freeze + BOOLEAN_AND_OPERATORS = %w[and &&].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + ARGUMENT_LIST_NODE_KINDS = %w[argument_list arguments].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = 
%w[identifier].freeze + SELF_RECEIVER_NAMES = %w[self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[pub public].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + LITERAL_FIELD_EXPRESSION_NODE_KINDS = %w[field_expression].freeze + FIELD_LIKE_NODE_KINDS = %w[field_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def visibility(_document, node) + modifier_visibility(node) || :private + end + + def state_declaration(node) + return zig_container_field_declaration(node) if node.kind == "container_field" + + super + end + + private + + def zig_container_field_declaration(node) + name = node.named_children.find { |child| child.kind == "identifier" } + return nil unless name + + { field: name.text, type: declared_type_text(node, name) } + end + end + end +end diff --git a/gems/decomplex/test/architecture_invariants_test.rb b/gems/decomplex/test/architecture_invariants_test.rb index 617a779df..92324e718 100644 --- a/gems/decomplex/test/architecture_invariants_test.rb +++ b/gems/decomplex/test/architecture_invariants_test.rb @@ -38,6 +38,29 @@ class DecomplexArchitectureInvariantsTest < Minitest::Test "language profiles must instantiate concrete adapters, not the base adapter" => /:\s*TreeSitterLanguageAdapter\.new\(/ }.freeze + ADAPTER_LOADER_LANGUAGE_IMPLEMENTATION_PATTERNS = { + "language lexicons belong in the language adapter file" => + /^\s*[A-Z_]+_LEXICON\s*=/, + "concrete language adapters belong in the language adapter file" => + /^\s*class\s+(?!TreeSitterLanguageAdapter\b)\w+SyntaxAdapter\b/ + }.freeze + LANGUAGE_ADAPTER_FILES = { + "ruby.rb" => "RubySyntaxAdapter", + "python.rb" => "PythonSyntaxAdapter", + "javascript.rb" => "JavaScriptSyntaxAdapter", + "typescript.rb" => "TypeScriptSyntaxAdapter", + "go.rb" => "GoSyntaxAdapter", + "rust.rb" => "RustSyntaxAdapter", + "zig.rb" => "ZigSyntaxAdapter", + "lua.rb" => "LuaSyntaxAdapter", + "c.rb" => "CSyntaxAdapter", + "cpp.rb" => "CppSyntaxAdapter", + "csharp.rb" => "CSharpSyntaxAdapter", + "java.rb" => 
"JavaSyntaxAdapter", + "swift.rb" => "SwiftSyntaxAdapter", + "kotlin.rb" => "KotlinSyntaxAdapter", + "php.rb" => "PhpSyntaxAdapter" + }.freeze def test_detectors_do_not_talk_to_tree_sitter_nodes_directly offenders = scan_files(DETECTOR_FILES, RAW_TREE_SITTER_PATTERNS) @@ -68,6 +91,33 @@ def test_language_adapter_implementations_do_not_live_in_syntax_rb ) end + def test_each_language_adapter_has_its_own_file + offenders = LANGUAGE_ADAPTER_FILES.filter_map do |file_name, class_name| + path = File.join(LIB, "syntax", file_name) + next "#{file_name}: missing file" unless File.file?(path) + + source = File.read(path) + next if source.match?(/^\s*class\s+#{Regexp.escape(class_name)}\b/) + + "#{file_name}: missing #{class_name}" + end + + assert_empty offenders, format_offenders( + "Every supported language must have an explicit adapter file", + offenders + ) + end + + def test_adapter_loader_does_not_absorb_language_implementations + adapters_rb = File.join(LIB, "syntax", "adapters.rb") + offenders = scan_files([adapters_rb], ADAPTER_LOADER_LANGUAGE_IMPLEMENTATION_PATTERNS) + + assert_empty offenders, format_offenders( + "Adapter loader must only load adapters and shared base helpers", + offenders + ) + end + private def scan_files(files, patterns) diff --git a/gems/decomplex/test/examples_oracle_test.rb b/gems/decomplex/test/examples_oracle_test.rb index b176ed9e3..c8353eca2 100644 --- a/gems/decomplex/test/examples_oracle_test.rb +++ b/gems/decomplex/test/examples_oracle_test.rb @@ -7,9 +7,7 @@ class ExamplesOracleTest < Minitest::Test EXAMPLES_ROOT = File.expand_path("../examples", __dir__) ORACLE_DIR = File.join(EXAMPLES_ROOT, "oracles") - SOURCE_EXTENSIONS = %w[ - .rb .rs .zig .py .js .ts .cs .lua .c .cpp .java .kt .swift .go - ].freeze + SOURCE_EXTENSIONS = Decomplex::Syntax.supported_exts.freeze LOCATION_KEYS = %w[ at boundaries boundary_crossings component_lines defn examples file gap_lines line locations predicate raw reason sites span spans source diff --git 
a/package-lock.json b/package-lock.json new file mode 100644 index 000000000..bc510afc1 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,413 @@ +{ + "name": "cheat", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "dependencies": { + "@tree-sitter-grammars/tree-sitter-lua": "^0.4.1", + "@tree-sitter-grammars/tree-sitter-zig": "^1.1.2", + "tree-sitter-c": "^0.24.1", + "tree-sitter-c-sharp": "^0.23.5", + "tree-sitter-cpp": "^0.23.4", + "tree-sitter-go": "^0.25.0", + "tree-sitter-java": "^0.23.5", + "tree-sitter-javascript": "^0.25.0", + "tree-sitter-kotlin": "^0.3.8", + "tree-sitter-php": "^0.24.2", + "tree-sitter-python": "^0.25.0", + "tree-sitter-ruby": "^0.23.1", + "tree-sitter-rust": "^0.24.0", + "tree-sitter-swift": "^0.7.1", + "tree-sitter-typescript": "^0.23.2" + } + }, + "node_modules/@tree-sitter-grammars/tree-sitter-lua": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@tree-sitter-grammars/tree-sitter-lua/-/tree-sitter-lua-0.4.1.tgz", + "integrity": "sha512-EwagFaU6ZveVk18/Y8qUhZkkiBKnQ7dSCHbm//TUroLVKy3i1rOYGy/cNHtSkAb1eDvS1HhCLybH2S541Cya/g==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.5.0", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.22.4" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/@tree-sitter-grammars/tree-sitter-zig": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@tree-sitter-grammars/tree-sitter-zig/-/tree-sitter-zig-1.1.2.tgz", + "integrity": "sha512-J0L31HZ2isy3F5zb2g5QWQOv2r/pbruQNL9ADhuQv2pn5BQOzxt80WcEJaYXBeuJ8GHxVT42slpCna8k1c8LOw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.0", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.22.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/isexe": { + "version": "2.0.0", + 
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "license": "ISC" + }, + "node_modules/node-addon-api": { + "version": "8.8.0", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.8.0.tgz", + "integrity": "sha512-c5Ko1fZJIJmzhFIkhRN76WTq+fC6tWnGy9CXA0fA+XygsWZmEwG8vmbkNqxMyoaa0Tin4djul49NzdVcJJcjeA==", + "license": "MIT", + "engines": { + "node": "^18 || ^20 || >= 21" + } + }, + "node_modules/node-gyp-build": { + "version": "4.8.4", + "resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.8.4.tgz", + "integrity": "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==", + "license": "MIT", + "bin": { + "node-gyp-build": "bin.js", + "node-gyp-build-optional": "optional.js", + "node-gyp-build-test": "build-test.js" + } + }, + "node_modules/tree-sitter-c": { + "version": "0.24.1", + "resolved": "https://registry.npmjs.org/tree-sitter-c/-/tree-sitter-c-0.24.1.tgz", + "integrity": "sha512-lkYwWN3SRecpvaeqmFKkuPNR3ZbtnvHU+4XAEEkJdrp3JfSp2pBrhXOtvfsENUneye76g889Y0ddF2DM0gEDpA==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.1", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.22.4" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-c-sharp": { + "version": "0.23.5", + "resolved": "https://registry.npmjs.org/tree-sitter-c-sharp/-/tree-sitter-c-sharp-0.23.5.tgz", + "integrity": "sha512-xJGOeXPMmld0nES5+080N/06yY6LQi+KWGWV4LfZaZe6srJPtUtfhIbRSN7EZN6IaauzW28v6W4QHFwmeUW6HQ==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.25.0" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true 
+ } + } + }, + "node_modules/tree-sitter-cli": { + "version": "0.23.2", + "resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.23.2.tgz", + "integrity": "sha512-kPPXprOqREX+C/FgUp2Qpt9jd0vSwn+hOgjzVv/7hapdoWpa+VeWId53rf4oNNd29ikheF12BYtGD/W90feMbA==", + "hasInstallScript": true, + "license": "MIT", + "bin": { + "tree-sitter": "cli.js" + }, + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/tree-sitter-cpp": { + "version": "0.23.4", + "resolved": "https://registry.npmjs.org/tree-sitter-cpp/-/tree-sitter-cpp-0.23.4.tgz", + "integrity": "sha512-qR5qUDyhZ5jJ6V8/umiBxokRbe89bCGmcq/dk94wI4kN86qfdV8k0GHIUEKaqWgcu42wKal5E97LKpLeVW8sKw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.1", + "node-gyp-build": "^4.8.2", + "tree-sitter-c": "^0.23.1" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-cpp/node_modules/tree-sitter-c": { + "version": "0.23.6", + "resolved": "https://registry.npmjs.org/tree-sitter-c/-/tree-sitter-c-0.23.6.tgz", + "integrity": "sha512-0dxXKznVyUA0s6PjNolJNs2yF87O5aL538A/eR6njA5oqX3C3vH4vnx3QdOKwuUdpKEcFdHuiDpRKLLCA/tjvQ==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.0", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.22.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-go": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/tree-sitter-go/-/tree-sitter-go-0.25.0.tgz", + "integrity": "sha512-APBc/Dq3xz/e35Xpkhb1blu5UgW+2E3RyGWawZSCNcbGwa7jhSQPS8KsUupuzBla8PCo8+lz9W/JDJjmfRa2tw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.1", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.25.0" + }, + "peerDependenciesMeta": { + 
"tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-java": { + "version": "0.23.5", + "resolved": "https://registry.npmjs.org/tree-sitter-java/-/tree-sitter-java-0.23.5.tgz", + "integrity": "sha512-Yju7oQ0Xx7GcUT01mUglPP+bYfvqjNCGdxqigTnew9nLGoII42PNVP3bHrYeMxswiCRM0yubWmN5qk+zsg0zMA==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-javascript": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/tree-sitter-javascript/-/tree-sitter-javascript-0.25.0.tgz", + "integrity": "sha512-1fCbmzAskZkxcZzN41sFZ2br2iqTYP3tKls1b/HKGNPQUVOpsUxpmGxdN/wMqAk3jYZnYBR1dd/y/0avMeU7dw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.1", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.25.0" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-kotlin": { + "version": "0.3.8", + "resolved": "https://registry.npmjs.org/tree-sitter-kotlin/-/tree-sitter-kotlin-0.3.8.tgz", + "integrity": "sha512-A4obq6bjzmYrA+F0JLLoheFPcofFkctNaZSpnDd+GPn1SfVZLY4/GG4C0cYVBTOShuPBGGAOPLM1JWLZQV4m1g==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^7.1.0", + "node-gyp-build": "^4.8.0" + }, + "peerDependencies": { + "tree-sitter": "^0.21.0" + }, + "peerDependenciesMeta": { + "tree_sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-kotlin/node_modules/node-addon-api": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.1.tgz", + "integrity": "sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ==", + "license": "MIT" + }, + 
"node_modules/tree-sitter-php": { + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/tree-sitter-php/-/tree-sitter-php-0.24.2.tgz", + "integrity": "sha512-zwgAePc/HozNaWOOfwRAA+3p8yhuehRw8Fb7vn5qd2XjiIc93uJPryDTMYTSjBRjVIUg/KY6pM3rRzs8dSwKfw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.22.4" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-python": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/tree-sitter-python/-/tree-sitter-python-0.25.0.tgz", + "integrity": "sha512-eCmJx6zQa35GxaCtQD+wXHOhYqBxEL+bp71W/s3fcDMu06MrtzkVXR437dRrCrbrDbyLuUDJpAgycs7ncngLXw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.5.0", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.25.0" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-ruby": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/tree-sitter-ruby/-/tree-sitter-ruby-0.23.1.tgz", + "integrity": "sha512-d9/RXgWjR6HanN7wTYhS5bpBQLz1VkH048Vm3CodPGyJVnamXMGb8oEhDypVCBq4QnHui9sTXuJBBP3WtCw5RA==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-rust": { + "version": "0.24.0", + "resolved": "https://registry.npmjs.org/tree-sitter-rust/-/tree-sitter-rust-0.24.0.tgz", + "integrity": "sha512-NWemUDf629Tfc90Y0Z55zuwPCAHkLxWnMf2RznYu4iBkkrQl2o/CHGB7Cr52TyN5F1DAx8FmUnDtCy9iUkXZEQ==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.4" + }, + 
"peerDependencies": { + "tree-sitter": "^0.22.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-swift": { + "version": "0.7.1", + "resolved": "https://registry.npmjs.org/tree-sitter-swift/-/tree-sitter-swift-0.7.1.tgz", + "integrity": "sha512-pneKVTuGamaBsqqqfB9BvNQjktzh/0IVPR54jLB5Fq/JTDQwYHd0Wo6pVyZ5jAYpbztzq+rJ/rpL9ruxTmSoKw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.0.0", + "node-gyp-build": "^4.8.0", + "tree-sitter-cli": "^0.23", + "which": "2.0.2" + }, + "peerDependencies": { + "tree-sitter": "^0.22.1" + }, + "peerDependenciesMeta": { + "tree_sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-typescript": { + "version": "0.23.2", + "resolved": "https://registry.npmjs.org/tree-sitter-typescript/-/tree-sitter-typescript-0.23.2.tgz", + "integrity": "sha512-e04JUUKxTT53/x3Uq1zIL45DoYKVfHH4CZqwgZhPg5qYROl5nQjV+85ruFzFGZxu+QeFVbRTPDRnqL9UbU4VeA==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2", + "tree-sitter-javascript": "^0.23.1" + }, + "peerDependencies": { + "tree-sitter": "^0.21.0" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-typescript/node_modules/tree-sitter-javascript": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/tree-sitter-javascript/-/tree-sitter-javascript-0.23.1.tgz", + "integrity": "sha512-/bnhbrTD9frUYHQTiYnPcxyHORIw157ERBa6dqzaKxvR/x3PC4Yzd+D1pZIMS6zNg2v3a8BZ0oK7jHqsQo9fWA==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": 
"https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 000000000..0d38f430d --- /dev/null +++ b/package.json @@ -0,0 +1,20 @@ +{ + "private": true, + "dependencies": { + "@tree-sitter-grammars/tree-sitter-lua": "^0.4.1", + "@tree-sitter-grammars/tree-sitter-zig": "^1.1.2", + "tree-sitter-c": "^0.24.1", + "tree-sitter-c-sharp": "^0.23.5", + "tree-sitter-cpp": "^0.23.4", + "tree-sitter-go": "^0.25.0", + "tree-sitter-java": "^0.23.5", + "tree-sitter-javascript": "^0.25.0", + "tree-sitter-kotlin": "^0.3.8", + "tree-sitter-php": "^0.24.2", + "tree-sitter-python": "^0.25.0", + "tree-sitter-ruby": "^0.23.1", + "tree-sitter-rust": "^0.24.0", + "tree-sitter-swift": "^0.7.1", + "tree-sitter-typescript": "^0.23.2" + } +} diff --git a/spec/decomplex_architecture_invariants_spec.rb b/spec/decomplex_architecture_invariants_spec.rb index e0bdfd10f..c3c15272d 100644 --- a/spec/decomplex_architecture_invariants_spec.rb +++ b/spec/decomplex_architecture_invariants_spec.rb @@ -38,6 +38,31 @@ /:\s*TreeSitterLanguageAdapter\.new\(/ }.freeze + ADAPTER_LOADER_LANGUAGE_IMPLEMENTATION_PATTERNS = { + "language lexicons belong in the language adapter file" => + /^\s*[A-Z_]+_LEXICON\s*=/, + "concrete language adapters belong in the language adapter file" => + /^\s*class\s+(?!TreeSitterLanguageAdapter\b)\w+SyntaxAdapter\b/ + }.freeze + + LANGUAGE_ADAPTER_FILES = { + "ruby.rb" => "RubySyntaxAdapter", + "python.rb" => "PythonSyntaxAdapter", + "javascript.rb" => "JavaScriptSyntaxAdapter", + "typescript.rb" => "TypeScriptSyntaxAdapter", + "go.rb" => "GoSyntaxAdapter", + "rust.rb" => "RustSyntaxAdapter", + "zig.rb" => "ZigSyntaxAdapter", + "lua.rb" => 
"LuaSyntaxAdapter", + "c.rb" => "CSyntaxAdapter", + "cpp.rb" => "CppSyntaxAdapter", + "csharp.rb" => "CSharpSyntaxAdapter", + "java.rb" => "JavaSyntaxAdapter", + "swift.rb" => "SwiftSyntaxAdapter", + "kotlin.rb" => "KotlinSyntaxAdapter", + "php.rb" => "PhpSyntaxAdapter" + }.freeze + def scan_files(files, patterns) files.sort.flat_map do |path| rel = path.delete_prefix("#{ROOT}/") @@ -77,4 +102,27 @@ def format_offenders(message, offenders) expect(offenders).to be_empty, format_offenders("Core syntax.rb must not absorb concrete language adapter implementation", offenders) end + + it "keeps one adapter file per supported language" do + offenders = LANGUAGE_ADAPTER_FILES.filter_map do |file_name, class_name| + path = File.join(DECOMPLEX_LIB, "syntax", file_name) + next "#{file_name}: missing file" unless File.file?(path) + + source = File.read(path) + next if source.match?(/^\s*class\s+#{Regexp.escape(class_name)}\b/) + + "#{file_name}: missing #{class_name}" + end + + expect(offenders).to be_empty, + format_offenders("Every supported language must have an explicit adapter file", offenders) + end + + it "keeps the adapter loader from absorbing language implementations" do + adapters_rb = File.join(DECOMPLEX_LIB, "syntax", "adapters.rb") + offenders = scan_files([adapters_rb], ADAPTER_LOADER_LANGUAGE_IMPLEMENTATION_PATTERNS) + + expect(offenders).to be_empty, + format_offenders("Adapter loader must only load adapters and shared base helpers", offenders) + end end From a19082b806018e02d83cb992a981e63b72067693 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Fri, 19 Jun 2026 19:44:49 +0000 Subject: [PATCH 29/52] Refactor decomplex Rust adapters --- .../lib/decomplex/ast/adapters/base.rb | 1 + .../lib/decomplex/ast/adapters/rust.rb | 10 + .../lib/decomplex/ast/legacy_normalizer.rb | 1 + .../rust/src/decomplex/architecture_test.rs | 261 ++ gems/decomplex/rust/src/decomplex/ast.rs | 2334 ++--------------- .../rust/src/decomplex/ast/adapters/base.rs | 978 +++++++ 
.../rust/src/decomplex/ast/adapters/c.rs | 5 + .../rust/src/decomplex/ast/adapters/cpp.rs | 5 + .../rust/src/decomplex/ast/adapters/csharp.rs | 5 + .../rust/src/decomplex/ast/adapters/go.rs | 5 + .../rust/src/decomplex/ast/adapters/java.rs | 5 + .../src/decomplex/ast/adapters/javascript.rs | 1 + .../rust/src/decomplex/ast/adapters/kotlin.rs | 5 + .../rust/src/decomplex/ast/adapters/lua.rs | 497 ++++ .../rust/src/decomplex/ast/adapters/mod.rs | 67 + .../rust/src/decomplex/ast/adapters/python.rs | 557 ++++ .../rust/src/decomplex/ast/adapters/ruby.rs | 267 ++ .../rust/src/decomplex/ast/adapters/rust.rs | 5 + .../rust/src/decomplex/ast/adapters/swift.rs | 5 + .../src/decomplex/ast/adapters/typescript.rs | 247 ++ .../rust/src/decomplex/ast/adapters/zig.rs | 5 + .../decomplex/detectors/false_simplicity.rs | 679 +---- .../decomplex/detectors/flay_similarity.rs | 566 +--- .../detectors/state_branch_density.rs | 188 +- gems/decomplex/rust/src/decomplex/mod.rs | 3 + gems/decomplex/rust/src/decomplex/syntax.rs | 15 + .../src/decomplex/syntax/adapters/base.rs | 1052 ++++++++ .../rust/src/decomplex/syntax/adapters/c.rs | 122 + .../rust/src/decomplex/syntax/adapters/cpp.rs | 125 + .../src/decomplex/syntax/adapters/csharp.rs | 91 + .../adapters/false_simplicity_lexicon.rs | 673 +++++ .../rust/src/decomplex/syntax/adapters/go.rs | 111 + .../src/decomplex/syntax/adapters/java.rs | 87 + .../decomplex/syntax/adapters/javascript.rs | 95 + .../src/decomplex/syntax/adapters/kotlin.rs | 109 + .../rust/src/decomplex/syntax/adapters/lua.rs | 76 + .../rust/src/decomplex/syntax/adapters/mod.rs | 68 + .../src/decomplex/syntax/adapters/python.rs | 95 + .../src/decomplex/syntax/adapters/ruby.rs | 396 +++ .../src/decomplex/syntax/adapters/rust.rs | 103 + .../src/decomplex/syntax/adapters/swift.rs | 105 + .../decomplex/syntax/adapters/typescript.rs | 95 + .../rust/src/decomplex/syntax/adapters/zig.rs | 79 + .../decomplex/syntax/tree_sitter_adapter.rs | 923 +------ gems/decomplex/test/ast_test.rb 
| 3 +- 45 files changed, 6890 insertions(+), 4235 deletions(-) create mode 100644 gems/decomplex/lib/decomplex/ast/adapters/rust.rb create mode 100644 gems/decomplex/rust/src/decomplex/architecture_test.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/base.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/c.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/cpp.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/csharp.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/go.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/java.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/javascript.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/kotlin.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/lua.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/mod.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/python.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/ruby.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/rust.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/swift.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/typescript.rs create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/zig.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/false_simplicity_lexicon.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs create mode 100644 
gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/mod.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs diff --git a/gems/decomplex/lib/decomplex/ast/adapters/base.rb b/gems/decomplex/lib/decomplex/ast/adapters/base.rb index e82e65176..46caeb593 100644 --- a/gems/decomplex/lib/decomplex/ast/adapters/base.rb +++ b/gems/decomplex/lib/decomplex/ast/adapters/base.rb @@ -95,6 +95,7 @@ def for(document) when :python then PythonTreeSitterNormalizationAdapter.new(document) when :lua then LuaTreeSitterNormalizationAdapter.new(document) when :typescript, :javascript then TypeScriptTreeSitterNormalizationAdapter.new(document) + when :rust then RustTreeSitterNormalizationAdapter.new(document) else raise UnsupportedLanguageError, "unsupported AST normalization language #{document&.language.inspect}" diff --git a/gems/decomplex/lib/decomplex/ast/adapters/rust.rb b/gems/decomplex/lib/decomplex/ast/adapters/rust.rb new file mode 100644 index 000000000..50cd53d86 --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/adapters/rust.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +require_relative "base" + +module Decomplex + module Ast + class RustTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + end + end +end diff --git a/gems/decomplex/lib/decomplex/ast/legacy_normalizer.rb 
b/gems/decomplex/lib/decomplex/ast/legacy_normalizer.rb index b30ae7903..449f6fc6c 100644 --- a/gems/decomplex/lib/decomplex/ast/legacy_normalizer.rb +++ b/gems/decomplex/lib/decomplex/ast/legacy_normalizer.rb @@ -8,6 +8,7 @@ require_relative "adapters/python" require_relative "adapters/lua" require_relative "adapters/typescript" +require_relative "adapters/rust" module Decomplex module Ast diff --git a/gems/decomplex/rust/src/decomplex/architecture_test.rs b/gems/decomplex/rust/src/decomplex/architecture_test.rs new file mode 100644 index 000000000..9c379865d --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/architecture_test.rs @@ -0,0 +1,261 @@ +use std::fs; +use std::path::{Path, PathBuf}; + +fn crate_src() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")).join("src/decomplex") +} + +#[test] +fn every_supported_language_has_a_syntax_adapter_file() { + let adapters = crate_src().join("syntax/adapters"); + let expected = [ + "c.rs", + "cpp.rs", + "csharp.rs", + "go.rs", + "java.rs", + "javascript.rs", + "kotlin.rs", + "lua.rs", + "python.rs", + "ruby.rs", + "rust.rs", + "swift.rs", + "typescript.rs", + "zig.rs", + ]; + + for file in expected { + assert!( + adapters.join(file).is_file(), + "missing syntax adapter file {}", + adapters.join(file).display() + ); + } +} + +#[test] +fn every_supported_language_has_an_ast_adapter_file() { + let adapters = crate_src().join("ast/adapters"); + let expected = [ + "c.rs", + "cpp.rs", + "csharp.rs", + "go.rs", + "java.rs", + "javascript.rs", + "kotlin.rs", + "lua.rs", + "python.rs", + "ruby.rs", + "rust.rs", + "swift.rs", + "typescript.rs", + "zig.rs", + ]; + + for file in expected { + assert!( + adapters.join(file).is_file(), + "missing AST adapter file {}", + adapters.join(file).display() + ); + } +} + +#[test] +fn tree_sitter_adapter_does_not_define_concrete_language_profiles() { + let path = crate_src().join("syntax/tree_sitter_adapter.rs"); + let source = fs::read_to_string(&path).expect("read 
tree_sitter_adapter.rs"); + let forbidden = [ + "default_profile!", + "struct RubyProfile", + "struct PythonProfile", + "struct JavaScriptProfile", + "struct JavaProfile", + "struct TypeScriptProfile", + "struct SwiftProfile", + "struct KotlinProfile", + "struct GoProfile", + "struct RustProfile", + "struct ZigProfile", + "struct LuaProfile", + "struct CProfile", + "struct CppProfile", + "struct CSharpProfile", + ]; + + for pattern in forbidden { + assert!( + !source.contains(pattern), + "{} should live in syntax/adapters, not tree_sitter_adapter.rs", + pattern + ); + } +} + +#[test] +fn ast_normalizer_does_not_define_a_language_adapter_enum() { + let path = crate_src().join("ast.rs"); + let source = fs::read_to_string(&path).expect("read ast.rs"); + for pattern in [ + "enum TreeSitterNormalizationAdapter", + "impl TreeSitterNormalizationAdapter", + "TreeSitterNormalizationAdapter::", + ] { + assert!( + !source.contains(pattern), + "{} should live as polymorphic ast/adapters implementations", + pattern + ); + } +} + +#[test] +fn ast_adapters_do_not_delegate_through_a_language_kind_selector() { + let adapters = crate_src().join("ast/adapters"); + for entry in fs::read_dir(&adapters).expect("read ast adapters dir") { + let path = entry.expect("ast adapter entry").path(); + if path.extension().and_then(|ext| ext.to_str()) != Some("rs") { + continue; + } + let source = fs::read_to_string(&path).expect("read ast adapter"); + for pattern in [ + "TreeSitterNormalizationAdapter", + "fn kind(&self)", + "self.kind()", + ] { + assert!( + !source.contains(pattern), + "{} delegates through {}; put behavior directly in the adapter", + path.display(), + pattern + ); + } + } +} + +#[test] +fn detectors_do_not_import_tree_sitter_directly() { + let detectors = crate_src().join("detectors"); + let entries = fs::read_dir(&detectors).expect("read detectors dir"); + + for entry in entries { + let path = entry.expect("detector entry").path(); + if path.extension().and_then(|ext| 
ext.to_str()) != Some("rs") { + continue; + } + let source = fs::read_to_string(&path).expect("read detector source"); + assert!( + !source.contains("tree_sitter"), + "{} imports tree_sitter directly; detectors should consume normalized syntax/AST facts", + path.display() + ); + } +} + +#[test] +fn false_simplicity_detector_does_not_own_language_lexicons() { + let path = crate_src().join("detectors/false_simplicity.rs"); + let source = fs::read_to_string(&path).expect("read false_simplicity.rs"); + for pattern in [ + "fn lexicon_for", + "struct Lexicon", + "RUBY_CONTEXT_PAIRS", + "RUBY_CALLBACK_SET", + "RUBY_CORE_CONSTS", + "PYTHON_CONTEXT_PAIRS", + "JS_CONTEXT_PAIRS", + "COMMON_CALLBACK_SET", + ] { + assert!( + !source.contains(pattern), + "{} belongs in syntax/adapters, not the false_simplicity detector", + pattern + ); + } +} + +#[test] +fn state_branch_density_detector_does_not_own_ruby_source_mining() { + let path = crate_src().join("detectors/state_branch_density.rs"); + let source = fs::read_to_string(&path).expect("read state_branch_density.rs"); + for pattern in [ + "T::Struct", + "T\\.type_alias", + "const\\s+:", + "fn immutable_struct_readers", + "fn immutable_struct_reader_types", + "fn type_aliases", + "fn extract_method_param_types", + "fn sig_param_types", + ] { + assert!( + !source.contains(pattern), + "{} belongs in the Ruby syntax adapter, not state_branch_density", + pattern + ); + } +} + +#[test] +fn flay_similarity_detector_does_not_own_clone_fingerprint_grammar() { + let path = crate_src().join("detectors/flay_similarity.rs"); + let source = fs::read_to_string(&path).expect("read flay_similarity.rs"); + for pattern in [ + "RawNode", + "CLONE_CANDIDATE_KINDS", + "IDENTIFIER_KINDS", + "LITERAL_KINDS", + "fn candidate_node", + "fn fingerprint", + "fn typed_struct_schema_text", + ] { + assert!( + !source.contains(pattern), + "{} belongs in syntax/adapters, not flay_similarity", + pattern + ); + } +} + +#[test] +fn 
ast_normalizer_does_not_branch_on_language_after_parser_setup() { + let path = crate_src().join("ast.rs"); + let source = fs::read_to_string(&path).expect("read ast.rs"); + let normalizer_source = source + .split_once("struct TreeSitterNormalizer") + .map(|(_, rest)| rest) + .unwrap_or(&source); + let language_branch_count = [ + "Language::Ruby", + "Language::Python", + "Language::JavaScript", + "Language::Java", + "Language::TypeScript", + "Language::Swift", + "Language::Kotlin", + "Language::Go", + "Language::Rust", + "Language::Zig", + "Language::Lua", + "Language::C", + "Language::Cpp", + "Language::CSharp", + "Self::Ruby", + "Self::Python", + "Self::Lua", + "Self::TypeScript", + "Self::Default", + "TreeSitterNormalizationAdapter::Python", + "TreeSitterNormalizationAdapter::Lua", + ] + .iter() + .map(|pattern| normalizer_source.matches(pattern).count()) + .sum::(); + + assert_eq!( + language_branch_count, 0, + "ast.rs normalizer branches on language; put behavior in ast/adapters instead" + ); +} diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs index edb348779..3f23318a9 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -6,6 +6,9 @@ use std::fs; use std::path::Path; use tree_sitter::{Language as TreeSitterLanguage, Node as TreeSitterNode, Parser}; +mod adapters; +use adapters::{normalization_adapter, AstNormalizationAdapter, NamedChildrenAction}; + pub type Span = [usize; 4]; const COMPARISON_OPERATORS: &[&str] = &["==", "!=", "===", "!==", "<", "<=", ">", ">="]; const OPERATOR_CALL_OPERATORS: &[&str] = &[ @@ -279,1523 +282,194 @@ pub fn node(child: &Child) -> Option<&Node> { match child { Child::Node(node) => Some(node), _ => None, - } -} - -pub fn slice(node: &Node, _lines: &[String]) -> String { - normalize_text(&node.text) -} - -pub fn body_stmts(defn_node: &Node) -> Vec<&Node> { - let scope_index = if defn_node.r#type == "DEFS" { 2 } else { 1 }; - let 
Some(scope) = defn_node.children.get(scope_index).and_then(node) else { - return Vec::new(); - }; - if scope.r#type != "SCOPE" { - return Vec::new(); - } - let Some(body) = scope.children.get(2).and_then(node) else { - return Vec::new(); - }; - if body.r#type == "BLOCK" { - body.children.iter().filter_map(node).collect() - } else { - vec![body] - } -} - -pub fn canon_polarity(text: &str) -> (String, bool) { - let trimmed = text.trim(); - if let Some(rest) = trimmed.strip_prefix('!') { - ( - rest.trim_start_matches('(') - .trim_end_matches(')') - .trim() - .to_string(), - true, - ) - } else { - (trimmed.to_string(), false) - } -} - -pub fn flatten_and(node: &Node) -> Vec<&Node> { - if node.r#type != "AND" { - return vec![node]; - } - node.children - .iter() - .filter_map(self::node) - .flat_map(flatten_and) - .collect() -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum TreeSitterNormalizationAdapter { - Default, - Ruby, - Python, - Lua, - TypeScript, -} - -const QUESTION_COLON_TERNARY_KINDS: &[&str] = &[ - "body_statement", - "block_body", - "statement", - "argument_list", - "conditional", -]; -const TYPESCRIPT_TERNARY_KINDS: &[&str] = &[ - "body_statement", - "block_body", - "statement", - "argument_list", - "conditional", - "ternary_expression", -]; -const CASE_ARGUMENT_WHEN_KINDS: &[&str] = &[ - "when", - "switch_case", - "case_clause", - "expression_case", - "case_statement", - "switch_section", - "switch_block_statement_group", - "switch_entry", - "when_entry", - "match_arm", -]; -const CASE_ELSE_KINDS: &[&str] = &["else", "switch_default"]; -const CASE_DEFAULT_PATTERN_KINDS: &[&str] = &["case_pattern", "match_pattern", "pattern"]; -const LEADING_FUNCTION_WRAPPER_KINDS: &[&str] = &["body_statement", "statement"]; -const PYTHON_LEADING_FUNCTION_WRAPPER_KINDS: &[&str] = &["block"]; -const LUA_LEADING_FUNCTION_WRAPPER_KINDS: &[&str] = &["block"]; -const RUBY_LEADING_FUNCTION_TARGET_KINDS: &[&str] = &["method", "singleton_method"]; -const 
PYTHON_LEADING_FUNCTION_TARGET_KINDS: &[&str] = &["function_definition"]; -const LUA_LEADING_FUNCTION_TARGET_KINDS: &[&str] = &["function_declaration"]; -const OWNER_STATEMENT_NESTED_KINDS: &[&str] = - &["class", "class_definition", "class_declaration", "module"]; -const LEADING_OWNER_WRAPPER_KINDS: &[&str] = &["body_statement", "statement"]; -const PYTHON_LEADING_OWNER_WRAPPER_KINDS: &[&str] = &["block"]; -const OWNER_NODE_KINDS: &[&str] = &["class", "class_definition", "class_declaration", "module"]; -const IF_NODE_KINDS: &[&str] = &[ - "if", - "if_statement", - "if_modifier", - "unless", - "unless_modifier", - "if_expression", - "conditional", -]; -const LEADING_IF_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; -const PYTHON_LEADING_IF_WRAPPER_KINDS: &[&str] = &["block"]; -const LUA_LEADING_IF_WRAPPER_KINDS: &[&str] = &["block"]; -const LEADING_CASE_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; -const CASE_NODE_KINDS: &[&str] = &[ - "case", - "switch_statement", - "expression_switch_statement", - "switch_expression", - "match_statement", - "match_expression", - "when_expression", -]; -const LEADING_LOOP_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; -const LOOP_NODE_KINDS: &[&str] = &[ - "while", - "while_statement", - "while_modifier", - "until", - "until_modifier", -]; -const RESCUE_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block_body", "statement"]; -const ENSURE_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block_body", "statement"]; -const ARRAY_LITERAL_WRAPPER_KINDS: &[&str] = &[ - "body_statement", - "block", - "block_body", - "statement", - "argument_list", - "expression_statement", -]; -const ARRAY_LITERAL_NODE_KINDS: &[&str] = &["array", "list"]; -const ELEMENT_REFERENCE_WRAPPER_KINDS: &[&str] = &[ - "body_statement", - "block", - "block_body", - "statement", - "expression_statement", - "expression_list", -]; -const 
ELEMENT_REFERENCE_NODE_KINDS: &[&str] = &[ - "element_reference", - "subscript", - "subscript_expression", - "bracket_index_expression", -]; -const HASH_LITERAL_WRAPPER_KINDS: &[&str] = &[ - "body_statement", - "block", - "block_body", - "statement", - "argument_list", - "expression_statement", - "parenthesized_expression", -]; -const HASH_LITERAL_NODE_KINDS: &[&str] = &["hash", "dictionary", "object", "table_constructor"]; -const STATEMENT_BLOCK_PARENT_KINDS: &[&str] = &[ - "method_declaration", - "constructor_declaration", - "function_declaration", - "function_body", - "if_statement", - "while_statement", - "for_statement", - "enhanced_for_statement", - "try_statement", - "catch_clause", - "finally_clause", - "do_statement", - "lambda_expression", -]; -const EMPTY_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; -const HEREDOC_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block_body", "statement", "then"]; -const INTERPOLATED_STATEMENT_WRAPPER_KINDS: &[&str] = - &["body_statement", "block_body", "statement", "argument_list"]; -const CONCATENATED_STRING_WRAPPER_KINDS: &[&str] = - &["body_statement", "block_body", "statement", "argument_list"]; -const PYTHON_CONCATENATED_STRING_WRAPPER_KINDS: &[&str] = &[ - "body_statement", - "block_body", - "statement", - "argument_list", - "block", - "expression_statement", -]; -const CONCATENATED_STRING_NODE_KINDS: &[&str] = &["chained_string", "concatenated_string"]; - -struct TernaryParts<'tree> { - condition: TreeSitterNode<'tree>, - positive: Vec>, - negative: Vec>, -} - -impl TreeSitterNormalizationAdapter { - fn for_language(language: Language) -> Self { - match language { - Language::Ruby => Self::Ruby, - Language::Python => Self::Python, - Language::Lua => Self::Lua, - Language::TypeScript | Language::JavaScript => Self::TypeScript, - _ => Self::Default, - } - } - - fn ruby(self) -> bool { - self == Self::Ruby - } - - fn yield_statement(self, node: TreeSitterNode<'_>, source: 
&str) -> bool { - let allowed = match self { - Self::Python => matches!( - node.kind(), - "body_statement" | "block" | "block_body" | "expression_statement" | "statement" - ), - _ => matches!( - node.kind(), - "body_statement" | "block" | "block_body" | "statement" - ), - }; - if !allowed { - return false; - } - let named_children = node - .children(&mut node.walk()) - .filter(|child| child.is_named()) - .collect::>(); - named_children.len() == 1 - && named_children[0].kind() == "yield" - && node_text(named_children[0], source) == node_text(node, source) - } - - fn super_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { - if self != Self::Ruby { - return false; - } - if !matches!( - node.kind(), - "body_statement" | "block" | "block_body" | "call" | "statement" - ) { - return false; - } - if node_text(node, source).trim() == "super" { - return true; - } - let raw = raw_named_children(node); - let named = if raw.len() == 1 && raw[0].kind() == "call" { - raw_named_children(raw[0]) - } else { - raw - }; - named - .first() - .map(|child| child.kind() == "super") - .unwrap_or(false) - && named - .iter() - .skip(1) - .all(|child| child.kind() == "argument_list") - } - - fn safe_navigation_call(self, node: TreeSitterNode<'_>, source: &str) -> bool { - let ruby_safe_navigation = node - .children(&mut node.walk()) - .any(|child| !child.is_named() && node_text(child, source) == "&."); - if self != Self::TypeScript { - return ruby_safe_navigation; - } - - ruby_safe_navigation - || node - .children(&mut node.walk()) - .any(|child| child.kind() == "optional_chain" && node_text(child, source) == "?.") - || (node.kind() == "call_expression" - && named_children(node) - .into_iter() - .any(|child| self.safe_navigation_call(child, source))) - } - - fn ternary_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { - self.ternary_parts(node, source).is_some() - } - - fn ternary_parts<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> 
Option> { - match self { - Self::Python => { - if node.kind() != "conditional_expression" { - return None; - } - let named = named_children(node); - Some(TernaryParts { - condition: *named.get(1)?, - positive: vec![*named.first()?], - negative: vec![*named.get(2)?], - }) - } - Self::Lua => None, - Self::TypeScript => { - question_colon_ternary_parts(node, source, TYPESCRIPT_TERNARY_KINDS) - } - Self::Default | Self::Ruby => { - question_colon_ternary_parts(node, source, QUESTION_COLON_TERNARY_KINDS) - } - } - } - - fn case_argument_list(self, node: TreeSitterNode<'_>, source: &str) -> bool { - if self != Self::Ruby || node.kind() != "argument_list" { - return false; - } - let raw_named = named_children(node); - let target = if raw_named.len() == 1 - && raw_named[0].kind() == "case" - && node_text(raw_named[0], source) == node_text(node, source) - { - raw_named[0] - } else { - node - }; - let has_case_keyword = target - .children(&mut target.walk()) - .any(|child| !child.is_named() && child.kind() == "case"); - has_case_keyword - && named_children(target) - .iter() - .any(|child| CASE_ARGUMENT_WHEN_KINDS.contains(&child.kind())) - } - - fn case_arm(self, node: TreeSitterNode<'_>, source: &str) -> bool { - CASE_ARGUMENT_WHEN_KINDS.contains(&node.kind()) && !self.case_else_arm(node, source) - } - - fn case_else_node<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - let mut stack = named_children(node); - while !stack.is_empty() { - let child = stack.remove(0); - if self.case_else_node_kind(child, source) { - return Some(child); - } - if CASE_ARGUMENT_WHEN_KINDS.contains(&child.kind()) { - continue; - } - if !function_kind(child.kind()) { - stack.extend(named_children(child)); - } - } - None - } - - fn case_else_node_kind(self, node: TreeSitterNode<'_>, source: &str) -> bool { - CASE_ELSE_KINDS.contains(&node.kind()) || self.case_else_arm(node, source) - } - - fn case_else_arm(self, node: TreeSitterNode<'_>, source: &str) -> bool { - if 
self != Self::Python || node.kind() != "case_clause" { - return false; - } - - named_children(node) - .into_iter() - .find(|child| CASE_DEFAULT_PATTERN_KINDS.contains(&child.kind())) - .map(|pattern| node_text(pattern, source).trim() == "_") - .unwrap_or(false) - } - - fn leading_function_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { - let Some(target) = self.leading_function_target(node, source) else { - return false; - }; - let expected_keyword = match self { - Self::Lua => "function", - _ => "def", - }; - target - .children(&mut target.walk()) - .next() - .map(|child| child.kind() == expected_keyword) - .unwrap_or(false) - && named_children(target) - .iter() - .any(|child| identifier_kind_name(child.kind())) - } - - fn leading_function_target<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - let (wrapper_kinds, target_kinds) = match self { - Self::Ruby | Self::Default => ( - LEADING_FUNCTION_WRAPPER_KINDS, - RUBY_LEADING_FUNCTION_TARGET_KINDS, - ), - Self::Python => ( - PYTHON_LEADING_FUNCTION_WRAPPER_KINDS, - PYTHON_LEADING_FUNCTION_TARGET_KINDS, - ), - Self::Lua => ( - LUA_LEADING_FUNCTION_WRAPPER_KINDS, - LUA_LEADING_FUNCTION_TARGET_KINDS, - ), - Self::TypeScript => return None, - }; - if !wrapper_kinds.contains(&node.kind()) { - return None; - } - if node - .children(&mut node.walk()) - .next() - .map(|child| matches!(child.kind(), "def" | "function")) - .unwrap_or(false) - { - return Some(node); - } - let raw_named = named_children(node); - if raw_named.len() == 1 - && target_kinds.contains(&raw_named[0].kind()) - && node_text(raw_named[0], source) == node_text(node, source) - { - return Some(raw_named[0]); - } - None - } - - fn leading_owner_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { - let Some(target) = self.leading_owner_target(node, source) else { - return false; - }; - target - .children(&mut target.walk()) - .next() - .map(|child| matches!(child.kind(), "class" | "module")) - 
.unwrap_or(false) - && named_children(target).len() >= 2 - && named_children(target) - .first() - .map(|child| !OWNER_STATEMENT_NESTED_KINDS.contains(&child.kind())) - .unwrap_or(false) - } - - fn leading_owner_target<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - let wrapper_kinds = match self { - Self::Python => PYTHON_LEADING_OWNER_WRAPPER_KINDS, - Self::Ruby | Self::Default => LEADING_OWNER_WRAPPER_KINDS, - Self::Lua | Self::TypeScript => LEADING_OWNER_WRAPPER_KINDS, - }; - if !wrapper_kinds.contains(&node.kind()) { - return None; - } - let raw_named = named_children(node); - if raw_named.len() == 1 - && OWNER_NODE_KINDS.contains(&raw_named[0].kind()) - && node_text(raw_named[0], source) == node_text(node, source) - { - return Some(raw_named[0]); - } - Some(node) - } - - fn leading_if_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { - let Some(target) = self.leading_if_target(node, source) else { - return false; - }; - target - .children(&mut target.walk()) - .next() - .map(|child| matches!(child.kind(), "if" | "unless")) - .unwrap_or(false) - && named_children(target).len() >= 2 - && named_children(target) - .first() - .map(|child| !IF_NODE_KINDS.contains(&child.kind())) - .unwrap_or(false) - } - - fn leading_if_target<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - let wrapper_kinds = match self { - Self::Python => PYTHON_LEADING_IF_WRAPPER_KINDS, - Self::Lua => LUA_LEADING_IF_WRAPPER_KINDS, - Self::Ruby | Self::TypeScript | Self::Default => LEADING_IF_WRAPPER_KINDS, - }; - if !wrapper_kinds.contains(&node.kind()) { - return None; - } - if matches!(self, Self::Python | Self::Lua) { - let raw_named = named_children(node); - if raw_named.len() == 1 - && raw_named[0].kind() == "if_statement" - && node_text(raw_named[0], source) == node_text(node, source) - { - return Some(raw_named[0]); - } - } - let raw_named = named_children(node); - if raw_named.len() == 1 - && 
IF_NODE_KINDS.contains(&raw_named[0].kind()) - && node_text(raw_named[0], source) == node_text(node, source) - { - return Some(raw_named[0]); - } - Some(node) - } - - fn leading_case_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { - let Some(target) = self.leading_case_target(node, source) else { - return false; - }; - target - .children(&mut target.walk()) - .next() - .map(|child| matches!(child.kind(), "case" | "match" | "switch")) - .unwrap_or(false) - && case_arm_descendant(target) - } - - fn leading_case_target<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - if !LEADING_CASE_WRAPPER_KINDS.contains(&node.kind()) { - return None; - } - let raw_named = named_children(node); - if raw_named.len() == 1 - && CASE_NODE_KINDS.contains(&raw_named[0].kind()) - && node_text(raw_named[0], source) == node_text(node, source) - { - return Some(raw_named[0]); - } - Some(node) - } - - fn leading_loop_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { - let Some(target) = self.leading_loop_target(node, source) else { - return false; - }; - target - .children(&mut target.walk()) - .next() - .map(|child| !child.is_named() && matches!(child.kind(), "while" | "until")) - .unwrap_or(false) - && named_children(target).len() >= 2 - } - - fn leading_loop_target<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - if !LEADING_LOOP_WRAPPER_KINDS.contains(&node.kind()) { - return None; - } - let raw_named = named_children(node); - if raw_named.len() == 1 - && LOOP_NODE_KINDS.contains(&raw_named[0].kind()) - && node_text(raw_named[0], source) == node_text(node, source) - { - return Some(raw_named[0]); - } - Some(node) - } - - fn rescue_body_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { - !self.rescue_clauses(node, source).is_empty() - } - - fn rescue_body_target<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - match self { - Self::Python => { - 
if node.kind() == "try_statement" { - return Some(node); - } - if node.kind() == "block" { - let raw_named = named_children(node); - if raw_named.len() == 1 - && raw_named[0].kind() == "try_statement" - && node_text(raw_named[0], source) == node_text(node, source) - { - return Some(raw_named[0]); - } - } - } - Self::TypeScript => { - if node.kind() == "try_statement" { - return Some(node); - } - if node.kind() == "statement_block" { - let raw_named = named_children(node); - if raw_named.len() == 1 - && raw_named[0].kind() == "try_statement" - && node_text(raw_named[0], source) == node_text(node, source) - { - return Some(raw_named[0]); - } - } - } - _ => {} - } - - if RESCUE_BODY_WRAPPER_KINDS.contains(&node.kind()) { - Some(node) - } else { - None - } - } - - fn rescue_body_nodes<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Vec> { - let Some(target) = self.rescue_body_target(node, source) else { - return Vec::new(); - }; - let named = named_children(target); - match self { - Self::Python => { - if target.kind() == "try_statement" { - return named - .into_iter() - .take_while(|child| { - !matches!(child.kind(), "except_clause" | "finally_clause") - }) - .collect(); - } - } - Self::TypeScript => { - if target.kind() == "try_statement" { - return named - .into_iter() - .take_while(|child| { - !matches!(child.kind(), "catch_clause" | "finally_clause") - }) - .collect(); - } - } - _ => {} - } - - let Some(index) = named.iter().position(|child| self.rescue_clause(*child)) else { - return Vec::new(); - }; - named[..index].to_vec() - } - - fn rescue_clauses<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Vec> { - let Some(target) = self.rescue_body_target(node, source) else { - return Vec::new(); - }; - let clause_kind = match self { - Self::Python => "except_clause", - Self::TypeScript => "catch_clause", - _ => "rescue", - }; - named_children(target) - .into_iter() - .filter(|child| child.kind() == clause_kind) - .collect() 
- } - - fn rescue_clause_exceptions<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Vec> { - match self { - Self::Python => { - let Some(pattern) = named_children(node) - .into_iter() - .find(|child| !matches!(child.kind(), "block" | "comment")) - else { - return Vec::new(); - }; - if pattern.kind() != "as_pattern" { - return vec![pattern]; - } - named_children(pattern) - .into_iter() - .find(|child| child.kind() != "as_pattern_target") - .into_iter() - .collect() - } - Self::TypeScript => Vec::new(), - _ => { - let Some(exceptions) = named_children(node) - .into_iter() - .find(|child| child.kind() == "exceptions") - else { - return Vec::new(); - }; - let text = node_text(exceptions, source).trim(); - if ruby_exception_constant_text(text) - || (named_children(exceptions).is_empty() && !text.is_empty()) - { - return vec![exceptions]; - } - named_children(exceptions) - } - } - } - - fn rescue_clause_exceptions_source<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - match self { - Self::Python => self - .rescue_clause_exceptions(node, source) - .into_iter() - .next(), - Self::TypeScript => None, - _ => named_children(node) - .into_iter() - .find(|child| child.kind() == "exceptions"), - } - } - - fn rescue_clause_exception_variable_name<'tree>( - self, - node: TreeSitterNode<'tree>, - ) -> Option> { - match self { - Self::Python => named_children(node) - .into_iter() - .find(|child| child.kind() == "as_pattern") - .and_then(|pattern| descendant(pattern, &["as_pattern_target"])), - Self::TypeScript => named_children(node) - .into_iter() - .find(|child| identifier_kind_name(child.kind())), - _ => named_children(node) - .into_iter() - .find(|child| child.kind() == "exception_variable") - .and_then(|variable| { - named_children(variable) - .into_iter() - .find(|child| identifier_kind_name(child.kind())) - }), - } - } - - fn rescue_clause_exception_variable_source<'tree>( - self, - node: TreeSitterNode<'tree>, - ) -> 
Option> { - match self { - Self::Python | Self::TypeScript => self.rescue_clause_exception_variable_name(node), - _ => named_children(node) - .into_iter() - .find(|child| child.kind() == "exception_variable"), - } - } - - fn rescue_clause_handler<'tree>( - self, - node: TreeSitterNode<'tree>, - ) -> Option> { - match self { - Self::Python => named_children(node) - .into_iter() - .rev() - .find(|child| child.kind() == "block"), - Self::TypeScript => named_children(node) - .into_iter() - .rev() - .find(|child| child.kind() == "statement_block"), - _ => named_children(node).into_iter().rev().find(|child| { - !matches!( - child.kind(), - "exceptions" | "exception_variable" | "comment" - ) - }), - } - } - - fn rescue_clause(self, node: TreeSitterNode<'_>) -> bool { - node.kind() == "rescue" - } - - fn ensure_body_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { - self.ensure_clause(node, source).is_some() - } - - fn ensure_body_target<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - match self { - Self::Python => { - if node.kind() == "try_statement" { - return Some(node); - } - if node.kind() == "block" { - let raw_named = named_children(node); - if raw_named.len() == 1 - && raw_named[0].kind() == "try_statement" - && node_text(raw_named[0], source) == node_text(node, source) - { - return Some(raw_named[0]); - } - } - } - Self::TypeScript => { - if node.kind() == "try_statement" { - return Some(node); - } - if node.kind() == "statement_block" { - let raw_named = named_children(node); - if raw_named.len() == 1 - && raw_named[0].kind() == "try_statement" - && node_text(raw_named[0], source) == node_text(node, source) - { - return Some(raw_named[0]); - } - } - } - _ => {} - } - - if ENSURE_BODY_WRAPPER_KINDS.contains(&node.kind()) { - Some(node) - } else { - None - } - } - - fn ensure_body_nodes<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Vec> { - let Some(target) = self.ensure_body_target(node, 
source) else { - return Vec::new(); - }; - let named = named_children(target); - let ensure_kind = match self { - Self::Python | Self::TypeScript => "finally_clause", - _ => "ensure", - }; - let Some(index) = named.iter().position(|child| child.kind() == ensure_kind) else { - return Vec::new(); - }; - named[..index].to_vec() - } - - fn ensure_clause<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - let target = self.ensure_body_target(node, source)?; - let ensure_kind = match self { - Self::Python | Self::TypeScript => "finally_clause", - _ => "ensure", - }; - named_children(target) - .into_iter() - .find(|child| child.kind() == ensure_kind) - } - - fn ensure_clause_body<'tree>( - self, - node: TreeSitterNode<'tree>, - ) -> Option> { - match self { - Self::Python => named_children(node) - .into_iter() - .rev() - .find(|child| child.kind() == "block"), - Self::TypeScript => named_children(node) - .into_iter() - .rev() - .find(|child| child.kind() == "statement_block"), - _ => None, - } - } - - fn array_literal_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { - self.array_literal_target(node, source).is_some() - } - - fn array_literal_target<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - if self == Self::Lua { - if let Some(target) = lua_positional_table_target(node, source) { - return Some(target); - } - } - - if ARRAY_LITERAL_NODE_KINDS.contains(&node.kind()) { - return Some(node); - } - if !ARRAY_LITERAL_WRAPPER_KINDS.contains(&node.kind()) { - return None; - } - if bracketed(node, source, "[", "]") { - return Some(node); - } - - let named = named_children(node); - let child = *named.first()?; - if named.len() == 1 { - if ARRAY_LITERAL_NODE_KINDS.contains(&child.kind()) { - return Some(child); - } - - if matches!(child.kind(), "expression_statement" | "statement") - && node_text(child, source).trim() == node_text(node, source).trim() - { - return self.array_literal_target(child, 
source); - } - - let stripped = node_text(node, source).trim(); - if stripped == node_text(child, source) - || stripped == format!("{};", node_text(child, source)) - { - if ARRAY_LITERAL_NODE_KINDS.contains(&child.kind()) { - return Some(child); - } - } - } - - None - } - - fn array_literal_values<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Vec> { - let target = self.array_literal_target(node, source).unwrap_or(node); - if self == Self::Lua { - if target.kind() == "arguments" { - if let Some(table) = named_children(target) - .into_iter() - .find(|child| child.kind() == "table_constructor") - { - if node_text(target, source).trim() == node_text(table, source).trim() { - return named_children(table); - } - } - } - if target.kind() == "table_constructor" { - return named_children(target); - } - } - - named_children(target) - } - - fn element_reference_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { - self.element_reference_target(node, source).is_some() - } - - fn element_reference_target<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - if ELEMENT_REFERENCE_NODE_KINDS.contains(&node.kind()) { - return Some(node); - } - if !ELEMENT_REFERENCE_WRAPPER_KINDS.contains(&node.kind()) { - return None; - } - - let named = named_children(node); - if named.len() == 1 - && ELEMENT_REFERENCE_WRAPPER_KINDS.contains(&named[0].kind()) - && node_text(named[0], source).trim() == node_text(node, source).trim() - { - return self.element_reference_target(named[0], source); - } - if named.len() == 1 && ELEMENT_REFERENCE_NODE_KINDS.contains(&named[0].kind()) { - let stripped = node_text(node, source).trim(); - let child_text = node_text(named[0], source); - if stripped == child_text || stripped == format!("{child_text};") { - return Some(named[0]); - } - } - - if element_reference_shape(node, source) { - Some(node) - } else { - None - } - } - - fn element_reference_receiver<'tree>( - self, - node: 
TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - let target = self.element_reference_target(node, source).unwrap_or(node); - named_children(target).first().copied() - } - - fn element_reference_arguments<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Vec> { - let target = self.element_reference_target(node, source).unwrap_or(node); - named_children(target).into_iter().skip(1).collect() - } - - fn hash_literal_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { - self.hash_literal_target(node, source).is_some() - } - - fn hash_literal_target<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - if self == Self::Lua { - if let Some(target) = lua_keyed_table_target(node, source) { - return Some(target); - } - } - - if HASH_LITERAL_NODE_KINDS.contains(&node.kind()) { - return Some(node); - } - if !HASH_LITERAL_WRAPPER_KINDS.contains(&node.kind()) { - return None; - } - if statement_block_wrapper(node) { - return None; - } - if bracketed(node, source, "{", "}") { - return Some(node); - } - - let named = named_children(node); - if named.len() != 1 { - return None; - } - - let child = named[0]; - if node.kind() == "parenthesized_expression" { - return self.hash_literal_target(child, source); - } - - let stripped = node_text(node, source).trim(); - let child_text = node_text(child, source); - if stripped == child_text || stripped == format!("{child_text};") { - if HASH_LITERAL_NODE_KINDS.contains(&child.kind()) { - return Some(child); - } - if HASH_LITERAL_WRAPPER_KINDS.contains(&child.kind()) { - return self.hash_literal_target(child, source); - } - } - - None - } - - fn hash_literal_values<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Vec> { - let target = self.hash_literal_target(node, source).unwrap_or(node); - if self == Self::Lua { - if target.kind() == "arguments" { - if let Some(table) = named_children(target) - .into_iter() - .find(|child| child.kind() == 
"table_constructor") - { - return named_children(table); - } - return named_children(target); - } - if target.kind() == "table_constructor" { - return named_children(target); - } - } - - named_children(target) - } - - fn empty_body_statement(self, node: TreeSitterNode<'_>, source: &str) -> bool { - if EMPTY_BODY_WRAPPER_KINDS.contains(&node.kind()) - && named_children(node).is_empty() - && node_text(node, source).trim().is_empty() - { - return true; - } - - match self { - Self::Python => { - if node.kind() == "pass_statement" { - return true; - } - if node.kind() == "block" && node_text(node, source).trim() == "pass" { - let named = named_children(node); - return named.is_empty() - || named.iter().all(|child| child.kind() == "pass_statement"); - } - false - } - Self::TypeScript => { - node.kind() == "statement_block" - && named_children(node).is_empty() - && node_text(node, source).trim() == "{}" - } - _ => false, - } - } - - fn heredoc_body_statement(self, node: TreeSitterNode<'_>) -> bool { - self == Self::Ruby - && HEREDOC_BODY_WRAPPER_KINDS.contains(&node.kind()) - && named_children(node) - .iter() - .any(|child| child.kind() == "heredoc_body") - } - - fn heredoc_call_for_body(self, node: TreeSitterNode<'_>, source: &str) -> bool { - if self != Self::Ruby { - return false; - } - if node.kind() == "heredoc_beginning" { - return true; - } - if matches!(node.kind(), "call" | "argument_list") - && heredoc_marker_text(node_text(node, source)) - { - return true; - } - - named_children(node).into_iter().any(|child| { - if named_children(child) - .into_iter() - .any(|grandchild| grandchild.kind() == "heredoc_body") - { - return false; - } - - self.heredoc_call_for_body(child, source) - }) - } - - fn interpolated_statement( - self, - node: TreeSitterNode<'_>, - children: &[TreeSitterNode<'_>], - ) -> bool { - INTERPOLATED_STATEMENT_WRAPPER_KINDS.contains(&node.kind()) - && children.iter().any(|child| child.kind() == "interpolation") - } - - fn 
concatenated_string_statement( - self, - node: TreeSitterNode<'_>, - children: &[TreeSitterNode<'_>], - ) -> bool { - if concatenated_string_node(node).is_some() { - return true; - } - let wrapper_kinds = match self { - Self::Python => PYTHON_CONCATENATED_STRING_WRAPPER_KINDS, - _ => CONCATENATED_STRING_WRAPPER_KINDS, - }; - if !wrapper_kinds.contains(&node.kind()) { - return false; - } - if children.len() > 1 && children.iter().all(|child| child.kind() == "string") { - return true; - } - children.len() == 1 && concatenated_string_target(children[0]).is_some() - } - - fn zero_child_identifier_call(self, node: TreeSitterNode<'_>, source: &str) -> bool { - if self != Self::Ruby - || node.kind() != "call" - || !ruby_variable_name_text(node_text(node, source)) - { - return false; - } - let named = named_children(node); - named.is_empty() - || (named.len() == 1 - && identifier_kind_name(named[0].kind()) - && node_text(named[0], source) == node_text(node, source)) - } - - fn boolean_expression_kind(self, node: TreeSitterNode<'_>) -> bool { - BOOLEAN_EXPRESSION_KINDS.contains(&node.kind()) - || (self == Self::Lua && node.kind() == "expression_list") - } - - fn comparison_expression_kind(self, node: TreeSitterNode<'_>) -> bool { - COMPARISON_EXPRESSION_KINDS.contains(&node.kind()) - || (self == Self::Lua && node.kind() == "expression_list") - } - - fn dotted_expression_wrapper(self, node: TreeSitterNode<'_>) -> bool { - let kinds = match self { - Self::Python => PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS, - _ => DOTTED_EXPRESSION_WRAPPER_KINDS, - }; - kinds.contains(&node.kind()) - } - - fn unary_not_expression(self, node: TreeSitterNode<'_>, source: &str) -> bool { - matches!(node.kind(), "unary" | "unary_expression") - && node_text(node, source).trim_start().starts_with('!') - } - - fn unary_minus_expression(self, node: TreeSitterNode<'_>, source: &str) -> bool { - match self { - Self::Python => { - matches!(node.kind(), "unary" | "unary_expression" | "unary_operator") - && 
node_text(node, source).trim_start().starts_with('-') - } - Self::Lua => { - (matches!(node.kind(), "unary" | "unary_expression") - && node_text(node, source).trim_start().starts_with('-')) - || (node.kind() == "expression_list" - && node - .children(&mut node.walk()) - .next() - .map(|child| node_text(child, source) == "-") - .unwrap_or(false) - && named_children(node).len() == 1) - } - _ => { - matches!(node.kind(), "unary" | "unary_expression") - && node_text(node, source).trim_start().starts_with('-') - } - } - } - - fn binary_operator(self, node: TreeSitterNode<'_>, source: &str) -> Option { - if let Some(operator) = direct_binary_operator(node, source) { - return Some(operator.to_string()); - } - - let raw_named = raw_named_children(node); - if raw_named.len() == 1 - && BINARY_WRAPPER_KINDS.contains(&raw_named[0].kind()) - && node_text(node, source) == node_text(raw_named[0], source) - { - return self.binary_operator(raw_named[0], source); - } - - None - } - - fn class_node(self, node: TreeSitterNode<'_>) -> bool { - matches!( - node.kind(), - "class" | "class_definition" | "class_declaration" | "class_specifier" - ) - } - - fn identifier_text_node(self, node: TreeSitterNode<'_>, source: &str) -> bool { - self == Self::Lua - && matches!(node.kind(), "variable_list" | "expression_list") - && bare_identifier_text(node_text(node, source)) - } - - fn member_assignment_target(self, node: TreeSitterNode<'_>, source: &str) -> bool { - if self != Self::Lua || node.kind() != "variable_list" { - return false; - } - - let raw_named = raw_named_children(node); - let target = if raw_named.len() == 1 - && raw_named[0].kind() == "dot_index_expression" - && node_text(node, source) == node_text(raw_named[0], source) - { - raw_named[0] - } else { - node - }; - - raw_named_children(target).len() == 2 - && target - .children(&mut target.walk()) - .any(|child| !child.is_named() && node_text(child, source) == ".") - } - - fn instance_variable(self, node: TreeSitterNode<'_>, 
source: &str) -> bool { - if node.kind() == "instance_variable" { - return true; - } - - self == Self::Ruby - && node_text(node, source) - .strip_prefix('@') - .map(ruby_variable_name_text) - .unwrap_or(false) - } - - fn global_variable(self, node: TreeSitterNode<'_>, source: &str) -> bool { - if node.kind() == "global_variable" { - return true; - } - - self == Self::Ruby - && node_text(node, source) - .strip_prefix('$') - .map(ruby_variable_name_text) - .unwrap_or(false) - } - - fn assignment_operator(self, text: &str) -> bool { - match self { - Self::Ruby => matches!( - text, - "=" | "+=" - | "-=" - | "*=" - | "/=" - | "%=" - | "**=" - | "&&=" - | "||=" - | "&=" - | "|=" - | "^=" - | "<<=" - | ">>=" - ), - Self::Python => matches!( - text, - "=" | "+=" - | "-=" - | "*=" - | "/=" - | "%=" - | "//=" - | "**=" - | "@=" - | "&=" - | "|=" - | "^=" - | "<<=" - | ">>=" - | ":=" - ), - Self::Lua => text == "=", - Self::TypeScript => matches!( - text, - "=" | "+=" - | "-=" - | "*=" - | "/=" - | "%=" - | "**=" - | "<<=" - | ">>=" - | ">>>=" - | "&=" - | "|=" - | "^=" - | "&&=" - | "||=" - | "??=" - ), - Self::Default => matches!(text, "=" | "+=" | "-=" | "*=" | "/=" | "%="), - } - } - - fn unwrap_node(self, node: TreeSitterNode<'_>, source: &str, named_child_count: usize) -> bool { - if matches!( - node.kind(), - "parenthesized_expression" - | "parenthesized_statements" - | "expression_statement" - | "statement" - | "case_pattern" - | "match_pattern" - | "pattern" - ) && named_child_count == 1 - { - return true; - } - - if self != Self::Lua || node.kind() != "expression_list" || named_child_count != 1 { - return false; - } - - let raw_named = raw_named_children(node); - if raw_named.len() == 1 - && raw_named[0].kind() == "parenthesized_expression" - && node_text(raw_named[0], source) == node_text(node, source) - { - return true; - } - - let mut cursor = node.walk(); - let raw_children = node.children(&mut cursor).collect::>(); - raw_children - .first() - .map(|child| 
node_text(*child, source) == "(") - .unwrap_or(false) - && raw_children - .last() - .map(|child| node_text(*child, source) == ")") - .unwrap_or(false) - } - - fn interpolated_string( - self, - node: TreeSitterNode<'_>, - children: &[TreeSitterNode<'_>], - ) -> bool { - if node.kind() == "string" && children.iter().any(|child| child.kind() == "interpolation") { - return true; - } - - self == Self::TypeScript - && node.kind() == "template_string" - && children - .iter() - .any(|child| child.kind() == "template_substitution") - } - - fn lambda_expression(self, node: TreeSitterNode<'_>, source: &str) -> bool { - self.lambda_target(node, source).is_some() - } - - fn lambda_target<'tree>( - self, - node: TreeSitterNode<'tree>, - source: &str, - ) -> Option> { - if node.kind() == "lambda" { - return Some(node); - } - - if self == Self::TypeScript - && matches!(node.kind(), "arrow_function" | "function_expression") - { - return Some(node); - } - - if self == Self::Lua { - if node.kind() == "function_definition" { - return Some(node); - } + } +} - if node.kind() == "expression_list" { - let named = named_children(node); - if named.len() == 1 - && named[0].kind() == "function_definition" - && node_text(named[0], source) == node_text(node, source) - { - return Some(named[0]); - } - } - } +pub fn slice(node: &Node, _lines: &[String]) -> String { + normalize_text(&node.text) +} - None +pub fn body_stmts(defn_node: &Node) -> Vec<&Node> { + let scope_index = if defn_node.r#type == "DEFS" { 2 } else { 1 }; + let Some(scope) = defn_node.children.get(scope_index).and_then(node) else { + return Vec::new(); + }; + if scope.r#type != "SCOPE" { + return Vec::new(); + } + let Some(body) = scope.children.get(2).and_then(node) else { + return Vec::new(); + }; + if body.r#type == "BLOCK" { + body.children.iter().filter_map(node).collect() + } else { + vec![body] } +} - fn interpolation_node(self, node: TreeSitterNode<'_>) -> bool { - node.kind() == "interpolation" - || (self == 
Self::TypeScript && node.kind() == "template_substitution") +pub fn canon_polarity(text: &str) -> (String, bool) { + let trimmed = text.trim(); + if let Some(rest) = trimmed.strip_prefix('!') { + ( + rest.trim_start_matches('(') + .trim_end_matches(')') + .trim() + .to_string(), + true, + ) + } else { + (trimmed.to_string(), false) } +} - fn explicit_alternative<'tree>( - self, - node: TreeSitterNode<'tree>, - ) -> Option> { - let alternatives: &[&str] = match self { - Self::Ruby => &["elsif", "else"], - Self::Python => &["elif_clause", "else", "else_clause"], - Self::Lua => &["elseif_statement", "else", "else_statement"], - Self::TypeScript => &["else", "else_clause"], - Self::Default => &["else", "else_clause", "else_statement"], - }; - named_children(node) - .into_iter() - .find(|child| alternatives.contains(&child.kind())) +pub fn flatten_and(node: &Node) -> Vec<&Node> { + if node.r#type != "AND" { + return vec![node]; } + node.children + .iter() + .filter_map(self::node) + .flat_map(flatten_and) + .collect() +} + +const QUESTION_COLON_TERNARY_KINDS: &[&str] = &[ + "body_statement", + "block_body", + "statement", + "argument_list", + "conditional", +]; +const TYPESCRIPT_TERNARY_KINDS: &[&str] = &[ + "body_statement", + "block_body", + "statement", + "argument_list", + "conditional", + "ternary_expression", +]; +const CASE_ARGUMENT_WHEN_KINDS: &[&str] = &[ + "when", + "switch_case", + "case_clause", + "expression_case", + "case_statement", + "switch_section", + "switch_block_statement_group", + "switch_entry", + "when_entry", + "match_arm", +]; +const CASE_ELSE_KINDS: &[&str] = &["else", "switch_default"]; +const CASE_DEFAULT_PATTERN_KINDS: &[&str] = &["case_pattern", "match_pattern", "pattern"]; +const LEADING_FUNCTION_WRAPPER_KINDS: &[&str] = &["body_statement", "statement"]; +const PYTHON_LEADING_FUNCTION_WRAPPER_KINDS: &[&str] = &["block"]; +const LUA_LEADING_FUNCTION_WRAPPER_KINDS: &[&str] = &["block"]; +const OWNER_STATEMENT_NESTED_KINDS: &[&str] = + 
&["class", "class_definition", "class_declaration", "module"]; +const LEADING_OWNER_WRAPPER_KINDS: &[&str] = &["body_statement", "statement"]; +const PYTHON_LEADING_OWNER_WRAPPER_KINDS: &[&str] = &["block"]; +const OWNER_NODE_KINDS: &[&str] = &["class", "class_definition", "class_declaration", "module"]; +const IF_NODE_KINDS: &[&str] = &[ + "if", + "if_statement", + "if_modifier", + "unless", + "unless_modifier", + "if_expression", + "conditional", +]; +const LEADING_IF_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; +const PYTHON_LEADING_IF_WRAPPER_KINDS: &[&str] = &["block"]; +const LUA_LEADING_IF_WRAPPER_KINDS: &[&str] = &["block"]; +const LEADING_CASE_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; +const CASE_NODE_KINDS: &[&str] = &[ + "case", + "switch_statement", + "expression_switch_statement", + "switch_expression", + "match_statement", + "match_expression", + "when_expression", +]; +const LEADING_LOOP_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; +const LOOP_NODE_KINDS: &[&str] = &[ + "while", + "while_statement", + "while_modifier", + "until", + "until_modifier", +]; +const RESCUE_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block_body", "statement"]; +const ENSURE_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block_body", "statement"]; +const ARRAY_LITERAL_WRAPPER_KINDS: &[&str] = &[ + "body_statement", + "block", + "block_body", + "statement", + "argument_list", + "expression_statement", +]; +const ARRAY_LITERAL_NODE_KINDS: &[&str] = &["array", "list"]; +const ELEMENT_REFERENCE_WRAPPER_KINDS: &[&str] = &[ + "body_statement", + "block", + "block_body", + "statement", + "expression_statement", + "expression_list", +]; +const ELEMENT_REFERENCE_NODE_KINDS: &[&str] = &[ + "element_reference", + "subscript", + "subscript_expression", + "bracket_index_expression", +]; +const HASH_LITERAL_WRAPPER_KINDS: &[&str] = &[ + "body_statement", + "block", 
+ "block_body", + "statement", + "argument_list", + "expression_statement", + "parenthesized_expression", +]; +const HASH_LITERAL_NODE_KINDS: &[&str] = &["hash", "dictionary", "object", "table_constructor"]; +const STATEMENT_BLOCK_PARENT_KINDS: &[&str] = &[ + "method_declaration", + "constructor_declaration", + "function_declaration", + "function_body", + "if_statement", + "while_statement", + "for_statement", + "enhanced_for_statement", + "try_statement", + "catch_clause", + "finally_clause", + "do_statement", + "lambda_expression", +]; +const EMPTY_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; +const HEREDOC_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block_body", "statement", "then"]; +const INTERPOLATED_STATEMENT_WRAPPER_KINDS: &[&str] = + &["body_statement", "block_body", "statement", "argument_list"]; +const CONCATENATED_STRING_WRAPPER_KINDS: &[&str] = + &["body_statement", "block_body", "statement", "argument_list"]; +const PYTHON_CONCATENATED_STRING_WRAPPER_KINDS: &[&str] = &[ + "body_statement", + "block_body", + "statement", + "argument_list", + "block", + "expression_statement", +]; +const CONCATENATED_STRING_NODE_KINDS: &[&str] = &["chained_string", "concatenated_string"]; + +pub(crate) struct TernaryParts<'tree> { + pub(crate) condition: TreeSitterNode<'tree>, + pub(crate) positive: Vec>, + pub(crate) negative: Vec>, } fn direct_binary_operator<'source>( @@ -2090,7 +764,7 @@ fn lua_keyed_table_target<'tree>( struct TreeSitterNormalizer<'source> { source: &'source str, language: Language, - normalization_adapter: TreeSitterNormalizationAdapter, + normalization_adapter: &'static dyn AstNormalizationAdapter, local_stack: Vec>, root_span: Option, current_heredoc_body_span: Option, @@ -2101,7 +775,7 @@ impl<'source> TreeSitterNormalizer<'source> { Self { source, language, - normalization_adapter: TreeSitterNormalizationAdapter::for_language(language), + normalization_adapter: normalization_adapter(language), 
local_stack: Vec::new(), root_span: None, current_heredoc_body_span: None, @@ -2332,7 +1006,7 @@ impl<'source> TreeSitterNormalizer<'source> { | "raw_string_literal" => { if self.interpolated_string(node) { Some(self.normalize_interpolated_string(node)) - } else if let Some(content) = self.lua_no_paren_string_argument_content(node) { + } else if let Some(content) = self.no_paren_string_argument_content(node) { Some(self.wrap( "STR", vec![Child::String(node_text(content, self.source).to_string())], @@ -2485,7 +1159,7 @@ impl<'source> TreeSitterNormalizer<'source> { )) } - fn normalize_python_nested_class_as_iter(&mut self, node: TreeSitterNode<'_>) -> Option { + fn normalize_nested_class_as_iter(&mut self, node: TreeSitterNode<'_>) -> Option { let name_node = self .named_field(node, "name") .or_else(|| self.first_named(node))?; @@ -2618,17 +1292,11 @@ impl<'source> TreeSitterNormalizer<'source> { } fn normalize_body(&mut self, node: TreeSitterNode<'_>) -> Option { - if self.language == Language::Python && node.kind() == "block" { - let raw_children = self.raw_named_children(node); - if raw_children.len() == 1 - && raw_children[0].kind() == "class_definition" - && node - .parent() - .map(|parent| parent.kind() == "class_definition") - .unwrap_or(false) - { - return self.normalize_python_nested_class_as_iter(raw_children[0]); - } + if let Some(child) = self + .normalization_adapter + .nested_class_body_child(node, self.source) + { + return self.normalize_nested_class_as_iter(child); } if self.leading_function_statement(node) { return self.normalize_leading_function_statement(node); @@ -2828,19 +1496,13 @@ impl<'source> TreeSitterNormalizer<'source> { } fn normalize_else_or_branch(&mut self, node: TreeSitterNode<'_>) -> Option { - if self.language == Language::Python && node.kind() == "else_clause" { - if let Some(block) = self - .raw_named_children(node) - .into_iter() - .find(|child| child.kind() == "block") - { - if let Some(normalized) = 
self.normalize_python_else_if_block(block) { - return Some(self.wrap( - "ELSE_CLAUSE", - vec![Child::Node(Box::new(normalized))], - node, - )); - } + if let Some(block) = self.normalization_adapter.else_if_block(node, self.source) { + if let Some(normalized) = self.normalize_else_if_block_child(block) { + return Some(self.wrap( + "ELSE_CLAUSE", + vec![Child::Node(Box::new(normalized))], + node, + )); } } if node.kind() != "else" { @@ -2859,7 +1521,7 @@ impl<'source> TreeSitterNormalizer<'source> { self.normalize_body_nodes(self.named_children(node), node) } - fn normalize_python_else_if_block(&mut self, node: TreeSitterNode<'_>) -> Option { + fn normalize_else_if_block_child(&mut self, node: TreeSitterNode<'_>) -> Option { let statements = self .raw_named_children(node) .into_iter() @@ -3684,7 +2346,10 @@ impl<'source> TreeSitterNormalizer<'source> { right: Option, source: TreeSitterNode<'_>, ) -> Option { - if self.language != Language::Ruby || !matches!(operator, "||" | "&&") { + if !self + .normalization_adapter + .logical_operator_assignment(operator) + { return None; } if !self.identifier_kind(left.kind()) { @@ -3858,21 +2523,10 @@ impl<'source> TreeSitterNormalizer<'source> { fn normalize_call_with_block(&mut self, node: TreeSitterNode<'_>) -> Option { let block = self.call_block(node); - let call_source = if self.language == Language::Ruby - && matches!(node.kind(), "body_statement" | "block_body" | "statement") - { - let raw_named = self.raw_named_children(node); - if raw_named.len() == 1 - && raw_named[0].kind() == "call" - && node_text(node, self.source) == node_text(raw_named[0], self.source) - { - raw_named[0] - } else { - node - } - } else { - node - }; + let call_source = self + .normalization_adapter + .statement_wrapped_call_target(node, self.source) + .unwrap_or(node); let call = self.normalize_call_without_block(call_source, block)?; let args = self.normalize_block_parameters(block); let body = block.and_then(|block| { @@ -4132,7 +2786,10 @@ 
impl<'source> TreeSitterNormalizer<'source> { } return Some(self.wrap(node_type, children, node)); } - if self.language == Language::Ruby && self.const_kind(function.kind()) { + if self + .normalization_adapter + .bare_const_call_function(function) + { let children = vec![ Child::Symbol(node_text(function, self.source).to_string()), if let Some(source) = call_source.as_ref() { @@ -5124,7 +3781,7 @@ impl<'source> TreeSitterNormalizer<'source> { } fn normalize_parameters(&mut self, node: Option>) -> Option { - if self.language != Language::Ruby { + if !self.normalization_adapter.normalize_default_parameters() { return None; } let node = node?; @@ -5154,7 +3811,7 @@ impl<'source> TreeSitterNormalizer<'source> { } fn normalize_block_parameters(&mut self, block: Option>) -> Option { - if self.language != Language::Ruby { + if !self.normalization_adapter.normalize_block_parameters() { return None; } let block = block?; @@ -5648,7 +4305,7 @@ impl<'source> TreeSitterNormalizer<'source> { } fn assignment_lhs(&self, node: TreeSitterNode<'_>) -> bool { - if self.lua_single_assignment_block_child(node) { + if self.single_assignment_block_child(node) { return false; } if node @@ -5667,37 +4324,8 @@ impl<'source> TreeSitterNormalizer<'source> { } fn literal_fragment_assignment_context(&self, node: TreeSitterNode<'_>) -> bool { - let Some(parent) = node.parent() else { - return false; - }; - if matches!( - parent.kind(), - "string" | "delimited_symbol" | "regex" | "regex_literal" - ) { - return true; - } - if self.language == Language::Lua - && matches!( - node.kind(), - "string_content" | "escape_sequence" | "interpolation" | "string_fragment" - ) - && parent.kind() == "expression_list" - { - return true; - } - - matches!( - node.kind(), - "string_content" | "escape_sequence" | "interpolation" | "string_fragment" - ) && parent - .parent() - .map(|grandparent| { - matches!( - grandparent.kind(), - "string" | "delimited_symbol" | "regex" | "regex_literal" - ) - }) - 
.unwrap_or(false) + self.normalization_adapter + .literal_fragment_assignment_context(node, self.source) } fn literal_fragment_expression_list(&self, node: TreeSitterNode<'_>) -> bool { @@ -5710,7 +4338,7 @@ impl<'source> TreeSitterNormalizer<'source> { } fn assignment_rhs(&self, node: TreeSitterNode<'_>) -> bool { - if self.lua_single_assignment_block_child(node) { + if self.single_assignment_block_child(node) { return false; } if self.literal_fragment_assignment_context(node) { @@ -5721,34 +4349,14 @@ impl<'source> TreeSitterNormalizer<'source> { .unwrap_or(false) } - fn lua_single_assignment_block_child(&self, node: TreeSitterNode<'_>) -> bool { - if self.language != Language::Lua { - return false; - } - let Some(parent) = node.parent() else { - return false; - }; - if parent.kind() != "assignment_statement" { - return false; - } - let Some(grandparent) = parent.parent() else { - return false; - }; - grandparent.kind() == "block" - && node_text(grandparent, self.source) == node_text(parent, self.source) - && self.raw_named_children(grandparent).len() == 1 + fn single_assignment_block_child(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .single_assignment_block_child(node, self.source) } - fn lua_single_assignment_statement(&self, node: TreeSitterNode<'_>) -> bool { - if self.language != Language::Lua || node.kind() != "assignment_statement" { - return false; - } - let Some(parent) = node.parent() else { - return false; - }; - parent.kind() == "block" - && node_text(parent, self.source) == node_text(node, self.source) - && self.raw_named_children(parent).len() == 1 + fn single_assignment_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .single_assignment_statement(node, self.source) } fn has_assignment_operator_child(&self, node: TreeSitterNode<'_>) -> bool { @@ -6057,15 +4665,10 @@ impl<'source> TreeSitterNormalizer<'source> { node: TreeSitterNode<'_>, function: TreeSitterNode<'_>, ) -> bool { - let 
function_text = if self.language == Language::Ruby && function.kind() == "call" { - self.named_children(function) - .into_iter() - .next() - .map(|child| node_text(child, self.source)) - .unwrap_or_else(|| node_text(function, self.source)) - } else { - node_text(function, self.source) - }; + let function_text_source = self + .normalization_adapter + .inline_def_function_text_source(function, self.source); + let function_text = node_text(function_text_source, self.source); inline_def_wrapper_mid(function_text) && node_text(node, self.source).contains("def ") } @@ -6077,21 +4680,10 @@ impl<'source> TreeSitterNormalizer<'source> { } fn inline_def_from_statement(&mut self, node: TreeSitterNode<'_>) -> Option { - let target = if self.language == Language::Ruby - && matches!(node.kind(), "body_statement" | "block_body" | "statement") - { - let raw_named = self.raw_named_children(node); - if raw_named.len() == 1 - && raw_named[0].kind() == "call" - && node_text(raw_named[0], self.source) == node_text(node, self.source) - { - raw_named[0] - } else { - node - } - } else { - node - }; + let target = self + .normalization_adapter + .statement_wrapped_call_target(node, self.source) + .unwrap_or(node); let source = self .named_children(target) .into_iter() @@ -6314,10 +4906,7 @@ impl<'source> TreeSitterNormalizer<'source> { &self, node: TreeSitterNode<'tree>, ) -> Option> { - let body_kind = match self.normalization_adapter { - TreeSitterNormalizationAdapter::Python | TreeSitterNormalizationAdapter::Lua => "block", - _ => "body_statement", - }; + let body_kind = self.normalization_adapter.leading_function_body_kind(); self.named_children(node) .into_iter() .rev() @@ -6342,18 +4931,9 @@ impl<'source> TreeSitterNormalizer<'source> { return false; } let named = self.named_children(node); - let target = if self.language == Language::Ruby - && named.len() == 1 - && matches!( - named[0].kind(), - "binary" | "binary_expression" | "binary_operator" | "boolean_operator" - ) - && 
node_text(node, self.source) == node_text(named[0], self.source) - { - named[0] - } else { - node - }; + let target = self + .normalization_adapter + .boolean_statement_target(node, self.source, &named); if !matches!( self.binary_operator(target).as_deref(), Some("&&" | "||" | "and" | "or") @@ -6373,19 +4953,8 @@ impl<'source> TreeSitterNormalizer<'source> { } fn operator_call_expression(&self, node: TreeSitterNode<'_>) -> bool { - let operator_call_kind = match self.language { - Language::Python => matches!( - node.kind(), - "binary" | "binary_expression" | "binary_operator" - ), - Language::Lua => matches!( - node.kind(), - "binary" | "binary_expression" | "expression_list" - ), - _ => matches!(node.kind(), "binary" | "binary_expression"), - }; - - operator_call_kind + self.normalization_adapter + .operator_call_expression_kind(node) && self.named_children(node).len() >= 2 && self .binary_operator(node) @@ -6613,21 +5182,11 @@ impl<'source> TreeSitterNormalizer<'source> { } let block = self.call_block(node); - let children = if self.language == Language::Ruby - && matches!(node.kind(), "body_statement" | "block_body" | "statement") - { - let raw_named = self.raw_named_children(node); - if raw_named.len() == 1 - && raw_named[0].kind() == "call" - && node_text(node, self.source) == node_text(raw_named[0], self.source) - { - self.named_children(raw_named[0]) - } else { - self.named_children(node) - } - } else { - self.named_children(node) - }; + let child_source = self + .normalization_adapter + .statement_wrapped_call_target(node, self.source) + .unwrap_or(node); + let children = self.named_children(child_source); children.into_iter().find(|child| { Some(*child) != block && (self.call_kind(child.kind()) || self.member_read_node(*child)) @@ -6724,19 +5283,10 @@ impl<'source> TreeSitterNormalizer<'source> { return false; } - let target = if self.language == Language::Ruby { - let raw_named = self.raw_named_children(node); - if raw_named.len() == 1 - && 
raw_named[0].kind() == "call" - && node_text(node, self.source) == node_text(raw_named[0], self.source) - { - raw_named[0] - } else { - node - } - } else { - node - }; + let target = self + .normalization_adapter + .statement_wrapped_call_target(node, self.source) + .unwrap_or(node); self.call_block(target).is_some() && self @@ -6818,7 +5368,7 @@ impl<'source> TreeSitterNormalizer<'source> { } fn member_read_node(&self, node: TreeSitterNode<'_>) -> bool { - if self.language == Language::Lua && node.kind() == "field" { + if self.normalization_adapter.member_read_excluded(node) { return false; } matches!( @@ -7482,16 +6032,11 @@ impl<'source> TreeSitterNormalizer<'source> { } fn call_block<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { - if self.language == Language::Ruby - && matches!(node.kind(), "body_statement" | "block_body" | "statement") + if let Some(target) = self + .normalization_adapter + .statement_wrapped_call_target(node, self.source) { - let raw_named = self.raw_named_children(node); - if raw_named.len() == 1 - && raw_named[0].kind() == "call" - && node_text(node, self.source) == node_text(raw_named[0], self.source) - { - return self.call_block(raw_named[0]); - } + return self.call_block(target); } self.named_children(node) @@ -7504,29 +6049,7 @@ impl<'source> TreeSitterNormalizer<'source> { node: TreeSitterNode<'tree>, name: &str, ) -> Option> { - if self.language == Language::Python - && matches!(name, "body" | "consequence") - && matches!( - node.kind(), - "elif_clause" - | "else_clause" - | "for_statement" - | "function_definition" - | "if_statement" - | "try_statement" - | "while_statement" - | "with_statement" - ) - { - if let Some(block) = self - .raw_named_children(node) - .into_iter() - .find(|child| child.kind() == "block") - { - return Some(block); - } - } - node.child_by_field_name(name) + self.normalization_adapter.named_field(node, name) } fn parent_node<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { @@ -7552,335 
+6075,22 @@ impl<'source> TreeSitterNormalizer<'source> { if node.kind() == "dotted_name" && !node_text(node, self.source).contains('.') { return Vec::new(); } - if self.language == Language::Python - && node.kind() == "with_clause" - && bare_identifier_text(node_text(node, self.source)) - { - return Vec::new(); - } - if self.language == Language::Lua - && node.kind() == "variable_list" - && self.raw_named_children(node).len() == 1 - && self - .raw_named_children(node) - .first() - .map(|child| self.identifier_kind(child.kind())) - .unwrap_or(false) - && self.lua_single_assignment_block_child(node) - { - return Vec::new(); - } - if self.language == Language::Lua - && node.kind() == "variable_list" - && self.raw_named_children(node).len() == 1 - && node - .parent() - .map(|parent| parent.kind() == "for_generic_clause") - .unwrap_or(false) - { - return Vec::new(); - } - if self.language == Language::Lua - && node.kind() == "variable_list" - && self.raw_named_children(node).len() == 1 - && node - .parent() - .map(|parent| { - parent.kind() == "variable_declaration" - && self.raw_named_children(parent).len() == 1 - }) - .unwrap_or(false) - { - return Vec::new(); - } let children = self.raw_named_children(node); - if self.language == Language::Lua - && node.kind() == "variable_list" - && children.len() == 1 - && children[0].kind() == "dot_index_expression" - && node_text(node, self.source) == node_text(children[0], self.source) - { - return self.named_children(children[0]); - } - if self.language == Language::Ruby - && INTERPOLATED_STATEMENT_WRAPPER_KINDS.contains(&node.kind()) - && children.len() == 1 - && children[0].kind() == "string" - && node_text(node, self.source) == node_text(children[0], self.source) - { - let string_children = self.raw_named_children(children[0]); - if string_children - .iter() - .any(|child| child.kind() == "interpolation") - { - return string_children; - } - } - if self.language == Language::Ruby - && matches!(node.kind(), "body_statement" | 
"block_body" | "statement") - && children.len() == 1 - && matches!( - children[0].kind(), - "if_modifier" | "unless_modifier" | "while_modifier" | "until_modifier" - ) - && node_text(node, self.source) == node_text(children[0], self.source) - { - return self.named_children(children[0]); - } - if self.language == Language::Ruby - && matches!(node.kind(), "body_statement" | "block_body" | "statement") - && children.len() == 1 - && children[0].kind() == "yield" - && node_text(node, self.source) == node_text(children[0], self.source) - { - return self.named_children(children[0]); - } - if self.language == Language::Lua - && node.kind() == "expression_list" - && children.len() == 1 - && self.identifier_kind(children[0].kind()) - && node - .parent() - .map(|parent| matches!(parent.kind(), "assignment_statement" | "return_statement")) - .unwrap_or(false) - && node_text(node, self.source) == node_text(children[0], self.source) - { - return Vec::new(); - } - if self.language == Language::Lua - && node.kind() == "expression_list" - && children.len() == 1 - && matches!( - children[0].kind(), - "true" | "false" | "nil" | "number" | "integer" | "float" - ) - && node - .parent() - .map(|parent| matches!(parent.kind(), "assignment_statement" | "return_statement")) - .unwrap_or(false) - && node_text(node, self.source) == node_text(children[0], self.source) - { - return Vec::new(); - } - if self.language == Language::Lua - && node.kind() == "expression_list" - && children.len() == 1 - && matches!( - children[0].kind(), - "binary_expression" - | "function_call" - | "dot_index_expression" - | "function_definition" - | "string" - ) - && node_text(node, self.source) == node_text(children[0], self.source) - { - return self.named_children(children[0]); - } - if self.language == Language::Lua - && node.kind() == "expression_list" - && children.len() == 1 - && children[0].kind() == "table_constructor" - && node_text(node, self.source) == node_text(children[0], self.source) - { - return 
self.named_children(children[0]); - } - if self.language == Language::Lua - && node.kind() == "field" - && children.len() == 1 - && self.identifier_kind(children[0].kind()) - && node_text(node, self.source) == node_text(children[0], self.source) - { - return Vec::new(); - } - if self.language == Language::Lua - && node.kind() == "field" - && children.len() == 1 - && children[0].kind() == "string" - && node_text(node, self.source) == node_text(children[0], self.source) - { - return self.named_children(children[0]); - } - if self.language == Language::Lua - && node.kind() == "field" - && children.len() == 1 - && children[0].kind() == "function_call" - && node_text(node, self.source) == node_text(children[0], self.source) - { - return self.named_children(children[0]); - } - if self.language == Language::Lua - && node.kind() == "block" - && children.len() == 1 - && matches!( - children[0].kind(), - "function_call" | "return_statement" | "variable_declaration" - ) - && node_text(node, self.source) == node_text(children[0], self.source) - { - return self.named_children(children[0]); - } - if self.language == Language::Python - && node.kind() == "relative_import" - && children.len() == 1 - && children[0].kind() == "import_prefix" - { - return Vec::new(); - } - if self.language == Language::Python && node.kind() == "block" && children.len() == 1 { - if children[0].kind() == "function_definition" { - return self.named_children(children[0]); - } - if children[0].kind() == "decorated_definition" { - return self.named_children(children[0]); - } - if children[0].kind() == "pass_statement" - && node_text(node, self.source).trim() == "pass" - { - return Vec::new(); - } - if matches!(children[0].kind(), "break_statement" | "continue_statement") - && bare_identifier_text(node_text(node, self.source).trim()) - { - return Vec::new(); - } - if children[0].kind() == "return_statement" - && node_text(node, self.source) == node_text(children[0], self.source) - { - if 
self.raw_named_children(children[0]).is_empty() { - return Vec::new(); - } - return self.named_children(children[0]); - } - if children[0].kind() == "delete_statement" { - return self.named_children(children[0]); - } - if children[0].kind() == "if_statement" { - return self.named_children(children[0]); - } - if matches!( - children[0].kind(), - "assert_statement" - | "for_statement" - | "import_from_statement" - | "import_statement" - | "raise_statement" - | "try_statement" - | "while_statement" - | "with_statement" - ) { - return self.named_children(children[0]); - } - if children[0].kind() != "expression_statement" { - return children; - } - let statement_children = self.raw_named_children(children[0]); - if statement_children.len() == 1 - && statement_children[0].kind() == "identifier" - && node_text(node, self.source) == node_text(children[0], self.source) - { - return Vec::new(); - } - if statement_children.len() == 1 && statement_children[0].kind() == "ellipsis" { - return Vec::new(); - } - if statement_children.len() == 1 - && matches!( - statement_children[0].kind(), - "assignment" - | "augmented_assignment" - | "binary_operator" - | "call" - | "string" - | "subscript" - ) - { - return self.named_children(statement_children[0]); - } - } - if self.language == Language::Python - && node.kind() == "expression_statement" - && children.len() == 1 - && children[0].kind() == "yield" - { - return self.named_children(children[0]); - } - if self.language == Language::Python - && node.kind() == "expression_statement" - && children.len() == 1 - && children[0].kind() == "identifier" - { - return Vec::new(); - } - if self.language == Language::Python - && node.kind() == "expression_statement" - && children.len() == 1 - && children[0].kind() == "binary_operator" - { - return self.named_children(children[0]); - } - if self.language == Language::Python - && node.kind() == "expression_statement" - && children.len() == 1 - && children[0].kind() == "comparison_operator" - { - 
return self.named_children(children[0]); - } - if self.language == Language::Python - && node.kind() == "expression_statement" - && children.len() == 1 - && children[0].kind() == "call" - { - return self.named_children(children[0]); - } - if self.language == Language::Python - && node.kind() == "expression_statement" - && children.len() == 1 - && children[0].kind() == "attribute" - { - return self.named_children(children[0]); - } - if self.language == Language::Python - && node.kind() == "expression_statement" - && children.len() == 1 - && children[0].kind() == "string" - { - return self.named_children(children[0]); - } - if self.language == Language::Python && node.kind() == "as_pattern_target" { - return Vec::new(); - } - if self.language == Language::Python - && matches!(node.kind(), "with_clause" | "with_item") - && children.len() == 1 - && matches!(children[0].kind(), "with_item" | "as_pattern") - { - return self.named_children(children[0]); - } - if self.language == Language::Python - && node.kind() == "with_item" - && children.len() == 1 - && children[0].kind() == "call" - && node_text(node, self.source) == node_text(children[0], self.source) - { - return self.named_children(children[0]); - } - if self.language == Language::Python - && node.kind() == "with_item" - && children.len() == 1 - && children[0].kind() == "attribute" - && node_text(node, self.source) == node_text(children[0], self.source) + match self + .normalization_adapter + .named_children_action(node, self.source, &children) { - return self.named_children(children[0]); + NamedChildrenAction::Default => {} + NamedChildrenAction::Drop => return Vec::new(), + NamedChildrenAction::Recurse(child) => return self.named_children(child), + NamedChildrenAction::Replace(children) => return children, } + if node.kind() == "type" && children.len() == 1 { if children[0].kind() == "union_type" { return self.named_children(children[0]); } - if self.language == Language::Python && children[0].kind() == 
"binary_operator" { - return self.named_children(children[0]); - } if children[0].kind() == "generic_type" { return self.named_children(children[0]); } @@ -7919,22 +6129,12 @@ impl<'source> TreeSitterNormalizer<'source> { .collect() } - fn lua_no_paren_string_argument_content<'tree>( + fn no_paren_string_argument_content<'tree>( &self, node: TreeSitterNode<'tree>, ) -> Option> { - if self.language != Language::Lua || node.kind() != "string" { - return None; - } - let parent = node.parent()?; - if parent.kind() != "arguments" - || node_text(parent, self.source) != node_text(node, self.source) - { - return None; - } - self.raw_named_children(node) - .into_iter() - .find(|child| child.kind() == "string_content") + self.normalization_adapter + .no_paren_string_argument_content(node, self.source) } fn source_before_child(&self, node: TreeSitterNode<'_>, child: TreeSitterNode<'_>) -> Node { @@ -8186,7 +6386,7 @@ impl<'source> TreeSitterNormalizer<'source> { } fn elide_tail_returns(&self, node: Option) -> Option { - if self.language != Language::Ruby { + if !self.normalization_adapter.elides_tail_returns() { return node; } let mut node = node?; @@ -8279,7 +6479,7 @@ impl<'source> TreeSitterNormalizer<'source> { } fn elide_implicit_nil_body(&self, node: Option) -> Option { - if self.language != Language::Ruby { + if !self.normalization_adapter.elides_implicit_nil_body() { return node; } let node = self.drop_trailing_nil_statement(node); diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/base.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/base.rs new file mode 100644 index 000000000..cfa23654b --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/base.rs @@ -0,0 +1,978 @@ +use super::super::{ + bare_identifier_text, bracketed, case_arm_descendant, concatenated_string_node, + concatenated_string_target, descendant, direct_binary_operator, element_reference_shape, + function_kind, identifier_kind_name, named_children, node_text, 
question_colon_ternary_parts, + raw_named_children, ruby_exception_constant_text, statement_block_wrapper, TernaryParts, + ARRAY_LITERAL_NODE_KINDS, ARRAY_LITERAL_WRAPPER_KINDS, BOOLEAN_EXPRESSION_KINDS, + CASE_ARGUMENT_WHEN_KINDS, CASE_ELSE_KINDS, CASE_NODE_KINDS, COMPARISON_EXPRESSION_KINDS, + CONCATENATED_STRING_WRAPPER_KINDS, DOTTED_EXPRESSION_WRAPPER_KINDS, + ELEMENT_REFERENCE_NODE_KINDS, ELEMENT_REFERENCE_WRAPPER_KINDS, EMPTY_BODY_WRAPPER_KINDS, + ENSURE_BODY_WRAPPER_KINDS, HASH_LITERAL_NODE_KINDS, HASH_LITERAL_WRAPPER_KINDS, + HEREDOC_BODY_WRAPPER_KINDS, IF_NODE_KINDS, INTERPOLATED_STATEMENT_WRAPPER_KINDS, + LEADING_CASE_WRAPPER_KINDS, LEADING_FUNCTION_WRAPPER_KINDS, LEADING_IF_WRAPPER_KINDS, + LEADING_LOOP_WRAPPER_KINDS, LEADING_OWNER_WRAPPER_KINDS, LOOP_NODE_KINDS, OWNER_NODE_KINDS, + OWNER_STATEMENT_NESTED_KINDS, QUESTION_COLON_TERNARY_KINDS, RESCUE_BODY_WRAPPER_KINDS, +}; +use tree_sitter::Node as TreeSitterNode; + +pub(crate) const COMMON_ASSIGNMENT_OPERATORS: &[&str] = &["=", "+=", "-=", "*=", "/=", "%="]; +pub(crate) const RUBY_ASSIGNMENT_OPERATORS: &[&str] = &[ + "=", "+=", "-=", "*=", "/=", "%=", "**=", "&&=", "||=", "&=", "|=", "^=", "<<=", ">>=", +]; +pub(crate) const PYTHON_ASSIGNMENT_OPERATORS: &[&str] = &[ + "=", "+=", "-=", "*=", "/=", "%=", "//=", "**=", "@=", "&=", "|=", "^=", "<<=", ">>=", ":=", +]; +pub(crate) const LUA_ASSIGNMENT_OPERATORS: &[&str] = &["="]; +pub(crate) const TYPESCRIPT_ASSIGNMENT_OPERATORS: &[&str] = &[ + "=", "+=", "-=", "*=", "/=", "%=", "**=", "<<=", ">>=", ">>>=", "&=", "|=", "^=", "&&=", "||=", + "??=", +]; + +pub(crate) enum NamedChildrenAction<'tree> { + Default, + Drop, + Recurse(TreeSitterNode<'tree>), + Replace(Vec>), +} + +pub(crate) trait AstNormalizationAdapter: Sync { + fn ruby(&self) -> bool { + false + } + + fn yield_statement(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn super_statement(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn 
safe_navigation_call(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn ternary_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.ternary_parts(node, source).is_some() + } + + fn ternary_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + question_colon_ternary_parts(node, source, QUESTION_COLON_TERNARY_KINDS) + } + + fn case_argument_list(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn case_arm(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + CASE_ARGUMENT_WHEN_KINDS.contains(&node.kind()) && !self.case_else_arm(node, source) + } + + fn case_else_node<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let mut stack = named_children(node); + while !stack.is_empty() { + let child = stack.remove(0); + if self.case_else_node_kind(child, source) { + return Some(child); + } + if CASE_ARGUMENT_WHEN_KINDS.contains(&child.kind()) { + continue; + } + if !function_kind(child.kind()) { + stack.extend(named_children(child)); + } + } + None + } + + fn case_else_node_kind(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + CASE_ELSE_KINDS.contains(&node.kind()) || self.case_else_arm(node, source) + } + + fn case_else_arm(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn leading_function_statement(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn leading_function_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !LEADING_FUNCTION_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + if node + .children(&mut node.walk()) + .next() + .map(|child| child.kind() == "def") + .unwrap_or(false) + { + return Some(node); + } + let raw_named = named_children(node); + if raw_named.len() == 1 + && matches!(raw_named[0].kind(), "method" | "singleton_method") + && node_text(raw_named[0], source) == node_text(node, source) + { + 
return Some(raw_named[0]); + } + None + } + + fn leading_function_body_kind(&self) -> &'static str { + "body_statement" + } + + fn leading_owner_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(target) = self.leading_owner_target(node, source) else { + return false; + }; + target + .children(&mut target.walk()) + .next() + .map(|child| matches!(child.kind(), "class" | "module")) + .unwrap_or(false) + && named_children(target).len() >= 2 + && named_children(target) + .first() + .map(|child| !OWNER_STATEMENT_NESTED_KINDS.contains(&child.kind())) + .unwrap_or(false) + } + + fn leading_owner_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !LEADING_OWNER_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + let raw_named = named_children(node); + if raw_named.len() == 1 + && OWNER_NODE_KINDS.contains(&raw_named[0].kind()) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + Some(node) + } + + fn leading_if_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(target) = self.leading_if_target(node, source) else { + return false; + }; + target + .children(&mut target.walk()) + .next() + .map(|child| matches!(child.kind(), "if" | "unless")) + .unwrap_or(false) + && named_children(target).len() >= 2 + && named_children(target) + .first() + .map(|child| !IF_NODE_KINDS.contains(&child.kind())) + .unwrap_or(false) + } + + fn leading_if_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !LEADING_IF_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + let raw_named = named_children(node); + if raw_named.len() == 1 + && IF_NODE_KINDS.contains(&raw_named[0].kind()) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + Some(node) + } + + fn leading_case_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + let 
Some(target) = self.leading_case_target(node, source) else { + return false; + }; + target + .children(&mut target.walk()) + .next() + .map(|child| matches!(child.kind(), "case" | "match" | "switch")) + .unwrap_or(false) + && case_arm_descendant(target) + } + + fn leading_case_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !LEADING_CASE_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + let raw_named = named_children(node); + if raw_named.len() == 1 + && CASE_NODE_KINDS.contains(&raw_named[0].kind()) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + Some(node) + } + + fn leading_loop_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(target) = self.leading_loop_target(node, source) else { + return false; + }; + target + .children(&mut target.walk()) + .next() + .map(|child| !child.is_named() && matches!(child.kind(), "while" | "until")) + .unwrap_or(false) + && named_children(target).len() >= 2 + } + + fn leading_loop_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !LEADING_LOOP_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + let raw_named = named_children(node); + if raw_named.len() == 1 + && LOOP_NODE_KINDS.contains(&raw_named[0].kind()) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + Some(node) + } + + fn rescue_body_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + !self.rescue_clauses(node, source).is_empty() + } + + fn rescue_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + if RESCUE_BODY_WRAPPER_KINDS.contains(&node.kind()) { + Some(node) + } else { + None + } + } + + fn rescue_body_nodes<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let Some(target) = self.rescue_body_target(node, source) else { + return Vec::new(); 
+ }; + let named = named_children(target); + let Some(index) = named.iter().position(|child| self.rescue_clause(*child)) else { + return Vec::new(); + }; + named[..index].to_vec() + } + + fn rescue_clauses<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let Some(target) = self.rescue_body_target(node, source) else { + return Vec::new(); + }; + named_children(target) + .into_iter() + .filter(|child| self.rescue_clause(*child)) + .collect() + } + + fn rescue_clause_exceptions<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let Some(exceptions) = named_children(node) + .into_iter() + .find(|child| child.kind() == "exceptions") + else { + return Vec::new(); + }; + let text = node_text(exceptions, source).trim(); + if ruby_exception_constant_text(text) + || (named_children(exceptions).is_empty() && !text.is_empty()) + { + return vec![exceptions]; + } + named_children(exceptions) + } + + fn rescue_clause_exceptions_source<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| child.kind() == "exceptions") + } + + fn rescue_clause_exception_variable_name<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| child.kind() == "exception_variable") + .and_then(|variable| { + named_children(variable) + .into_iter() + .find(|child| identifier_kind_name(child.kind())) + }) + } + + fn rescue_clause_exception_variable_source<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| child.kind() == "exception_variable") + } + + fn rescue_clause_handler<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node).into_iter().rev().find(|child| { + !matches!( + child.kind(), + "exceptions" | "exception_variable" | "comment" + ) + }) + } + + fn rescue_clause(&self, node: TreeSitterNode<'_>) 
-> bool { + node.kind() == "rescue" + } + + fn ensure_body_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.ensure_clause(node, source).is_some() + } + + fn ensure_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + if ENSURE_BODY_WRAPPER_KINDS.contains(&node.kind()) { + Some(node) + } else { + None + } + } + + fn ensure_body_nodes<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let Some(target) = self.ensure_body_target(node, source) else { + return Vec::new(); + }; + let named = named_children(target); + let Some(index) = named + .iter() + .position(|child| self.ensure_clause_kind(*child)) + else { + return Vec::new(); + }; + named[..index].to_vec() + } + + fn ensure_clause<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let target = self.ensure_body_target(node, source)?; + named_children(target) + .into_iter() + .find(|child| self.ensure_clause_kind(*child)) + } + + fn ensure_clause_body<'tree>( + &self, + _node: TreeSitterNode<'tree>, + ) -> Option> { + None + } + + fn ensure_clause_kind(&self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "ensure" + } + + fn array_literal_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.array_literal_target(node, source).is_some() + } + + fn array_literal_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if ARRAY_LITERAL_NODE_KINDS.contains(&node.kind()) { + return Some(node); + } + if !ARRAY_LITERAL_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + if bracketed(node, source, "[", "]") { + return Some(node); + } + + let named = named_children(node); + let child = *named.first()?; + if named.len() == 1 { + if ARRAY_LITERAL_NODE_KINDS.contains(&child.kind()) { + return Some(child); + } + + if matches!(child.kind(), "expression_statement" | "statement") + && node_text(child, source).trim() == node_text(node, 
source).trim() + { + return self.array_literal_target(child, source); + } + + let stripped = node_text(node, source).trim(); + if stripped == node_text(child, source) + || stripped == format!("{};", node_text(child, source)) + { + if ARRAY_LITERAL_NODE_KINDS.contains(&child.kind()) { + return Some(child); + } + } + } + + None + } + + fn array_literal_values<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.array_literal_target(node, source).unwrap_or(node); + named_children(target) + } + + fn element_reference_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.element_reference_target(node, source).is_some() + } + + fn element_reference_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if ELEMENT_REFERENCE_NODE_KINDS.contains(&node.kind()) { + return Some(node); + } + if !ELEMENT_REFERENCE_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + + let named = named_children(node); + if named.len() == 1 + && ELEMENT_REFERENCE_WRAPPER_KINDS.contains(&named[0].kind()) + && node_text(named[0], source).trim() == node_text(node, source).trim() + { + return self.element_reference_target(named[0], source); + } + if named.len() == 1 && ELEMENT_REFERENCE_NODE_KINDS.contains(&named[0].kind()) { + let stripped = node_text(node, source).trim(); + let child_text = node_text(named[0], source); + if stripped == child_text || stripped == format!("{child_text};") { + return Some(named[0]); + } + } + + if element_reference_shape(node, source) { + Some(node) + } else { + None + } + } + + fn element_reference_receiver<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let target = self.element_reference_target(node, source).unwrap_or(node); + named_children(target).first().copied() + } + + fn element_reference_arguments<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = 
self.element_reference_target(node, source).unwrap_or(node); + named_children(target).into_iter().skip(1).collect() + } + + fn hash_literal_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.hash_literal_target(node, source).is_some() + } + + fn hash_literal_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if HASH_LITERAL_NODE_KINDS.contains(&node.kind()) { + return Some(node); + } + if !HASH_LITERAL_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + if statement_block_wrapper(node) { + return None; + } + if bracketed(node, source, "{", "}") { + return Some(node); + } + + let named = named_children(node); + if named.len() != 1 { + return None; + } + + let child = named[0]; + if node.kind() == "parenthesized_expression" { + return self.hash_literal_target(child, source); + } + + let stripped = node_text(node, source).trim(); + let child_text = node_text(child, source); + if stripped == child_text || stripped == format!("{child_text};") { + if HASH_LITERAL_NODE_KINDS.contains(&child.kind()) { + return Some(child); + } + if HASH_LITERAL_WRAPPER_KINDS.contains(&child.kind()) { + return self.hash_literal_target(child, source); + } + } + + None + } + + fn hash_literal_values<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.hash_literal_target(node, source).unwrap_or(node); + named_children(target) + } + + fn empty_body_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + EMPTY_BODY_WRAPPER_KINDS.contains(&node.kind()) + && named_children(node).is_empty() + && node_text(node, source).trim().is_empty() + } + + fn heredoc_body_statement(&self, node: TreeSitterNode<'_>) -> bool { + HEREDOC_BODY_WRAPPER_KINDS.contains(&node.kind()) + && named_children(node) + .iter() + .any(|child| child.kind() == "heredoc_body") + } + + fn heredoc_call_for_body(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn 
interpolated_statement( + &self, + node: TreeSitterNode<'_>, + children: &[TreeSitterNode<'_>], + ) -> bool { + INTERPOLATED_STATEMENT_WRAPPER_KINDS.contains(&node.kind()) + && children.iter().any(|child| child.kind() == "interpolation") + } + + fn concatenated_string_statement( + &self, + node: TreeSitterNode<'_>, + children: &[TreeSitterNode<'_>], + ) -> bool { + if concatenated_string_node(node).is_some() { + return true; + } + if !self + .concatenated_string_wrapper_kinds() + .contains(&node.kind()) + { + return false; + } + if children.len() > 1 && children.iter().all(|child| child.kind() == "string") { + return true; + } + children.len() == 1 && concatenated_string_target(children[0]).is_some() + } + + fn zero_child_identifier_call(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn operator_call_expression_kind(&self, node: TreeSitterNode<'_>) -> bool { + matches!(node.kind(), "binary" | "binary_expression") + } + + fn boolean_expression_kind(&self, node: TreeSitterNode<'_>) -> bool { + BOOLEAN_EXPRESSION_KINDS.contains(&node.kind()) + } + + fn comparison_expression_kind(&self, node: TreeSitterNode<'_>) -> bool { + COMPARISON_EXPRESSION_KINDS.contains(&node.kind()) + } + + fn dotted_expression_wrapper(&self, node: TreeSitterNode<'_>) -> bool { + self.dotted_expression_wrapper_kinds() + .contains(&node.kind()) + } + + fn unary_not_expression(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + matches!(node.kind(), "unary" | "unary_expression") + && node_text(node, source).trim_start().starts_with('!') + } + + fn unary_minus_expression(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + matches!(node.kind(), "unary" | "unary_expression") + && node_text(node, source).trim_start().starts_with('-') + } + + fn binary_operator(&self, node: TreeSitterNode<'_>, source: &str) -> Option { + if let Some(operator) = direct_binary_operator(node, source) { + return Some(operator.to_string()); + } + + let raw_named = 
raw_named_children(node); + if raw_named.len() == 1 + && self.binary_wrapper_kinds().contains(&raw_named[0].kind()) + && node_text(node, source) == node_text(raw_named[0], source) + { + return self.binary_operator(raw_named[0], source); + } + + None + } + + fn class_node(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "class" | "class_definition" | "class_declaration" | "class_specifier" + ) + } + + fn identifier_text_node(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn member_assignment_target(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn instance_variable(&self, node: TreeSitterNode<'_>, _source: &str) -> bool { + node.kind() == "instance_variable" + } + + fn global_variable(&self, node: TreeSitterNode<'_>, _source: &str) -> bool { + node.kind() == "global_variable" + } + + fn literal_fragment_assignment_context(&self, node: TreeSitterNode<'_>, _source: &str) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if matches!( + parent.kind(), + "string" | "delimited_symbol" | "regex" | "regex_literal" + ) { + return true; + } + + matches!( + node.kind(), + "string_content" | "escape_sequence" | "interpolation" | "string_fragment" + ) && parent + .parent() + .map(|grandparent| { + matches!( + grandparent.kind(), + "string" | "delimited_symbol" | "regex" | "regex_literal" + ) + }) + .unwrap_or(false) + } + + fn assignment_operator(&self, text: &str) -> bool { + self.assignment_operators().contains(&text) + } + + fn unwrap_node( + &self, + node: TreeSitterNode<'_>, + _source: &str, + named_child_count: usize, + ) -> bool { + matches!( + node.kind(), + "parenthesized_expression" + | "parenthesized_statements" + | "expression_statement" + | "statement" + | "case_pattern" + | "match_pattern" + | "pattern" + ) && named_child_count == 1 + } + + fn interpolated_string( + &self, + node: TreeSitterNode<'_>, + children: &[TreeSitterNode<'_>], + ) -> bool { + node.kind() 
== "string" && children.iter().any(|child| child.kind() == "interpolation") + } + + fn lambda_expression(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.lambda_target(node, source).is_some() + } + + fn lambda_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + if node.kind() == "lambda" { + Some(node) + } else { + None + } + } + + fn interpolation_node(&self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "interpolation" + } + + fn explicit_alternative<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "else" | "else_clause" | "else_statement")) + } + + fn named_field<'tree>( + &self, + node: TreeSitterNode<'tree>, + name: &str, + ) -> Option> { + node.child_by_field_name(name) + } + + fn named_children_action<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + _children: &[TreeSitterNode<'tree>], + ) -> NamedChildrenAction<'tree> { + NamedChildrenAction::Default + } + + fn nested_class_body_child<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + None + } + + fn else_if_block<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + None + } + + fn logical_operator_assignment(&self, _operator: &str) -> bool { + false + } + + fn statement_wrapped_call_target<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + None + } + + fn inline_def_function_text_source<'tree>( + &self, + function: TreeSitterNode<'tree>, + _source: &str, + ) -> TreeSitterNode<'tree> { + function + } + + fn bare_const_call_function(&self, _function: TreeSitterNode<'_>) -> bool { + false + } + + fn normalize_default_parameters(&self) -> bool { + false + } + + fn normalize_block_parameters(&self) -> bool { + false + } + + fn boolean_statement_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + _children: 
&[TreeSitterNode<'tree>], + ) -> TreeSitterNode<'tree> { + node + } + + fn single_assignment_block_child(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn single_assignment_statement(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn member_read_excluded(&self, _node: TreeSitterNode<'_>) -> bool { + false + } + + fn no_paren_string_argument_content<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + None + } + + fn elides_tail_returns(&self) -> bool { + false + } + + fn elides_implicit_nil_body(&self) -> bool { + false + } + + fn assignment_operators(&self) -> &'static [&'static str] { + COMMON_ASSIGNMENT_OPERATORS + } + + fn binary_wrapper_kinds(&self) -> &'static [&'static str] { + super::super::BINARY_WRAPPER_KINDS + } + + fn concatenated_string_wrapper_kinds(&self) -> &'static [&'static str] { + CONCATENATED_STRING_WRAPPER_KINDS + } + + fn dotted_expression_wrapper_kinds(&self) -> &'static [&'static str] { + DOTTED_EXPRESSION_WRAPPER_KINDS + } + + fn leading_function_statement_with_keyword( + &self, + node: TreeSitterNode<'_>, + source: &str, + keyword: &str, + wrapper_kinds: &[&str], + ) -> bool { + if !wrapper_kinds.contains(&node.kind()) { + return false; + } + let Some(target) = self.leading_function_target(node, source) else { + return false; + }; + target + .children(&mut target.walk()) + .next() + .map(|child| child.kind() == keyword) + .unwrap_or(false) + && named_children(target) + .iter() + .any(|child| identifier_kind_name(child.kind())) + } + + fn exact_single_named_child<'tree>( + &self, + node: TreeSitterNode<'tree>, + kinds: &[&str], + source: &str, + ) -> Option> { + let children = named_children(node); + if children.len() != 1 { + return None; + } + let child = children[0]; + if !kinds.contains(&child.kind()) || node_text(node, source) != node_text(child, source) { + return None; + } + Some(child) + } + + fn default_case_pattern(&self, node: 
TreeSitterNode<'_>, source: &str) -> bool { + named_children(node) + .into_iter() + .find(|child| super::super::CASE_DEFAULT_PATTERN_KINDS.contains(&child.kind())) + .map(|pattern| node_text(pattern, source).trim() == "_") + .unwrap_or(false) + } + + fn bare_identifier_text(&self, text: &str) -> bool { + bare_identifier_text(text) + } + + fn descendant<'tree>( + &self, + node: TreeSitterNode<'tree>, + kinds: &[&str], + ) -> Option> { + descendant(node, kinds) + } +} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/c.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/c.rs new file mode 100644 index 000000000..badc37c41 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/c.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct CAstAdapter; + +impl AstNormalizationAdapter for CAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/cpp.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/cpp.rs new file mode 100644 index 000000000..daa9143ff --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/cpp.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct CppAstAdapter; + +impl AstNormalizationAdapter for CppAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/csharp.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/csharp.rs new file mode 100644 index 000000000..256fa4d24 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/csharp.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct CSharpAstAdapter; + +impl AstNormalizationAdapter for CSharpAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/go.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/go.rs new file mode 100644 index 000000000..79abeea2f --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/go.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct GoAstAdapter; + +impl 
AstNormalizationAdapter for GoAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/java.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/java.rs new file mode 100644 index 000000000..4acf3e41c --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/java.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct JavaAstAdapter; + +impl AstNormalizationAdapter for JavaAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/javascript.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/javascript.rs new file mode 100644 index 000000000..a545fc064 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/javascript.rs @@ -0,0 +1 @@ +pub(crate) use super::typescript::TypeScriptAstAdapter as JavaScriptAstAdapter; diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/kotlin.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/kotlin.rs new file mode 100644 index 000000000..25fc5396e --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/kotlin.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct KotlinAstAdapter; + +impl AstNormalizationAdapter for KotlinAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/lua.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/lua.rs new file mode 100644 index 000000000..44c5cfe9c --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/lua.rs @@ -0,0 +1,497 @@ +use super::super::{ + bracketed, direct_binary_operator, lua_keyed_table_target, lua_positional_table_target, + named_children, node_text, raw_named_children, LUA_LEADING_FUNCTION_WRAPPER_KINDS, + LUA_LEADING_IF_WRAPPER_KINDS, +}; +use super::base::{AstNormalizationAdapter, NamedChildrenAction, LUA_ASSIGNMENT_OPERATORS}; +use tree_sitter::Node as TreeSitterNode; + +pub(crate) struct LuaAstAdapter; + +impl AstNormalizationAdapter for LuaAstAdapter { + fn explicit_alternative<'tree>( + &self, + node: 
TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "elseif_statement" | "else" | "else_statement")) + } + + fn ternary_parts<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + None + } + + fn named_children_action<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + children: &[TreeSitterNode<'tree>], + ) -> NamedChildrenAction<'tree> { + if node.kind() == "variable_list" && children.len() == 1 { + if children[0].kind() == "identifier" && lua_single_assignment_block_child(node, source) + { + return NamedChildrenAction::Drop; + } + if node + .parent() + .map(|parent| parent.kind() == "for_generic_clause") + .unwrap_or(false) + { + return NamedChildrenAction::Drop; + } + if node + .parent() + .map(|parent| { + parent.kind() == "variable_declaration" && raw_named_children(parent).len() == 1 + }) + .unwrap_or(false) + { + return NamedChildrenAction::Drop; + } + if children[0].kind() == "dot_index_expression" + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Recurse(children[0]); + } + } + + if node.kind() == "expression_list" && children.len() == 1 { + if children[0].kind() == "identifier" + && node + .parent() + .map(|parent| { + matches!(parent.kind(), "assignment_statement" | "return_statement") + }) + .unwrap_or(false) + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Drop; + } + if matches!( + children[0].kind(), + "true" | "false" | "nil" | "number" | "integer" | "float" + ) && node + .parent() + .map(|parent| matches!(parent.kind(), "assignment_statement" | "return_statement")) + .unwrap_or(false) + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Drop; + } + if matches!( + children[0].kind(), + "binary_expression" + | "function_call" + | "dot_index_expression" + | "function_definition" + | "string" + | 
"table_constructor" + ) && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Recurse(children[0]); + } + } + + if node.kind() == "field" && children.len() == 1 { + if children[0].kind() == "identifier" + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Drop; + } + if matches!(children[0].kind(), "string" | "function_call") + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Recurse(children[0]); + } + } + + if node.kind() == "block" + && children.len() == 1 + && matches!( + children[0].kind(), + "function_call" | "return_statement" | "variable_declaration" + ) + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Recurse(children[0]); + } + + NamedChildrenAction::Default + } + + fn unary_minus_expression(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + (matches!(node.kind(), "unary" | "unary_expression") + && node_text(node, source).trim_start().starts_with('-')) + || (node.kind() == "expression_list" + && node + .children(&mut node.walk()) + .next() + .map(|child| node_text(child, source) == "-") + .unwrap_or(false) + && named_children(node).len() == 1) + } + + fn binary_operator(&self, node: TreeSitterNode<'_>, source: &str) -> Option { + if let Some(operator) = direct_binary_operator(node, source) { + return Some(operator.to_string()); + } + + let child = self.exact_single_named_child(node, self.binary_wrapper_kinds(), source)?; + self.binary_operator(child, source) + } + + fn unwrap_node( + &self, + node: TreeSitterNode<'_>, + source: &str, + named_child_count: usize, + ) -> bool { + if matches!( + node.kind(), + "parenthesized_expression" + | "parenthesized_statements" + | "expression_statement" + | "statement" + | "case_pattern" + | "match_pattern" + | "pattern" + ) && named_child_count == 1 + { + return true; + } + + if node.kind() != "expression_list" || named_child_count != 
1 { + return false; + } + + let raw_named = raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "parenthesized_expression" + && node_text(raw_named[0], source) == node_text(node, source) + { + return true; + } + + let raw_children = node.children(&mut node.walk()).collect::>(); + raw_children + .first() + .map(|child| node_text(*child, source) == "(") + .unwrap_or(false) + && raw_children + .last() + .map(|child| node_text(*child, source) == ")") + .unwrap_or(false) + } + + fn leading_function_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.leading_function_statement_with_keyword( + node, + source, + "function", + LUA_LEADING_FUNCTION_WRAPPER_KINDS, + ) + } + + fn leading_function_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !LUA_LEADING_FUNCTION_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + if node + .children(&mut node.walk()) + .next() + .map(|child| child.kind() == "function") + .unwrap_or(false) + { + return Some(node); + } + self.exact_single_named_child(node, &["function_declaration"], source) + } + + fn leading_function_body_kind(&self) -> &'static str { + "block" + } + + fn leading_if_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if LUA_LEADING_IF_WRAPPER_KINDS.contains(&node.kind()) { + if let Some(child) = self.exact_single_named_child(node, &["if_statement"], source) { + return Some(child); + } + } + if super::super::LEADING_IF_WRAPPER_KINDS.contains(&node.kind()) { + return Some(node); + } + None + } + + fn array_literal_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if let Some(target) = lua_positional_table_target(node, source) { + return Some(target); + } + + if super::super::ARRAY_LITERAL_NODE_KINDS.contains(&node.kind()) { + return Some(node); + } + if !super::super::ARRAY_LITERAL_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + if 
bracketed(node, source, "[", "]") { + return Some(node); + } + let named = named_children(node); + let child = *named.first()?; + if named.len() == 1 { + if super::super::ARRAY_LITERAL_NODE_KINDS.contains(&child.kind()) { + return Some(child); + } + if matches!(child.kind(), "expression_statement" | "statement") + && node_text(child, source).trim() == node_text(node, source).trim() + { + return self.array_literal_target(child, source); + } + } + None + } + + fn array_literal_values<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.array_literal_target(node, source).unwrap_or(node); + if target.kind() == "arguments" { + if let Some(table) = named_children(target) + .into_iter() + .find(|child| child.kind() == "table_constructor") + { + if node_text(target, source).trim() == node_text(table, source).trim() { + return named_children(table); + } + } + } + if target.kind() == "table_constructor" { + return named_children(target); + } + + named_children(target) + } + + fn hash_literal_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if let Some(target) = lua_keyed_table_target(node, source) { + return Some(target); + } + + if super::super::HASH_LITERAL_NODE_KINDS.contains(&node.kind()) { + return Some(node); + } + if !super::super::HASH_LITERAL_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + None + } + + fn hash_literal_values<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.hash_literal_target(node, source).unwrap_or(node); + if target.kind() == "arguments" { + if let Some(table) = named_children(target) + .into_iter() + .find(|child| child.kind() == "table_constructor") + { + return named_children(table); + } + return named_children(target); + } + if target.kind() == "table_constructor" { + return named_children(target); + } + + named_children(target) + } + + fn identifier_text_node(&self, node: TreeSitterNode<'_>, 
source: &str) -> bool { + matches!(node.kind(), "variable_list" | "expression_list") + && self.bare_identifier_text(node_text(node, source)) + } + + fn member_assignment_target(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + if node.kind() != "variable_list" { + return false; + } + + let raw_named = raw_named_children(node); + let target = if raw_named.len() == 1 + && raw_named[0].kind() == "dot_index_expression" + && node_text(node, source) == node_text(raw_named[0], source) + { + raw_named[0] + } else { + node + }; + + raw_named_children(target).len() == 2 + && target + .children(&mut target.walk()) + .any(|child| !child.is_named() && node_text(child, source) == ".") + } + + fn literal_fragment_assignment_context(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if matches!( + parent.kind(), + "string" | "delimited_symbol" | "regex" | "regex_literal" + ) { + return true; + } + matches!( + node.kind(), + "string_content" | "escape_sequence" | "interpolation" | "string_fragment" + ) && (parent.kind() == "expression_list" + || parent + .parent() + .map(|grandparent| { + matches!( + grandparent.kind(), + "string" | "delimited_symbol" | "regex" | "regex_literal" + ) + }) + .unwrap_or(false)) + && !source.is_empty() + } + + fn lambda_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if node.kind() == "function_definition" { + return Some(node); + } + + if node.kind() == "expression_list" { + if node + .children(&mut node.walk()) + .next() + .map(|child| child.kind() == "function") + .unwrap_or(false) + && named_children(node) + .iter() + .any(|child| child.kind() == "block") + { + return Some(node); + } + + let named = named_children(node); + if named.len() == 1 + && named[0].kind() == "function_definition" + && node_text(named[0], source) == node_text(node, source) + { + return Some(named[0]); + } + } + + if node.kind() == "lambda" { + Some(node) 
+ } else { + None + } + } + + fn operator_call_expression_kind(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "binary" | "binary_expression" | "expression_list" + ) + } + + fn boolean_expression_kind(&self, node: TreeSitterNode<'_>) -> bool { + super::super::BOOLEAN_EXPRESSION_KINDS.contains(&node.kind()) + || node.kind() == "expression_list" + } + + fn comparison_expression_kind(&self, node: TreeSitterNode<'_>) -> bool { + super::super::COMPARISON_EXPRESSION_KINDS.contains(&node.kind()) + || node.kind() == "expression_list" + } + + fn single_assignment_block_child(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + lua_single_assignment_block_child(node, source) + } + + fn single_assignment_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + if node.kind() != "assignment_statement" { + return false; + } + let Some(parent) = node.parent() else { + return false; + }; + parent.kind() == "block" + && node_text(parent, source) == node_text(node, source) + && raw_named_children(parent).len() == 1 + } + + fn member_read_excluded(&self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "field" + } + + fn no_paren_string_argument_content<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if node.kind() != "string" { + return None; + } + let parent = node.parent()?; + if parent.kind() != "arguments" || node_text(parent, source) != node_text(node, source) { + return None; + } + raw_named_children(node) + .into_iter() + .find(|child| child.kind() == "string_content") + } + + fn assignment_operators(&self) -> &'static [&'static str] { + LUA_ASSIGNMENT_OPERATORS + } +} + +fn lua_single_assignment_block_child(node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "assignment_statement" { + return false; + } + let Some(grandparent) = parent.parent() else { + return false; + }; + grandparent.kind() == "block" + && 
node_text(grandparent, source) == node_text(parent, source) + && raw_named_children(grandparent).len() == 1 +} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/mod.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/mod.rs new file mode 100644 index 000000000..8e4a05b6d --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/mod.rs @@ -0,0 +1,67 @@ +pub(crate) mod base; +mod c; +mod cpp; +mod csharp; +mod go; +mod java; +mod javascript; +mod kotlin; +mod lua; +mod python; +mod ruby; +mod rust; +mod swift; +mod typescript; +mod zig; + +pub(crate) use base::{AstNormalizationAdapter, NamedChildrenAction}; + +use super::super::syntax::Language; +use c::CAstAdapter; +use cpp::CppAstAdapter; +use csharp::CSharpAstAdapter; +use go::GoAstAdapter; +use java::JavaAstAdapter; +use javascript::JavaScriptAstAdapter; +use kotlin::KotlinAstAdapter; +use lua::LuaAstAdapter; +use python::PythonAstAdapter; +use ruby::RubyAstAdapter; +use rust::RustAstAdapter; +use swift::SwiftAstAdapter; +use typescript::TypeScriptAstAdapter; +use zig::ZigAstAdapter; + +static RUBY: RubyAstAdapter = RubyAstAdapter; +static PYTHON: PythonAstAdapter = PythonAstAdapter; +static JAVASCRIPT: JavaScriptAstAdapter = JavaScriptAstAdapter; +static TYPESCRIPT: TypeScriptAstAdapter = TypeScriptAstAdapter; +static LUA: LuaAstAdapter = LuaAstAdapter; +static C: CAstAdapter = CAstAdapter; +static CPP: CppAstAdapter = CppAstAdapter; +static CSHARP: CSharpAstAdapter = CSharpAstAdapter; +static GO: GoAstAdapter = GoAstAdapter; +static JAVA: JavaAstAdapter = JavaAstAdapter; +static KOTLIN: KotlinAstAdapter = KotlinAstAdapter; +static RUST: RustAstAdapter = RustAstAdapter; +static SWIFT: SwiftAstAdapter = SwiftAstAdapter; +static ZIG: ZigAstAdapter = ZigAstAdapter; + +pub(crate) fn normalization_adapter(language: Language) -> &'static dyn AstNormalizationAdapter { + match language { + Language::Ruby => &RUBY, + Language::Python => &PYTHON, + Language::JavaScript => &JAVASCRIPT, + 
Language::TypeScript => &TYPESCRIPT, + Language::Lua => &LUA, + Language::C => &C, + Language::Cpp => &CPP, + Language::CSharp => &CSHARP, + Language::Go => &GO, + Language::Java => &JAVA, + Language::Kotlin => &KOTLIN, + Language::Rust => &RUST, + Language::Swift => &SWIFT, + Language::Zig => &ZIG, + } +} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/python.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/python.rs new file mode 100644 index 000000000..9dfb8bee6 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/python.rs @@ -0,0 +1,557 @@ +use super::super::{ + bare_identifier_text, named_children, node_text, raw_named_children, TernaryParts, + PYTHON_CONCATENATED_STRING_WRAPPER_KINDS, PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS, + PYTHON_LEADING_FUNCTION_WRAPPER_KINDS, PYTHON_LEADING_IF_WRAPPER_KINDS, + PYTHON_LEADING_OWNER_WRAPPER_KINDS, +}; +use super::base::{AstNormalizationAdapter, NamedChildrenAction, PYTHON_ASSIGNMENT_OPERATORS}; +use tree_sitter::Node as TreeSitterNode; + +const PYTHON_BODY_FIELD_KINDS: &[&str] = &[ + "elif_clause", + "else_clause", + "for_statement", + "function_definition", + "if_statement", + "try_statement", + "while_statement", + "with_statement", +]; + +pub(crate) struct PythonAstAdapter; + +impl AstNormalizationAdapter for PythonAstAdapter { + fn yield_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + if !matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "expression_statement" | "statement" + ) { + return false; + } + let named = named_children(node); + named.len() == 1 + && named[0].kind() == "yield" + && node_text(named[0], source) == node_text(node, source) + } + + fn explicit_alternative<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "elif_clause" | "else" | "else_clause")) + } + + fn case_else_arm(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + node.kind() 
== "case_clause" && self.default_case_pattern(node, source) + } + + fn named_field<'tree>( + &self, + node: TreeSitterNode<'tree>, + name: &str, + ) -> Option> { + node.child_by_field_name(name) + .or_else(|| self.python_body_field(node, name)) + } + + fn named_children_action<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + children: &[TreeSitterNode<'tree>], + ) -> NamedChildrenAction<'tree> { + if node.kind() == "with_clause" && bare_identifier_text(node_text(node, source)) { + return NamedChildrenAction::Drop; + } + + if node.kind() == "relative_import" + && children.len() == 1 + && children[0].kind() == "import_prefix" + { + return NamedChildrenAction::Drop; + } + + if node.kind() == "block" && children.len() == 1 { + let child = children[0]; + if matches!(child.kind(), "function_definition" | "decorated_definition") { + return NamedChildrenAction::Recurse(child); + } + if child.kind() == "pass_statement" && node_text(node, source).trim() == "pass" { + return NamedChildrenAction::Drop; + } + if matches!(child.kind(), "break_statement" | "continue_statement") + && bare_identifier_text(node_text(node, source).trim()) + { + return NamedChildrenAction::Drop; + } + if child.kind() == "return_statement" + && node_text(node, source) == node_text(child, source) + { + if raw_named_children(child).is_empty() { + return NamedChildrenAction::Drop; + } + return NamedChildrenAction::Recurse(child); + } + if matches!(child.kind(), "delete_statement" | "if_statement") { + return NamedChildrenAction::Recurse(child); + } + if matches!( + child.kind(), + "assert_statement" + | "for_statement" + | "import_from_statement" + | "import_statement" + | "raise_statement" + | "try_statement" + | "while_statement" + | "with_statement" + ) { + return NamedChildrenAction::Recurse(child); + } + if child.kind() == "expression_statement" { + let statement_children = raw_named_children(child); + if statement_children.len() == 1 + && statement_children[0].kind() == "identifier" 
+ && node_text(node, source) == node_text(child, source) + { + return NamedChildrenAction::Drop; + } + if statement_children.len() == 1 && statement_children[0].kind() == "ellipsis" { + return NamedChildrenAction::Drop; + } + if statement_children.len() == 1 + && matches!( + statement_children[0].kind(), + "assignment" + | "augmented_assignment" + | "binary_operator" + | "call" + | "string" + | "subscript" + ) + { + return NamedChildrenAction::Recurse(statement_children[0]); + } + } + } + + if node.kind() == "expression_statement" && children.len() == 1 { + let child = children[0]; + if child.kind() == "identifier" { + return NamedChildrenAction::Drop; + } + if matches!( + child.kind(), + "yield" + | "binary_operator" + | "comparison_operator" + | "call" + | "attribute" + | "string" + ) { + return NamedChildrenAction::Recurse(child); + } + } + + if node.kind() == "as_pattern_target" { + return NamedChildrenAction::Drop; + } + + if matches!(node.kind(), "with_clause" | "with_item") + && children.len() == 1 + && matches!(children[0].kind(), "with_item" | "as_pattern") + { + return NamedChildrenAction::Recurse(children[0]); + } + + if node.kind() == "with_item" + && children.len() == 1 + && matches!(children[0].kind(), "call" | "attribute") + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Recurse(children[0]); + } + + if node.kind() == "type" && children.len() == 1 && children[0].kind() == "binary_operator" { + return NamedChildrenAction::Recurse(children[0]); + } + + NamedChildrenAction::Default + } + + fn nested_class_body_child<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + if node.kind() != "block" { + return None; + } + let raw_children = raw_named_children(node); + if raw_children.len() == 1 + && raw_children[0].kind() == "class_definition" + && node + .parent() + .map(|parent| parent.kind() == "class_definition") + .unwrap_or(false) + { + Some(raw_children[0]) + } else { + None 
+ } + } + + fn else_if_block<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + if node.kind() != "else_clause" { + return None; + } + raw_named_children(node) + .into_iter() + .find(|child| child.kind() == "block") + } + + fn leading_function_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.leading_function_statement_with_keyword( + node, + source, + "def", + PYTHON_LEADING_FUNCTION_WRAPPER_KINDS, + ) + } + + fn leading_function_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !PYTHON_LEADING_FUNCTION_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + if node + .children(&mut node.walk()) + .next() + .map(|child| child.kind() == "def") + .unwrap_or(false) + { + return Some(node); + } + self.exact_single_named_child(node, &["function_definition"], source) + } + + fn leading_function_body_kind(&self) -> &'static str { + "block" + } + + fn leading_owner_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if PYTHON_LEADING_OWNER_WRAPPER_KINDS.contains(&node.kind()) { + let raw_named = named_children(node); + if raw_named.len() == 1 + && matches!( + raw_named[0].kind(), + "class" | "class_definition" | "class_declaration" | "module" + ) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + return Some(node); + } + if super::super::LEADING_OWNER_WRAPPER_KINDS.contains(&node.kind()) { + return Some(node); + } + None + } + + fn leading_if_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if PYTHON_LEADING_IF_WRAPPER_KINDS.contains(&node.kind()) { + if let Some(child) = self.exact_single_named_child(node, &["if_statement"], source) { + return Some(child); + } + } + if super::super::LEADING_IF_WRAPPER_KINDS.contains(&node.kind()) { + return Some(node); + } + None + } + + fn rescue_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, 
+ source: &str, + ) -> Option> { + if node.kind() == "try_statement" + || self.flattened_try_block(node, &["except_clause"], source) + { + return Some(node); + } + if node.kind() == "block" { + if let Some(child) = self.exact_single_named_child(node, &["try_statement"], source) { + return Some(child); + } + } + if super::super::RESCUE_BODY_WRAPPER_KINDS.contains(&node.kind()) { + return Some(node); + } + None + } + + fn rescue_body_nodes<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.rescue_body_target(node, source).unwrap_or(node); + if target.kind() == "try_statement" + || self.flattened_try_block(target, &["except_clause"], source) + { + return named_children(target) + .into_iter() + .take_while(|child| !matches!(child.kind(), "except_clause" | "finally_clause")) + .collect(); + } + let Some(index) = named_children(target) + .iter() + .position(|child| self.rescue_clause(*child)) + else { + return Vec::new(); + }; + named_children(target)[..index].to_vec() + } + + fn rescue_clauses<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let Some(target) = self.rescue_body_target(node, source) else { + return Vec::new(); + }; + named_children(target) + .into_iter() + .filter(|child| child.kind() == "except_clause") + .collect() + } + + fn rescue_clause_exceptions<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Vec> { + let Some(pattern) = named_children(node) + .into_iter() + .find(|child| !matches!(child.kind(), "block" | "comment")) + else { + return Vec::new(); + }; + if pattern.kind() != "as_pattern" { + return vec![pattern]; + } + named_children(pattern) + .into_iter() + .find(|child| child.kind() != "as_pattern_target") + .into_iter() + .collect() + } + + fn rescue_clause_exceptions_source<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + self.rescue_clause_exceptions(node, source) + .into_iter() + .next() + } + + fn 
rescue_clause_exception_variable_name<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| child.kind() == "as_pattern") + .and_then(|pattern| self.descendant(pattern, &["as_pattern_target"])) + } + + fn rescue_clause_exception_variable_source<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.rescue_clause_exception_variable_name(node) + } + + fn rescue_clause_handler<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() == "block") + } + + fn ensure_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if node.kind() == "try_statement" + || self.flattened_try_block(node, &["finally_clause"], source) + { + return Some(node); + } + if node.kind() == "block" { + if let Some(child) = self.exact_single_named_child(node, &["try_statement"], source) { + return Some(child); + } + } + if super::super::ENSURE_BODY_WRAPPER_KINDS.contains(&node.kind()) { + return Some(node); + } + None + } + + fn ensure_body_nodes<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.ensure_body_target(node, source).unwrap_or(node); + if target.kind() == "try_statement" + || self.flattened_try_block(target, &["finally_clause"], source) + { + return named_children(target) + .into_iter() + .take_while(|child| child.kind() != "finally_clause") + .collect(); + } + let named = named_children(target); + let Some(index) = named + .iter() + .position(|child| self.ensure_clause_kind(*child)) + else { + return Vec::new(); + }; + named[..index].to_vec() + } + + fn ensure_clause<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let target = self.ensure_body_target(node, source)?; + named_children(target) + .into_iter() + .find(|child| child.kind() == "finally_clause") + } + + fn ensure_clause_body<'tree>( + &self, + 
node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() == "block") + } + + fn ternary_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + if node.kind() != "conditional_expression" { + return None; + } + let named = named_children(node); + Some(TernaryParts { + condition: *named.get(1)?, + positive: vec![*named.first()?], + negative: vec![*named.get(2)?], + }) + } + + fn unary_minus_expression(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + matches!(node.kind(), "unary" | "unary_expression" | "unary_operator") + && node_text(node, source).trim_start().starts_with('-') + } + + fn empty_body_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + (super::super::EMPTY_BODY_WRAPPER_KINDS.contains(&node.kind()) + && named_children(node).is_empty() + && node_text(node, source).trim().is_empty()) + || node.kind() == "pass_statement" + || (node.kind() == "block" && node_text(node, source).trim() == "pass" && { + let named = named_children(node); + named.is_empty() || named.iter().all(|child| child.kind() == "pass_statement") + }) + } + + fn operator_call_expression_kind(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "binary" | "binary_expression" | "binary_operator" + ) + } + + fn assignment_operators(&self) -> &'static [&'static str] { + PYTHON_ASSIGNMENT_OPERATORS + } + + fn concatenated_string_wrapper_kinds(&self) -> &'static [&'static str] { + PYTHON_CONCATENATED_STRING_WRAPPER_KINDS + } + + fn dotted_expression_wrapper_kinds(&self) -> &'static [&'static str] { + PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS + } +} + +impl PythonAstAdapter { + fn flattened_try_block( + &self, + node: TreeSitterNode<'_>, + clauses: &[&str], + source: &str, + ) -> bool { + node.kind() == "block" + && node + .children(&mut node.walk()) + .next() + .map(|child| node_text(child, source) == "try") + .unwrap_or(false) + && named_children(node) + 
.iter() + .any(|child| clauses.contains(&child.kind())) + } + + fn python_body_field<'tree>( + &self, + node: TreeSitterNode<'tree>, + name: &str, + ) -> Option> { + if !matches!(name, "body" | "consequence") + || !PYTHON_BODY_FIELD_KINDS.contains(&node.kind()) + { + return None; + } + raw_named_children(node) + .into_iter() + .find(|child| child.kind() == "block") + } +} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/ruby.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/ruby.rs new file mode 100644 index 000000000..362df5201 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/ruby.rs @@ -0,0 +1,267 @@ +use super::super::{ + heredoc_marker_text, named_children, node_text, raw_named_children, ruby_variable_name_text, + CASE_ARGUMENT_WHEN_KINDS, INTERPOLATED_STATEMENT_WRAPPER_KINDS, LEADING_FUNCTION_WRAPPER_KINDS, +}; +use super::base::{AstNormalizationAdapter, NamedChildrenAction, RUBY_ASSIGNMENT_OPERATORS}; +use tree_sitter::Node as TreeSitterNode; + +pub(crate) struct RubyAstAdapter; + +impl AstNormalizationAdapter for RubyAstAdapter { + fn ruby(&self) -> bool { + true + } + + fn yield_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + if !matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "statement" + ) { + return false; + } + let named = named_children(node); + named.len() == 1 + && named[0].kind() == "yield" + && node_text(named[0], source) == node_text(node, source) + } + + fn super_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + if !matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "call" | "statement" + ) { + return false; + } + if node_text(node, source).trim() == "super" { + return true; + } + let raw = raw_named_children(node); + let named = if raw.len() == 1 && raw[0].kind() == "call" { + raw_named_children(raw[0]) + } else { + raw + }; + named + .first() + .map(|child| child.kind() == "super") + .unwrap_or(false) + && named + .iter() + 
.skip(1) + .all(|child| child.kind() == "argument_list") + } + + fn explicit_alternative<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "elsif" | "else")) + } + + fn instance_variable(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + node.kind() == "instance_variable" + || node_text(node, source) + .strip_prefix('@') + .map(ruby_variable_name_text) + .unwrap_or(false) + } + + fn global_variable(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + node.kind() == "global_variable" + || node_text(node, source) + .strip_prefix('$') + .map(ruby_variable_name_text) + .unwrap_or(false) + } + + fn case_argument_list(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + if node.kind() != "argument_list" { + return false; + } + let raw_named = named_children(node); + let target = if raw_named.len() == 1 + && raw_named[0].kind() == "case" + && node_text(raw_named[0], source) == node_text(node, source) + { + raw_named[0] + } else { + node + }; + let has_case_keyword = target + .children(&mut target.walk()) + .any(|child| !child.is_named() && child.kind() == "case"); + has_case_keyword + && named_children(target) + .iter() + .any(|child| CASE_ARGUMENT_WHEN_KINDS.contains(&child.kind())) + } + + fn safe_navigation_call(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + node.children(&mut node.walk()) + .any(|child| !child.is_named() && node_text(child, source) == "&.") + } + + fn leading_function_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.leading_function_statement_with_keyword( + node, + source, + "def", + LEADING_FUNCTION_WRAPPER_KINDS, + ) + } + + fn zero_child_identifier_call(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + if node.kind() != "call" || !ruby_variable_name_text(node_text(node, source)) { + return false; + } + let named = named_children(node); + named.is_empty() + || (named.len() == 1 + && 
super::super::identifier_kind_name(named[0].kind()) + && node_text(named[0], source) == node_text(node, source)) + } + + fn heredoc_call_for_body(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + if node.kind() == "heredoc_beginning" { + return true; + } + if matches!(node.kind(), "call" | "argument_list") + && heredoc_marker_text(node_text(node, source)) + { + return true; + } + + named_children(node).into_iter().any(|child| { + if named_children(child) + .into_iter() + .any(|grandchild| grandchild.kind() == "heredoc_body") + { + return false; + } + + self.heredoc_call_for_body(child, source) + }) + } + + fn named_children_action<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + children: &[TreeSitterNode<'tree>], + ) -> NamedChildrenAction<'tree> { + if INTERPOLATED_STATEMENT_WRAPPER_KINDS.contains(&node.kind()) + && children.len() == 1 + && children[0].kind() == "string" + && node_text(node, source) == node_text(children[0], source) + { + let string_children = raw_named_children(children[0]); + if string_children + .iter() + .any(|child| child.kind() == "interpolation") + { + return NamedChildrenAction::Replace(string_children); + } + } + + if matches!(node.kind(), "body_statement" | "block_body" | "statement") + && children.len() == 1 + && matches!( + children[0].kind(), + "if_modifier" | "unless_modifier" | "while_modifier" | "until_modifier" | "yield" + ) + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Recurse(children[0]); + } + + NamedChildrenAction::Default + } + + fn logical_operator_assignment(&self, operator: &str) -> bool { + matches!(operator, "||" | "&&") + } + + fn statement_wrapped_call_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) { + return None; + } + let raw_named = raw_named_children(node); + if raw_named.len() == 1 + && 
raw_named[0].kind() == "call" + && node_text(node, source) == node_text(raw_named[0], source) + { + Some(raw_named[0]) + } else { + None + } + } + + fn inline_def_function_text_source<'tree>( + &self, + function: TreeSitterNode<'tree>, + _source: &str, + ) -> TreeSitterNode<'tree> { + if function.kind() == "call" { + return named_children(function) + .into_iter() + .next() + .unwrap_or(function); + } + function + } + + fn bare_const_call_function(&self, function: TreeSitterNode<'_>) -> bool { + matches!( + function.kind(), + "constant" | "scope_resolution" | "type_identifier" | "scoped_type_identifier" + ) + } + + fn normalize_default_parameters(&self) -> bool { + true + } + + fn normalize_block_parameters(&self) -> bool { + true + } + + fn boolean_statement_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + children: &[TreeSitterNode<'tree>], + ) -> TreeSitterNode<'tree> { + if children.len() == 1 + && matches!( + children[0].kind(), + "binary" | "binary_expression" | "binary_operator" | "boolean_operator" + ) + && node_text(node, source) == node_text(children[0], source) + { + children[0] + } else { + node + } + } + + fn elides_tail_returns(&self) -> bool { + true + } + + fn elides_implicit_nil_body(&self) -> bool { + true + } + + fn assignment_operators(&self) -> &'static [&'static str] { + RUBY_ASSIGNMENT_OPERATORS + } +} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/rust.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/rust.rs new file mode 100644 index 000000000..e7931107f --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/rust.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct RustAstAdapter; + +impl AstNormalizationAdapter for RustAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/swift.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/swift.rs new file mode 100644 index 000000000..f042d102a --- /dev/null +++ 
b/gems/decomplex/rust/src/decomplex/ast/adapters/swift.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct SwiftAstAdapter; + +impl AstNormalizationAdapter for SwiftAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/typescript.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/typescript.rs new file mode 100644 index 000000000..4354e2bf2 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/typescript.rs @@ -0,0 +1,247 @@ +use super::super::{ + named_children, node_text, question_colon_ternary_parts, raw_named_children, TernaryParts, + TYPESCRIPT_TERNARY_KINDS, +}; +use super::base::{AstNormalizationAdapter, TYPESCRIPT_ASSIGNMENT_OPERATORS}; +use tree_sitter::Node as TreeSitterNode; + +pub(crate) struct TypeScriptAstAdapter; + +impl AstNormalizationAdapter for TypeScriptAstAdapter { + fn explicit_alternative<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "else" | "else_clause")) + } + + fn safe_navigation_call(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + node.children(&mut node.walk()) + .any(|child| !child.is_named() && node_text(child, source) == "&.") + || node + .children(&mut node.walk()) + .any(|child| child.kind() == "optional_chain" && node_text(child, source) == "?.") + || (node.kind() == "call_expression" + && named_children(node) + .into_iter() + .any(|child| self.safe_navigation_call(child, source))) + } + + fn ternary_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + question_colon_ternary_parts(node, source, TYPESCRIPT_TERNARY_KINDS) + } + + fn interpolated_string( + &self, + node: TreeSitterNode<'_>, + children: &[TreeSitterNode<'_>], + ) -> bool { + (node.kind() == "string" && children.iter().any(|child| child.kind() == "interpolation")) + || (node.kind() == "template_string" + && children + .iter() + .any(|child| child.kind() == 
"template_substitution")) + } + + fn lambda_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + if matches!( + node.kind(), + "arrow_function" | "function_expression" | "lambda" + ) { + Some(node) + } else { + None + } + } + + fn interpolation_node(&self, node: TreeSitterNode<'_>) -> bool { + matches!(node.kind(), "interpolation" | "template_substitution") + } + + fn rescue_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if node.kind() == "try_statement" { + return Some(node); + } + if node.kind() == "statement_block" { + let raw_named = raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "try_statement" + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + } + if super::super::RESCUE_BODY_WRAPPER_KINDS.contains(&node.kind()) { + return Some(node); + } + None + } + + fn rescue_body_nodes<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.rescue_body_target(node, source).unwrap_or(node); + if target.kind() == "try_statement" { + return named_children(target) + .into_iter() + .take_while(|child| !matches!(child.kind(), "catch_clause" | "finally_clause")) + .collect(); + } + let named = named_children(target); + let Some(index) = named.iter().position(|child| self.rescue_clause(*child)) else { + return Vec::new(); + }; + named[..index].to_vec() + } + + fn rescue_clauses<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let Some(target) = self.rescue_body_target(node, source) else { + return Vec::new(); + }; + named_children(target) + .into_iter() + .filter(|child| child.kind() == "catch_clause") + .collect() + } + + fn rescue_clause_exceptions<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + ) -> Vec> { + Vec::new() + } + + fn rescue_clause_exceptions_source<'tree>( + &self, + _node: TreeSitterNode<'tree>, + 
_source: &str, + ) -> Option> { + None + } + + fn rescue_clause_exception_variable_name<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| super::super::identifier_kind_name(child.kind())) + } + + fn rescue_clause_exception_variable_source<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.rescue_clause_exception_variable_name(node) + } + + fn rescue_clause_handler<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() == "statement_block") + } + + fn ensure_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if node.kind() == "try_statement" { + return Some(node); + } + if node.kind() == "statement_block" { + let raw_named = raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "try_statement" + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + } + if super::super::ENSURE_BODY_WRAPPER_KINDS.contains(&node.kind()) { + return Some(node); + } + None + } + + fn ensure_body_nodes<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.ensure_body_target(node, source).unwrap_or(node); + if target.kind() == "try_statement" { + return named_children(target) + .into_iter() + .take_while(|child| child.kind() != "finally_clause") + .collect(); + } + let named = named_children(target); + let Some(index) = named + .iter() + .position(|child| self.ensure_clause_kind(*child)) + else { + return Vec::new(); + }; + named[..index].to_vec() + } + + fn ensure_clause<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let target = self.ensure_body_target(node, source)?; + named_children(target) + .into_iter() + .find(|child| child.kind() == "finally_clause") + } + + fn ensure_clause_body<'tree>( + &self, + node: 
TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() == "statement_block") + } + + fn empty_body_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + (super::super::EMPTY_BODY_WRAPPER_KINDS.contains(&node.kind()) + && named_children(node).is_empty() + && node_text(node, source).trim().is_empty()) + || (node.kind() == "statement_block" + && named_children(node).is_empty() + && node_text(node, source).trim() == "{}") + } + + fn assignment_operators(&self) -> &'static [&'static str] { + TYPESCRIPT_ASSIGNMENT_OPERATORS + } +} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/zig.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/zig.rs new file mode 100644 index 000000000..5e24d2cb2 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/zig.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct ZigAstAdapter; + +impl AstNormalizationAdapter for ZigAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs index db9d9b52c..d9f9820ff 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs @@ -1,4 +1,7 @@ use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::adapters::false_simplicity_lexicon::{ + false_simplicity_lexicon, FalseSimplicityLexicon, +}; use crate::decomplex::syntax::Language; use anyhow::Result; use serde::Serialize; @@ -35,20 +38,6 @@ struct ClassRec { span: Span, } -#[derive(Clone, Copy)] -struct Lexicon { - dispatch_mids: &'static [&'static str], - meta_mids: &'static [&'static str], - method_obj_mids: &'static [&'static str], - io_consts: &'static [&'static str], - io_bare: &'static [&'static str], - dir_context: &'static [&'static str], - context_pairs: &'static [(&'static str, &'static [&'static str])], - 
context_bare: &'static [&'static str], - callback_set: &'static [&'static str], - core_consts: &'static [&'static str], -} - pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let mut hits = Vec::new(); let mut classrecs = Vec::new(); @@ -63,669 +52,11 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result Lexicon { - match language { - Language::Ruby => Lexicon { - dispatch_mids: &[ - "send", - "__send__", - "public_send", - "const_get", - "constantize", - "instance_variable_get", - ], - meta_mids: &[ - "define_method", - "define_singleton_method", - "alias_method", - "class_eval", - "module_eval", - "instance_eval", - "class_exec", - "module_exec", - "instance_exec", - "eval", - "const_set", - "instance_variable_set", - "remove_method", - "undef_method", - "prepend", - "singleton_class", - "binding", - ], - method_obj_mids: &["method", "public_method", "instance_method"], - io_consts: &[ - "File", - "IO", - "Dir", - "FileUtils", - "Open3", - "Socket", - "TCPSocket", - "UDPSocket", - "TCPServer", - "UNIXSocket", - "Tempfile", - "Pathname", - "Marshal", - ], - io_bare: &[ - "puts", - "print", - "warn", - "gets", - "readline", - "readlines", - "system", - "exec", - "spawn", - "fork", - "sleep", - "open", - "abort", - "exit", - "exit!", - ], - dir_context: &["pwd", "getwd", "home"], - context_pairs: RUBY_CONTEXT_PAIRS, - context_bare: &["rand", "srand"], - callback_set: RUBY_CALLBACK_SET, - core_consts: RUBY_CORE_CONSTS, - }, - Language::Python => Lexicon { - dispatch_mids: &[ - "getattr", - "setattr", - "hasattr", - "__getattr__", - "__setattr__", - "import_module", - ], - meta_mids: &[ - "eval", "exec", "compile", "type", "globals", "locals", "vars", "setattr", - "delattr", - ], - method_obj_mids: &["method"], - io_consts: &[ - "Path", - "pathlib", - "os", - "sys", - "subprocess", - "socket", - "shutil", - ], - io_bare: &["print", "input", "open", "exec", "eval"], - dir_context: &["getcwd", "home"], - context_pairs: 
PYTHON_CONTEXT_PAIRS, - context_bare: &["random", "randint", "randrange"], - callback_set: COMMON_CALLBACK_SET, - core_consts: EMPTY, - }, - Language::JavaScript | Language::TypeScript => Lexicon { - dispatch_mids: &["eval", "Function", "call", "apply", "bind"], - meta_mids: &[ - "eval", - "Function", - "defineProperty", - "defineProperties", - "setPrototypeOf", - ], - method_obj_mids: &["method"], - io_consts: &["console", "Console", "fs", "process", "Deno", "Bun"], - io_bare: &["setTimeout", "setInterval", "fetch", "require", "import"], - dir_context: EMPTY, - context_pairs: JS_CONTEXT_PAIRS, - context_bare: EMPTY, - callback_set: COMMON_CALLBACK_SET, - core_consts: EMPTY, - }, - Language::Go => Lexicon { - dispatch_mids: &[ - "Call", - "CallSlice", - "Method", - "MethodByName", - "ValueOf", - "TypeOf", - ], - meta_mids: &["Call", "CallSlice", "MethodByName", "New", "MakeFunc"], - method_obj_mids: &["method"], - io_consts: &["os", "io", "ioutil", "fs", "net", "http", "exec", "syscall"], - io_bare: &["panic", "print", "println", "recover"], - dir_context: &["Getwd", "UserHomeDir"], - context_pairs: GO_CONTEXT_PAIRS, - context_bare: EMPTY, - callback_set: GO_CALLBACK_SET, - core_consts: EMPTY, - }, - Language::Rust => Lexicon { - dispatch_mids: &[ - "downcast", - "downcast_ref", - "downcast_mut", - "call", - "call_mut", - "call_once", - ], - meta_mids: &["transmute", "from_raw_parts", "from_raw_parts_mut"], - method_obj_mids: &["method"], - io_consts: &["std", "tokio", "fs", "env", "process", "net", "io"], - io_bare: &["panic", "todo", "unimplemented", "unreachable"], - dir_context: &["current_dir", "home_dir"], - context_pairs: RUST_CONTEXT_PAIRS, - context_bare: EMPTY, - callback_set: RUST_CALLBACK_SET, - core_consts: EMPTY, - }, - Language::Zig => Lexicon { - dispatch_mids: &["field", "fieldParentPtr", "ptrCast", "alignCast", "call"], - meta_mids: &[ - "typeInfo", - "TypeOf", - "ptrCast", - "intFromPtr", - "ptrFromInt", - "eval", - ], - method_obj_mids: 
&["method"], - io_consts: &[ - "std", "os", "fs", "process", "net", "Thread", "Mutex", "Atomic", - ], - io_bare: &["panic", "unreachable"], - dir_context: EMPTY, - context_pairs: ZIG_CONTEXT_PAIRS, - context_bare: EMPTY, - callback_set: ZIG_CALLBACK_SET, - core_consts: EMPTY, - }, - Language::Lua => Lexicon { - dispatch_mids: &["load", "loadfile", "dofile", "require", "rawget", "rawset"], - meta_mids: &[ - "setmetatable", - "getmetatable", - "debug", - "eval", - "load", - "loadfile", - ], - method_obj_mids: &["method"], - io_consts: &["io", "os", "debug", "package"], - io_bare: &["print", "error", "assert", "require", "collectgarbage"], - dir_context: EMPTY, - context_pairs: LUA_CONTEXT_PAIRS, - context_bare: EMPTY, - callback_set: COMMON_CALLBACK_SET, - core_consts: EMPTY, - }, - Language::C => Lexicon { - dispatch_mids: &["dlsym", "dlopen", "GetProcAddress"], - meta_mids: &["setjmp", "longjmp", "va_start", "va_arg"], - method_obj_mids: &["method"], - io_consts: &["FILE", "DIR", "pthread", "mutex", "atomic"], - io_bare: &[ - "printf", "fprintf", "fopen", "open", "read", "write", "close", "system", "exec", - "abort", "exit", "assert", - ], - dir_context: &["getcwd", "getenv"], - context_pairs: EMPTY_PAIRS, - context_bare: &["rand", "time", "clock"], - callback_set: C_CALLBACK_SET, - core_consts: EMPTY, - }, - Language::Cpp => Lexicon { - dispatch_mids: &[ - "dynamic_cast", - "typeid", - "any_cast", - "get_if", - "visit", - "invoke", - ], - meta_mids: &["reinterpret_cast", "const_cast", "dlsym", "dlopen"], - method_obj_mids: &["method"], - io_consts: &[ - "std", - "filesystem", - "fstream", - "iostream", - "thread", - "mutex", - "atomic", - ], - io_bare: &["throw", "abort", "exit", "assert", "system"], - dir_context: &["current_path"], - context_pairs: CPP_CONTEXT_PAIRS, - context_bare: EMPTY, - callback_set: CPP_CALLBACK_SET, - core_consts: EMPTY, - }, - Language::CSharp => Lexicon { - dispatch_mids: &[ - "Invoke", - "GetMethod", - "GetProperty", - "GetField", - 
"Activator", - "CreateInstance", - ], - meta_mids: &["Invoke", "GetType", "Reflection", "Emit", "DynamicMethod"], - method_obj_mids: &["method"], - io_consts: &[ - "Console", - "File", - "Directory", - "Path", - "Process", - "Socket", - "HttpClient", - "Environment", - ], - io_bare: &["throw"], - dir_context: &["CurrentDirectory", "GetEnvironmentVariable"], - context_pairs: CSHARP_CONTEXT_PAIRS, - context_bare: EMPTY, - callback_set: CSHARP_CALLBACK_SET, - core_consts: EMPTY, - }, - Language::Java => Lexicon { - dispatch_mids: &[ - "invoke", - "getMethod", - "getDeclaredMethod", - "getField", - "getDeclaredField", - "forName", - ], - meta_mids: &["invoke", "setAccessible", "newInstance", "Proxy"], - method_obj_mids: &["method"], - io_consts: &[ - "System", - "File", - "Files", - "Paths", - "ProcessBuilder", - "Socket", - "HttpClient", - "Thread", - "Lock", - "AtomicReference", - ], - io_bare: &["throw"], - dir_context: &["getProperty", "getenv"], - context_pairs: JAVA_CONTEXT_PAIRS, - context_bare: EMPTY, - callback_set: JAVA_CALLBACK_SET, - core_consts: EMPTY, - }, - Language::Swift => Lexicon { - dispatch_mids: &[ - "perform", - "value", - "setValue", - "selector", - "NSClassFromString", - ], - meta_mids: &[ - "Mirror", - "unsafeBitCast", - "withUnsafePointer", - "withUnsafeBytes", - ], - method_obj_mids: &["method"], - io_consts: &[ - "FileManager", - "Process", - "URLSession", - "DispatchQueue", - "Thread", - "Lock", - "NSLock", - ], - io_bare: &[ - "print", - "fatalError", - "preconditionFailure", - "assertionFailure", - ], - dir_context: &["currentDirectoryPath", "homeDirectoryForCurrentUser"], - context_pairs: SWIFT_CONTEXT_PAIRS, - context_bare: EMPTY, - callback_set: SWIFT_CALLBACK_SET, - core_consts: EMPTY, - }, - Language::Kotlin => Lexicon { - dispatch_mids: &[ - "invoke", - "call", - "callBy", - "memberProperties", - "declaredMemberFunctions", - ], - meta_mids: &[ - "reflection", - "javaClass", - "Class", - "forName", - "setAccessible", - ], - 
method_obj_mids: &["method"], - io_consts: &[ - "System", - "File", - "Files", - "Paths", - "ProcessBuilder", - "Socket", - "HttpClient", - "Thread", - "Mutex", - "AtomicReference", - ], - io_bare: &["println", "print", "error", "check", "require", "TODO"], - dir_context: &["getProperty", "getenv"], - context_pairs: KOTLIN_CONTEXT_PAIRS, - context_bare: EMPTY, - callback_set: KOTLIN_CALLBACK_SET, - core_consts: EMPTY, - }, - } -} - struct FalseSimplicity { file: String, lines: Vec, language: Language, - lexicon: Lexicon, + lexicon: FalseSimplicityLexicon, hits: Vec, classrecs: Vec, } @@ -736,7 +67,7 @@ impl FalseSimplicity { file, lines, language, - lexicon: lexicon_for(language), + lexicon: false_simplicity_lexicon(language), hits: Vec::new(), classrecs: Vec::new(), } diff --git a/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs b/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs index 9fec9466f..146621f68 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs @@ -1,127 +1,11 @@ -use crate::decomplex::ast::{normalize_text, RawNode, Span}; -use crate::decomplex::syntax::{self, Document, FunctionDef, Language, SimilarityFinding}; +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::adapters::language_profile; +use crate::decomplex::syntax::{self, CloneCandidate, Document, Language, SimilarityFinding}; use anyhow::Result; use std::collections::{BTreeMap, HashMap, HashSet}; use std::path::PathBuf; const MAX_FUZZY_CHILDREN: usize = 14; -const IDENTIFIER_KINDS: &[&str] = &[ - "identifier", - "constant", - "type_identifier", - "field_identifier", - "property_identifier", - "shorthand_property_identifier_pattern", - "variable_name", -]; -const LITERAL_KINDS: &[&str] = &[ - "string", - "string_content", - "string_literal", - "interpreted_string_literal", - "raw_string_literal", - "integer", - "float", - "int", - "number", - "rational", - 
"imaginary", - "character", - "char_literal", - "symbol", - "simple_symbol", - "true", - "false", - "nil", - "none", - "null", -]; -const SKIP_CANDIDATE_KINDS: &[&str] = &[ - "comment", - "identifier", - "constant", - "type_identifier", - "field_identifier", - "property_identifier", - "parameters", - "formal_parameters", - "parameter_list", - "argument_list", - "arguments", - "block_parameters", - "method_parameters", - "scope_resolution", -]; -const CLONE_CANDIDATE_KINDS: &[&str] = &[ - "array", - "assignment", - "assignment_statement", - "block", - "case", - "case_clause", - "class", - "class_definition", - "class_declaration", - "do_block", - "enum_declaration", - "for", - "for_statement", - "hash", - "if", - "if_statement", - "match_expression", - "match_statement", - "method", - "method_definition", - "module", - "operator_assignment", - "singleton_method", - "struct_declaration", - "switch_case", - "switch_expression", - "switch_statement", - "unless", - "until", - "while", - "while_statement", -]; -const BODY_KINDS: &[&str] = &[ - "body", - "block", - "body_statement", - "declaration_list", - "statement_block", - "compound_statement", - "suite", - "do_block", -]; -const CALL_KINDS: &[&str] = &[ - "call", - "call_expression", - "method_invocation", - "invocation_expression", -]; - -#[derive(Clone, Debug)] -struct MethodSpan { - name: String, - first_line: usize, - last_line: usize, -} - -#[derive(Clone, Debug)] -struct Candidate { - file: String, - line: usize, - span: Span, - method_name: String, - node_name: String, - mass: usize, - fingerprint: String, - raw: String, - child_fingerprints: Vec, - child_masses: Vec, -} pub fn scan_files( files: &[PathBuf], @@ -141,18 +25,11 @@ pub fn scan_documents(documents: &[Document], mass: usize, fuzzy: usize) -> Vec< struct Scanner { mass: usize, fuzzy: usize, - method_spans: HashMap>, - source_lines: HashMap>, } impl Scanner { fn new(mass: usize, fuzzy: usize) -> Self { - Self { - mass, - fuzzy, - method_spans: 
HashMap::new(), - source_lines: HashMap::new(), - } + Self { mass, fuzzy } } fn scan(&mut self, documents: &[Document]) -> Vec { @@ -179,44 +56,22 @@ impl Scanner { self.prune_nested_findings(findings) } - fn candidates_for_document(&mut self, document: &Document) -> Vec { - self.source_lines - .insert(document.file.clone(), document.lines.clone()); - self.method_spans.insert( - document.file.clone(), - collect_method_spans(&document.function_defs), - ); - + fn candidates_for_document(&mut self, document: &Document) -> Vec { let mut out = Vec::new(); let mut seen = HashSet::new(); - for function in &document.function_defs { - if let Some(candidate) = - self.candidate_for(&document.file, &function.body, Some("defn")) - { - self.add_candidate(&mut out, &mut seen, candidate); - } - } - - let mut nodes = Vec::new(); - document.root.walk(&mut nodes); - for node in nodes { - if candidate_node(node) { - if let Some(candidate) = self.candidate_for(&document.file, node, None) { - self.add_candidate(&mut out, &mut seen, candidate); - } - } + for candidate in language_profile(document.language).clone_candidates(document) { + self.add_candidate(&mut out, &mut seen, candidate); } out } fn add_candidate( &self, - out: &mut Vec, + out: &mut Vec, seen: &mut HashSet, - candidate: Candidate, + candidate: CloneCandidate, ) { - if candidate.mass < self.effective_mass_floor() || typed_struct_schema_text(&candidate.raw) - { + if candidate.mass < self.effective_mass_floor() { return; } let key = format!( @@ -232,47 +87,8 @@ impl Scanner { } } - fn candidate_for( - &self, - file: &str, - node: &RawNode, - node_name: Option<&str>, - ) -> Option { - let (node_fingerprint, mass) = fingerprint(node, &mut HashSet::new()); - if node_fingerprint.is_empty() { - return None; - } - let line = node.line(); - let method = self.method_span_for(file, line); - let children = fuzzy_children_for(node); - let mut child_fingerprints = Vec::new(); - let mut child_masses = Vec::new(); - for child in children 
{ - let (child_fp, child_mass) = fingerprint(child, &mut HashSet::new()); - if !child_fp.is_empty() && child_mass > 0 { - child_fingerprints.push(child_fp); - child_masses.push(child_mass); - } - } - let candidate = Candidate { - file: file.to_string(), - line, - span: node.span, - method_name: method.name, - node_name: node_name - .map(ToString::to_string) - .unwrap_or_else(|| flay_node_name(node).to_string()), - mass, - fingerprint: node_fingerprint, - raw: normalize_text(&node.text), - child_fingerprints, - child_masses, - }; - Some(candidate) - } - - fn type2_findings(&self, candidates: &[Candidate]) -> Vec { - let mut groups: HashMap<&str, Vec> = HashMap::new(); + fn type2_findings(&self, candidates: &[CloneCandidate]) -> Vec { + let mut groups: HashMap<&str, Vec> = HashMap::new(); for candidate in candidates { groups .entry(candidate.fingerprint.as_str()) @@ -290,7 +106,7 @@ impl Scanner { .map(|candidate| candidate.raw.as_str()) .collect::>() .len(); - if raw_count < 2 || self.typed_struct_schema_cluster(&cluster) { + if raw_count < 2 { continue; } let mass = cluster @@ -303,11 +119,11 @@ impl Scanner { out } - fn type3_findings(&self, candidates: &[Candidate]) -> Vec { + fn type3_findings(&self, candidates: &[CloneCandidate]) -> Vec { if self.fuzzy == 0 { return Vec::new(); } - let mut groups: HashMap> = HashMap::new(); + let mut groups: HashMap> = HashMap::new(); for candidate in candidates { for (signature, signature_mass) in self.fuzzy_signatures(candidate) { if signature_mass >= self.effective_mass_floor() { @@ -335,7 +151,7 @@ impl Scanner { .map(|candidate| candidate.fingerprint.as_str()) .collect::>() .len(); - if fingerprint_count < 2 || self.typed_struct_schema_cluster(&cluster) { + if fingerprint_count < 2 { continue; } let mut key = cluster @@ -364,7 +180,7 @@ impl Scanner { fn finding_for( &self, - cluster: &[Candidate], + cluster: &[CloneCandidate], clone_type: &str, mass: usize, ) -> SimilarityFinding { @@ -388,12 +204,11 @@ impl Scanner { } } 
- fn spans_for(&self, cluster: &[Candidate]) -> BTreeMap { + fn spans_for(&self, cluster: &[CloneCandidate]) -> BTreeMap { let mut spans = BTreeMap::new(); for candidate in cluster { let value = if candidate.node_name == "defn" { - let method = self.method_span_for(&candidate.file, candidate.line); - [method.first_line, 0, method.last_line, 1] + [candidate.span[0], 0, candidate.span[2], 1] } else { candidate.span }; @@ -403,8 +218,18 @@ impl Scanner { } fn prune_nested_findings(&self, findings: Vec) -> Vec { + let defn_site_sets = findings + .iter() + .filter(|finding| finding.node == "defn") + .map(|finding| (finding.clone_type.clone(), site_identities(finding))) + .collect::>(); let mut kept = Vec::new(); for finding in findings { + if finding.node != "defn" + && defn_site_sets.contains(&(finding.clone_type.clone(), site_identities(&finding))) + { + continue; + } if kept.iter().any(|larger| nested_finding(&finding, larger)) { continue; } @@ -413,7 +238,7 @@ impl Scanner { kept } - fn fuzzy_signatures(&self, candidate: &Candidate) -> Vec<(String, usize)> { + fn fuzzy_signatures(&self, candidate: &CloneCandidate) -> Vec<(String, usize)> { let children = &candidate.child_fingerprints; if children.len() < 2 || children.len() > MAX_FUZZY_CHILDREN { return Vec::new(); @@ -438,275 +263,19 @@ impl Scanner { signatures } - fn typed_struct_schema_cluster(&self, cluster: &[Candidate]) -> bool { - cluster.iter().all(|candidate| { - self.typed_struct_schema_line(&candidate.file, candidate.line) - || typed_struct_schema_text(&candidate.raw) - }) - } - - fn typed_struct_schema_line(&self, file: &str, line_no: usize) -> bool { - self.source_lines - .get(file) - .and_then(|lines| lines.get(line_no.saturating_sub(1))) - .map(|line| { - let stripped = line.trim_start(); - stripped.starts_with("const :") || stripped.starts_with("prop :") - }) - .unwrap_or(false) - } - - fn method_span_for(&self, file: &str, line_no: usize) -> MethodSpan { - self.method_spans - .get(file) - 
.and_then(|spans| { - spans - .iter() - .find(|span| span.first_line <= line_no && line_no <= span.last_line) - }) - .cloned() - .unwrap_or_else(|| MethodSpan { - name: "(top-level)".to_string(), - first_line: line_no, - last_line: line_no, - }) - } - fn effective_mass_floor(&self) -> usize { self.mass .max(((self.mass as f64) * 23.0 / 8.0).ceil() as usize) } } -fn collect_method_spans(functions: &[FunctionDef]) -> Vec { - let mut spans = functions - .iter() - .map(|function| MethodSpan { - name: function.name.clone(), - first_line: function.span[0], - last_line: function.span[2], - }) - .collect::>(); - spans.sort_by_key(|method| (method.first_line, std::cmp::Reverse(method.last_line))); - spans -} - -fn candidate_node(node: &RawNode) -> bool { - node.named - && !SKIP_CANDIDATE_KINDS.contains(&node.kind.as_str()) - && CLONE_CANDIDATE_KINDS.contains(&node.kind.as_str()) - && !typed_struct_schema_text(&node.text) - && !node.named_children().is_empty() -} - -fn fuzzy_children_for(node: &RawNode) -> Vec<&RawNode> { - let source_node = body_node(node).unwrap_or(node); - let mut children = source_node.named_children(); - if children.is_empty() { - children = node.named_children(); - } - children - .into_iter() - .filter(|child| { - !SKIP_CANDIDATE_KINDS.contains(&child.kind.as_str()) - && !typed_struct_schema_text(&child.text) - }) - .collect() -} - -fn body_node(node: &RawNode) -> Option<&RawNode> { - node.children - .iter() - .find(|child| BODY_KINDS.contains(&child.kind.as_str())) -} - -fn fingerprint(node: &RawNode, active: &mut HashSet) -> (String, usize) { - let key = node_key(node); - if active.contains(&key) || node.kind == "comment" { - return (String::new(), 0); - } - active.insert(key.clone()); - let out = if matches!( - node.kind.as_str(), - "predefined_type" | "abstract_pointer_declarator" | "storage_class_specifier" | "ERROR" - ) { - let token = terminal_token(node); - if token.is_empty() { - (String::new(), 0) - } else { - (token, 1) - } - } else if 
CALL_KINDS.contains(&node.kind.as_str()) && call_message(node).is_some() { - fingerprint_call(node, active) - } else if node.children.is_empty() { - let token = terminal_token(node); - if token.is_empty() { - (String::new(), 0) - } else { - (token, 1) - } - } else { - let mut child_parts = Vec::new(); - let mut mass = 1; - for child in &node.children { - let (child_fp, child_mass) = fingerprint(child, active); - if child_fp.is_empty() { - continue; - } - child_parts.push(child_fp); - mass += child_mass; - } - if child_parts.is_empty() { - (terminal_token(node), 1) - } else { - (format!("{}({})", node.kind, child_parts.join(" ")), mass) - } - }; - active.remove(&key); - out -} - -fn fingerprint_call(node: &RawNode, active: &mut HashSet) -> (String, usize) { - let message = call_message(node).unwrap_or_default(); - let mut child_parts = Vec::new(); - let mut mass = 1; - for child in &node.children { - let (child_fp, child_mass) = fingerprint(child, active); - if child_fp.is_empty() { - continue; - } - child_parts.push(child_fp); - mass += child_mass; - } - ( - format!("{}<{}>({})", node.kind, message, child_parts.join(" ")), - mass, - ) -} - -fn call_message(node: &RawNode) -> Option { - if !node - .children - .iter() - .any(|child| matches!(child.kind.as_str(), "argument_list" | "arguments")) - { - return None; - } - let argument_start = node - .children - .iter() - .find(|child| matches!(child.kind.as_str(), "argument_list" | "arguments")) - .map(|child| (child.span[0], child.span[1])); - let named_before_args = node - .named_children() - .into_iter() - .filter(|child| { - argument_start - .map(|start| (child.span[0], child.span[1]) < start) - .unwrap_or(true) - }) - .collect::>(); - named_before_args - .last() - .and_then(|callee| callee_message(callee)) -} - -fn callee_message(node: &RawNode) -> Option { - if IDENTIFIER_KINDS.contains(&node.kind.as_str()) { - return Some(node.text.clone()); - } - node.named_children() - .into_iter() - .rev() - .find(|child| 
IDENTIFIER_KINDS.contains(&child.kind.as_str())) - .map(|child| child.text.clone()) -} - -fn terminal_token(node: &RawNode) -> String { - let kind = node.kind.as_str(); - if IDENTIFIER_KINDS.contains(&kind) { - return "id".to_string(); - } - if LITERAL_KINDS.contains(&kind) { - return literal_token(kind).to_string(); - } - let text = normalize_text(&node.text); - if text.is_empty() { - return String::new(); - } - if identifier_text(&text) { - return "id".to_string(); - } - if literal_text(&text) { - return "lit".to_string(); - } - format!("{kind}:{text}") -} - -fn literal_token(kind: &str) -> &str { - match kind { - "true" | "false" => "bool", - "nil" | "none" | "null" => "nil", - _ => "lit", - } -} - -fn identifier_text(text: &str) -> bool { - let mut chars = text.chars(); - let Some(first) = chars.next() else { - return false; - }; - (first == '_' || first.is_ascii_alphabetic()) - && chars.all(|char| { - char == '_' || char == '!' || char == '?' || char == '=' || char.is_ascii_alphanumeric() - }) -} - -fn literal_text(text: &str) -> bool { - if symbol_literal_text(text) - || quoted_literal_text(text, '"') - || quoted_literal_text(text, '\'') - { - return true; - } - text.parse::().is_ok() -} - -fn symbol_literal_text(text: &str) -> bool { - let mut chars = text.chars(); - if chars.next() != Some(':') { - return false; - } - let Some(first) = chars.next() else { - return false; - }; - (first == '_' || first.is_ascii_alphabetic()) - && chars.all(|char| char == '_' || char.is_ascii_alphanumeric()) -} - -fn quoted_literal_text(text: &str, quote: char) -> bool { - text.len() >= 2 && text.starts_with(quote) && text.ends_with(quote) -} - -fn flay_node_name(node: &RawNode) -> &str { - match node.kind.as_str() { - "method" - | "function_definition" - | "function_declaration" - | "method_definition" - | "function_item" => "defn", - "singleton_method" => "defs", - other => other, - } -} - -fn uniq_sites(candidates: Vec) -> Vec { +fn uniq_sites(candidates: Vec) -> Vec { let 
mut seen = HashSet::new(); let mut out = Vec::new(); for candidate in candidates { let key = format!( - "{}\0{}\0{}", - candidate.file, candidate.line, candidate.node_name + "{}\0{}\0{:?}\0{}", + candidate.file, candidate.line, candidate.span, candidate.node_name ); if seen.insert(key) { out.push(candidate); @@ -715,7 +284,7 @@ fn uniq_sites(candidates: Vec) -> Vec { out } -fn most_common_node(cluster: &[Candidate]) -> String { +fn most_common_node(cluster: &[CloneCandidate]) -> String { let mut order = Vec::new(); let mut tally: HashMap<&str, usize> = HashMap::new(); for candidate in cluster { @@ -736,7 +305,7 @@ fn most_common_node(cluster: &[Candidate]) -> String { best.to_string() } -fn site_for(candidate: &Candidate) -> String { +fn site_for(candidate: &CloneCandidate) -> String { format!( "{}:{}:{}", candidate.file, candidate.method_name, candidate.line @@ -771,13 +340,27 @@ fn site_file(site: &str) -> String { parts.join(":") } -fn typed_struct_schema_text(text: &str) -> bool { - text.contains("< T::Struct") - || text.contains(" Vec<(String, String)> { + let mut identities = finding + .sites + .iter() + .map(|site| { + let parts = site.split(':').collect::>(); + let file = if parts.len() >= 2 { + parts[..parts.len() - 2].join(":") + } else { + String::new() + }; + let method = parts + .get(parts.len().saturating_sub(2)) + .copied() + .unwrap_or_default() + .to_string(); + (file, method) }) + .collect::>(); + identities.sort(); + identities } fn clone_type_rank(clone_type: &str) -> usize { @@ -788,18 +371,6 @@ fn clone_type_rank(clone_type: &str) -> usize { } } -fn node_key(node: &RawNode) -> String { - format!( - "{}\0{}\0{}\0{}\0{}\0{}", - node.kind, - node.span[0], - node.span[1], - node.span[2], - node.span[3], - node.text.len() - ) -} - fn combinations(size: usize, count: usize) -> Vec> { fn step( start: usize, @@ -906,7 +477,8 @@ end "#, ); let function = doc.function_defs.first().expect("function"); - let (_fingerprint, mass) = 
fingerprint(&function.body, &mut HashSet::new()); + let (_fingerprint, mass) = + language_profile(Language::Ruby).clone_fingerprint(&function.body); assert_eq!(mass, 128); } @@ -934,7 +506,7 @@ end .into_iter() .find(|node| node.kind == "unless" && node.named) .expect("unless"); - let (_fingerprint, mass) = fingerprint(node, &mut HashSet::new()); + let (_fingerprint, mass) = language_profile(Language::Ruby).clone_fingerprint(node); assert_eq!(mass, 126); } @@ -966,7 +538,7 @@ end .into_iter() .find(|node| node.kind == "assignment" && node.named) .expect("assignment"); - let (_fingerprint, mass) = fingerprint(node, &mut HashSet::new()); + let (_fingerprint, mass) = language_profile(Language::Ruby).clone_fingerprint(node); assert_eq!(mass, 178); } @@ -998,7 +570,8 @@ end .iter() .find(|function| function.name == "body_slots") .expect("body_slots"); - let (_fingerprint, mass) = fingerprint(&function.body, &mut HashSet::new()); + let (_fingerprint, mass) = + language_profile(Language::Ruby).clone_fingerprint(&function.body); assert_eq!(mass, 110); } @@ -1029,7 +602,7 @@ end .into_iter() .find(|node| node.kind == "do_block" && node.named) .expect("do_block"); - let (_fingerprint, mass) = fingerprint(node, &mut HashSet::new()); + let (_fingerprint, mass) = language_profile(Language::Ruby).clone_fingerprint(node); assert_eq!(mass, 110); } @@ -1053,7 +626,8 @@ end "#, ); let function = doc.function_defs.first().expect("function"); - let (_fingerprint, mass) = fingerprint(&function.body, &mut HashSet::new()); + let (_fingerprint, mass) = + language_profile(Language::Ruby).clone_fingerprint(&function.body); assert_eq!(mass, 96); } @@ -1086,7 +660,7 @@ end .into_iter() .find(|node| node.kind == "case" && node.named) .expect("case"); - let (_fingerprint, mass) = fingerprint(node, &mut HashSet::new()); + let (_fingerprint, mass) = language_profile(Language::Ruby).clone_fingerprint(node); assert_eq!(mass, 136); } @@ -1118,7 +692,7 @@ end .into_iter() .find(|node| node.kind == 
"case" && node.named) .expect("case"); - let (_fingerprint, mass) = fingerprint(node, &mut HashSet::new()); + let (_fingerprint, mass) = language_profile(Language::Ruby).clone_fingerprint(node); assert_eq!(mass, 96); } @@ -1139,7 +713,8 @@ end "##, ); let function = doc.function_defs.first().expect("function"); - let (_fingerprint, mass) = fingerprint(&function.body, &mut HashSet::new()); + let (_fingerprint, mass) = + language_profile(Language::Ruby).clone_fingerprint(&function.body); assert_eq!(mass, 175); } @@ -1175,7 +750,7 @@ end .into_iter() .find(|node| node.kind == "module" && node.named) .expect("module"); - let (_fingerprint, mass) = fingerprint(node, &mut HashSet::new()); + let (_fingerprint, mass) = language_profile(Language::Ruby).clone_fingerprint(node); assert_eq!(mass, 150); } @@ -1194,7 +769,8 @@ end "##, ); let function = doc.function_defs.first().expect("function"); - let (_fingerprint, mass) = fingerprint(&function.body, &mut HashSet::new()); + let (_fingerprint, mass) = + language_profile(Language::Ruby).clone_fingerprint(&function.body); assert_eq!(mass, 132); } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs index aafb8d3bd..f79a568a9 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs @@ -1,7 +1,7 @@ use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::adapters::language_profile; use crate::decomplex::syntax::Language; use anyhow::Result; -use regex::Regex; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; use std::path::PathBuf; @@ -35,6 +35,7 @@ const NOISE_MIDS: &[&str] = &[ ]; pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let profile = language_profile(language); let mut parsed = Vec::new(); let mut global_immutable_readers: BTreeMap> = BTreeMap::new(); let mut 
global_immutable_reader_types: BTreeMap> = @@ -43,33 +44,34 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result>>, immutable_reader_types: Option>>, type_aliases: Option>, + method_param_types: Option>>, ) -> Self { - let ir = immutable_readers.unwrap_or_else(|| BTreeMap::new()); // Simplified + let ir = immutable_readers.unwrap_or_else(BTreeMap::new); let irt = immutable_reader_types.unwrap_or_else(|| BTreeMap::new()); let ta = type_aliases.unwrap_or_else(|| BTreeMap::new()); - - // Re-extract if not provided (matches Ruby's initialize) - let ir = if ir.is_empty() { - let s = Self { - file: file.clone().unwrap_or_default(), - lines: lines.clone(), - decisions: Vec::new(), - immutable_readers: BTreeMap::new(), - immutable_reader_types: BTreeMap::new(), - type_aliases: BTreeMap::new(), - method_param_types: BTreeMap::new(), - }; - s.immutable_struct_readers(&lines) - } else { - ir - }; - - let irt = if irt.is_empty() { - let s = Self { - file: file.clone().unwrap_or_default(), - lines: lines.clone(), - decisions: Vec::new(), - immutable_readers: BTreeMap::new(), - immutable_reader_types: BTreeMap::new(), - type_aliases: BTreeMap::new(), - method_param_types: BTreeMap::new(), - }; - s.immutable_struct_reader_types(&lines) - } else { - irt - }; - - let ta = if ta.is_empty() { - let s = Self { - file: file.clone().unwrap_or_default(), - lines: lines.clone(), - decisions: Vec::new(), - immutable_readers: BTreeMap::new(), - immutable_reader_types: BTreeMap::new(), - type_aliases: BTreeMap::new(), - method_param_types: BTreeMap::new(), - }; - s.type_aliases(&lines) - } else { - ta - }; - - let mut s = Self { + Self { file: file.unwrap_or_default(), lines: lines.clone(), decisions: Vec::new(), immutable_readers: ir, immutable_reader_types: irt, type_aliases: ta, - method_param_types: BTreeMap::new(), - }; - s.method_param_types = s.extract_method_param_types(&lines); - s + method_param_types: method_param_types.unwrap_or_else(BTreeMap::new), + } } fn 
walk(&mut self, node: &Node, defstack: &[String]) { @@ -322,87 +276,6 @@ impl StateBranchDensity { } } - fn immutable_struct_readers(&self, lines: &[String]) -> BTreeMap> { - let mut readers = BTreeMap::new(); - let mut class_stack = Vec::new(); - let class_struct_re = - Regex::new(r"^\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b").unwrap(); - let const_re = Regex::new(r"^\s*const\s+:([A-Za-z_]\w*)\b").unwrap(); - let end_re = Regex::new(r"^\s*end\s*(?:#.*)?$").unwrap(); - - for line in lines { - if let Some(caps) = class_struct_re.captures(line) { - class_stack.push(caps[1].to_string()); - continue; - } - if !class_stack.is_empty() { - if let Some(caps) = const_re.captures(line) { - readers - .entry(class_stack.last().unwrap().clone()) - .or_insert_with(BTreeSet::new) - .insert(caps[1].to_string()); - continue; - } - } - if end_re.is_match(line) { - class_stack.pop(); - } - } - readers - } - - fn immutable_struct_reader_types( - &self, - lines: &[String], - ) -> BTreeMap> { - let mut reader_types = BTreeMap::new(); - let mut class_stack = Vec::new(); - let class_struct_re = - Regex::new(r"^\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b").unwrap(); - let const_type_re = - Regex::new(r"^\s*const\s+:([A-Za-z_]\w*)\s*,\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\b") - .unwrap(); - let end_re = Regex::new(r"^\s*end\s*(?:#.*)?$").unwrap(); - - for line in lines { - if let Some(caps) = class_struct_re.captures(line) { - class_stack.push(caps[1].to_string()); - continue; - } - if !class_stack.is_empty() { - if let Some(caps) = const_type_re.captures(line) { - reader_types - .entry(class_stack.last().unwrap().clone()) - .or_insert_with(BTreeMap::new) - .insert(caps[1].to_string(), caps[2].to_string()); - continue; - } - } - if end_re.is_match(line) { - class_stack.pop(); - } - } - reader_types - } - - fn type_aliases(&self, lines: &[String]) -> BTreeMap { - let mut aliases = BTreeMap::new(); - let type_alias_re = - 
Regex::new(r"^\s*([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*([A-Z]\w*(?:::[A-Z]\w*)*)\s*\}") - .unwrap(); - let const_alias_re = - Regex::new(r"^\s*([A-Z]\w*)\s*=\s*([A-Z]\w*(?:::[A-Z]\w*)*)\b").unwrap(); - - for line in lines { - if let Some(caps) = type_alias_re.captures(line) { - aliases.insert(caps[1].to_string(), caps[2].to_string()); - } else if let Some(caps) = const_alias_re.captures(line) { - aliases.insert(caps[1].to_string(), caps[2].to_string()); - } - } - aliases - } - fn resolve_type_alias(&self, type_name: &str) -> String { let mut seen = BTreeSet::new(); let mut current = type_name.to_string(); @@ -423,43 +296,6 @@ impl StateBranchDensity { } } } - - fn extract_method_param_types( - &self, - lines: &[String], - ) -> BTreeMap> { - let mut types_by_method = BTreeMap::new(); - let mut pending_sig = String::new(); - let def_re = Regex::new(r"^\s*def\s+([A-Za-z_]\w*[!?=]?)(?:\s|\(|$)").unwrap(); - - for line in lines { - if self.pending_sig_active(line, &pending_sig) { - pending_sig.push_str(line); - } - if let Some(caps) = def_re.captures(line) { - types_by_method.insert(caps[1].to_string(), self.sig_param_types(&pending_sig)); - pending_sig.clear(); - } - } - types_by_method - } - - fn pending_sig_active(&self, line: &str, pending_sig: &str) -> bool { - !pending_sig.is_empty() || line.trim().starts_with("sig") - } - - fn sig_param_types(&self, sig_source: &str) -> BTreeMap { - let params_re = Regex::new(r"params\s*\((.*?)\)").unwrap(); - let param_pair_re = - Regex::new(r"([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)").unwrap(); - let mut params = BTreeMap::new(); - if let Some(p_caps) = params_re.captures(sig_source) { - for pair in param_pair_re.captures_iter(&p_caps[1]) { - params.insert(pair[1].to_string(), pair[2].to_string()); - } - } - params - } } struct Report { diff --git a/gems/decomplex/rust/src/decomplex/mod.rs b/gems/decomplex/rust/src/decomplex/mod.rs index bbf05f28f..765921560 100644 --- a/gems/decomplex/rust/src/decomplex/mod.rs 
+++ b/gems/decomplex/rust/src/decomplex/mod.rs @@ -1,3 +1,6 @@ +#[cfg(test)] +mod architecture_test; + pub mod ast; pub mod detectors; pub mod parallel; diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index 6860596a2..cc0d92a2d 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -1,3 +1,4 @@ +pub(crate) mod adapters; pub mod tree_sitter_adapter; use crate::decomplex::ast::{RawNode, Span}; @@ -113,6 +114,20 @@ pub struct ComparisonUse { pub span: Span, } +#[derive(Clone, Debug)] +pub(crate) struct CloneCandidate { + pub(crate) file: String, + pub(crate) line: usize, + pub(crate) span: Span, + pub(crate) method_name: String, + pub(crate) node_name: String, + pub(crate) mass: usize, + pub(crate) fingerprint: String, + pub(crate) raw: String, + pub(crate) child_fingerprints: Vec, + pub(crate) child_masses: Vec, +} + #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct SimilarityFinding { pub at: String, diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs new file mode 100644 index 000000000..2a26fe302 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs @@ -0,0 +1,1052 @@ +use super::super::tree_sitter_adapter::{ + first_named_child, first_named_child_except, first_named_child_with_kind, first_named_text, + named_children, normalize_type_owner, previous_sibling_text, strip_assignment_suffix, + AssignmentTarget, Target, +}; +use super::super::{CloneCandidate, Document, Language}; +use crate::decomplex::ast::{node_text, normalize_text, span, RawNode}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) const EMPTY_NODE_KINDS: &[&str] = &[]; +pub(crate) const DEFAULT_COMPARISON_OPERATORS: &[&str] = &["==", "!="]; +pub(crate) const DEFAULT_EXPRESSION_BODY_OPERATOR_TOKENS: &[&str] 
= &["="]; +pub(crate) const DEFAULT_IGNORED_STATEMENT_NODE_KINDS: &[&str] = &["comment", "heredoc_body"]; +const CLONE_IDENTIFIER_KINDS: &[&str] = &[ + "identifier", + "constant", + "type_identifier", + "field_identifier", + "property_identifier", + "shorthand_property_identifier_pattern", + "simple_identifier", + "variable_name", +]; +const CLONE_LITERAL_KINDS: &[&str] = &[ + "string", + "string_content", + "string_literal", + "interpreted_string_literal", + "raw_string_literal", + "integer", + "float", + "int", + "number", + "rational", + "imaginary", + "character", + "char_literal", + "symbol", + "simple_symbol", + "true", + "false", + "nil", + "none", + "null", +]; +const CLONE_SKIP_KINDS: &[&str] = &[ + "comment", + "identifier", + "constant", + "type_identifier", + "field_identifier", + "property_identifier", + "parameters", + "formal_parameters", + "parameter_list", + "argument_list", + "arguments", + "block_parameters", + "call_suffix", + "function_value_parameters", + "method_parameters", + "value_argument", + "scope_resolution", +]; +const CLONE_CANDIDATE_KINDS: &[&str] = &[ + "array", + "assignment", + "assignment_statement", + "block", + "case", + "case_clause", + "class", + "class_definition", + "class_declaration", + "compound_statement", + "conjunction_expression", + "control_structure_body", + "do_block", + "enum_declaration", + "for", + "for_statement", + "function_body", + "hash", + "if", + "if_statement", + "match_expression", + "match_statement", + "method", + "method_definition", + "module", + "operator_assignment", + "singleton_method", + "statements", + "struct_declaration", + "switch_case", + "switch_expression", + "switch_statement", + "unless", + "until", + "while", + "while_statement", +]; +const CLONE_BODY_KINDS: &[&str] = &[ + "body", + "block", + "body_statement", + "declaration_list", + "statement_block", + "compound_statement", + "function_body", + "statements", + "suite", + "do_block", +]; +const CLONE_CALL_KINDS: &[&str] = &[ + 
"call", + "call_expression", + "function_call", + "method_call", + "method_invocation", + "invocation_expression", +]; + +pub(crate) trait LanguageProfile { + fn language(&self) -> Language; + fn grammar(&self) -> TreeSitterLanguage; + + fn function_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn class_owner_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn module_owner_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn generic_owner_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn impl_owner_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn struct_owner_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn call_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn function_body_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn identifier_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn field_identifier_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn assignment_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn assignment_operator_tokens(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn declarator_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn receiver_type_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn receiver_parameter_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn first_argument_receiver_type_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn first_argument_receiver_name_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn comparison_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn comparison_operators(&self) -> &[&str] { + DEFAULT_COMPARISON_OPERATORS + } + + fn case_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn case_arm_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn case_pattern_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn case_subject_node_kinds(&self) 
-> &[&str] { + EMPTY_NODE_KINDS + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn default_case_patterns(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn boolean_and_operators(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn boolean_wrapper_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn accessor_call_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn expression_list_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn navigation_suffix_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn field_like_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn expression_body_operator_tokens(&self) -> &[&str] { + DEFAULT_EXPRESSION_BODY_OPERATOR_TOKENS + } + + fn ignored_statement_node_kinds(&self) -> &[&str] { + DEFAULT_IGNORED_STATEMENT_NODE_KINDS + } + + fn first_argument_receiver(&self) -> bool { + false + } + + fn function_name(&self, node: Node<'_>, source: &str) -> Option { + self.default_function_name(node, source) + } + + fn default_function_name(&self, node: Node<'_>, source: &str) -> Option { + if !self.function_node_kinds().contains(&node.kind()) { + return None; + } + + node.child_by_field_name("name") + .map(|name| node_text(name, source).to_string()) + .or_else(|| self.declarator_name(node.child_by_field_name("declarator"), source)) + .or_else(|| self.first_identifier_text(node, source)) + } + + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + self.default_owner_name_from_declaration(node, source) + } + + fn default_owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + if self.class_owner_node_kinds().contains(&node.kind()) + || 
self.module_owner_node_kinds().contains(&node.kind()) + || self.generic_owner_node_kinds().contains(&node.kind()) + || self.struct_owner_node_kinds().contains(&node.kind()) + { + return node + .child_by_field_name("name") + .map(|name| node_text(name, source).to_string()) + .or_else(|| self.first_identifier_text(node, source)); + } + if self.impl_owner_node_kinds().contains(&node.kind()) { + return self.impl_owner_name(node, source); + } + None + } + + fn generated_prelude(&self, _node: Node<'_>, _source: &str) -> bool { + false + } + + fn hidden_case(&self, _node: Node<'_>) -> bool { + false + } + + fn hidden_case_source_node<'tree>(&self, _node: Node<'tree>) -> Option> { + None + } + + fn case_source_node<'tree>(&self, node: Node<'tree>) -> Node<'tree> { + if self.hidden_case(node) { + self.hidden_case_source_node(node).unwrap_or(node) + } else { + node + } + } + + fn predicate_less_case(&self, node: Node<'_>) -> bool { + self.case_node_kinds().contains(&node.kind()) && self.decision_subject(node).is_none() + } + + fn case_pattern_texts(&self, patterns: &[Node<'_>], source: &str) -> Vec { + patterns + .iter() + .map(|pattern| normalize_text(node_text(*pattern, source))) + .collect() + } + + fn receiver_convention_owner_name(&self, node: Node<'_>, source: &str) -> Option { + if !self.first_argument_receiver() || !self.function_node_kinds().contains(&node.kind()) { + return None; + } + + let (type_name, _) = self.first_argument_receiver_parameter(node, source)?; + let type_name = normalize_type_owner(&type_name); + let name = self.function_name(node, source)?; + if type_name.is_empty() || name.is_empty() { + return None; + } + + let prefix = snake_case_type_name(&type_name); + if name.starts_with(&format!("{prefix}_")) { + Some(type_name) + } else { + None + } + } + + fn function_receiver_name(&self, node: Node<'_>, source: &str) -> Option { + if self.first_argument_receiver() && self.function_node_kinds().contains(&node.kind()) { + if let Some((_, name)) = 
self.first_argument_receiver_parameter(node, source) { + return Some(name); + } + } + None + } + + fn single_expression_body<'tree>(&self, node: Node<'tree>) -> Option> { + let mut cursor = node.walk(); + if node.children(&mut cursor).any(|child| { + self.expression_body_operator_tokens() + .contains(&child.kind()) + }) { + let named = named_children(node); + return named.last().copied(); + } + + let body = node.child_by_field_name("body").or_else(|| { + named_children(node) + .into_iter() + .find(|child| self.function_body_node_kinds().contains(&child.kind())) + })?; + let statements: Vec> = named_children(body) + .into_iter() + .filter(|child| !self.ignored_statement_node_kinds().contains(&child.kind())) + .collect(); + if statements.len() == 1 { + statements.first().copied() + } else { + None + } + } + + fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { + self.default_state_target(lhs, source) + } + + fn default_state_target(&self, lhs: Node<'_>, source: &str) -> Option { + if previous_sibling_text(lhs, source).as_deref() == Some(":") { + return None; + } + + if self.accessor_call_node_kinds().contains(&lhs.kind()) { + let receiver = lhs.child_by_field_name("receiver")?; + let method = lhs.child_by_field_name("method")?; + return Some(Target { + receiver: normalize_text(node_text(receiver, source)), + field: strip_assignment_suffix(node_text(method, source)), + }); + } + + if self.field_like_node_kinds().contains(&lhs.kind()) + || self.expression_list_node_kinds().contains(&lhs.kind()) + { + let object = lhs + .child_by_field_name("object") + .or_else(|| lhs.child_by_field_name("receiver")) + .or_else(|| lhs.child_by_field_name("expression")) + .or_else(|| lhs.child_by_field_name("operand")) + .or_else(|| lhs.child_by_field_name("value")) + .or_else(|| lhs.child_by_field_name("argument")) + .or_else(|| first_named_child_except(lhs, "navigation_suffix"))?; + let field = lhs + .child_by_field_name("field") + .or_else(|| 
lhs.child_by_field_name("property")) + .or_else(|| lhs.child_by_field_name("name")) + .or_else(|| lhs.child_by_field_name("suffix")) + .or_else(|| first_named_child_with_kind(lhs, "navigation_suffix")) + .or_else(|| named_children(lhs).into_iter().last())?; + let field_text = self.member_field_text(field, source)?; + return Some(Target { + receiver: normalize_text(node_text(object, source)), + field: strip_assignment_suffix(&field_text), + }); + } + + None + } + + fn assignment_target<'tree>(&self, node: Node<'tree>) -> Option> { + self.default_assignment_target(node) + } + + fn default_assignment_target<'tree>( + &self, + node: Node<'tree>, + ) -> Option> { + if !self.assignment_node_kinds().contains(&node.kind()) { + return None; + } + let lhs = node + .child_by_field_name("left") + .or_else(|| first_named_child(node))?; + Some(AssignmentTarget { lhs, source: node }) + } + + fn skip_state_write_node(&self, _node: Node<'_>) -> bool { + false + } + + fn skip_state_write_target(&self, target: &Target) -> bool { + target.field == "[]" + } + + fn state_write_source_node<'tree>( + &self, + _node: Node<'tree>, + assignment: &AssignmentTarget<'tree>, + ) -> Node<'tree> { + assignment.source + } + + fn assignment_lhs_node(&self, node: Node<'_>) -> bool { + if super::super::tree_sitter_adapter::previous_sibling_raw_text(node).as_deref() + == Some(":") + { + return false; + } + super::super::tree_sitter_adapter::next_sibling_raw_text(node) + .map(|token| self.assignment_operator_tokens().contains(&token.as_str())) + .unwrap_or(false) + } + + fn parenthesized_wrapper(&self, node: Node<'_>) -> bool { + self.parenthesized_wrapper_node_kinds() + .contains(&node.kind()) + && named_children(node).len() == 1 + } + + fn boolean_container(&self, node: Node<'_>) -> bool { + if self.boolean_container_node_kinds().contains(&node.kind()) { + return true; + } + if self.parenthesized_wrapper(node) { + return first_named_child(node) + .map(|child| self.boolean_container(child)) + 
.unwrap_or(false); + } + if !self.boolean_wrapper_node_kinds().contains(&node.kind()) { + return false; + } + if !self + .boolean_and_operators() + .contains(&super::super::tree_sitter_adapter::direct_operator(node).as_str()) + { + return false; + } + if named_children(node).len() < 2 { + return false; + } + let mut cursor = node.walk(); + let result = node.children(&mut cursor).all(|child| { + child.is_named() + || self.boolean_and_operators().contains(&child.kind()) + || matches!(child.kind(), "(" | ")") + }); + result + } + + fn decision_subject<'tree>(&self, node: Node<'tree>) -> Option> { + node.child_by_field_name("value") + .or_else(|| node.child_by_field_name("subject")) + .or_else(|| { + named_children(node) + .into_iter() + .find(|child| self.case_subject_node_kinds().contains(&child.kind())) + }) + .or_else(|| node.child_by_field_name("condition")) + .or_else(|| { + named_children(node) + .into_iter() + .find(|child| !self.case_subject_skip_node_kinds().contains(&child.kind())) + }) + } + + fn first_identifier_text(&self, node: Node<'_>, source: &str) -> Option { + let mut kinds = Vec::new(); + kinds.extend_from_slice(self.identifier_node_kinds()); + kinds.extend_from_slice(self.field_identifier_node_kinds()); + first_named_text(node, source, &kinds) + } + + fn declarator_name(&self, node: Option>, source: &str) -> Option { + let mut pending = vec![node?]; + let mut seen = HashSet::new(); + while let Some(current) = pending.pop() { + let key = format!("{:?}\0{}", span(current), current.kind()); + if !seen.insert(key) { + continue; + } + if self.identifier_node_kinds().contains(¤t.kind()) + || self.field_identifier_node_kinds().contains(¤t.kind()) + { + return Some(node_text(current, source).to_string()); + } + let mut children = named_children(current); + children.reverse(); + pending.extend(children); + } + None + } + + fn impl_owner_name(&self, node: Node<'_>, source: &str) -> Option { + let r#type = node.child_by_field_name("type").or_else(|| { + 
named_children(node).into_iter().find(|child| { + self.receiver_type_node_kinds().contains(&child.kind()) + || self.identifier_node_kinds().contains(&child.kind()) + || self.field_identifier_node_kinds().contains(&child.kind()) + }) + })?; + Some(normalize_type_owner(node_text(r#type, source))) + } + + fn first_argument_receiver_parameter( + &self, + node: Node<'_>, + source: &str, + ) -> Option<(String, String)> { + let declarator = node.child_by_field_name("declarator"); + let params = declarator + .and_then(|declarator| declarator.child_by_field_name("parameters")) + .or_else(|| node.child_by_field_name("parameters")) + .or_else(|| { + named_children(node) + .into_iter() + .find(|child| self.parameter_list_node_kinds().contains(&child.kind())) + }) + .or_else(|| { + declarator.and_then(|declarator| { + named_children(declarator) + .into_iter() + .find(|child| self.parameter_list_node_kinds().contains(&child.kind())) + }) + })?; + + let first = named_children(params) + .into_iter() + .find(|child| self.receiver_parameter_node_kinds().contains(&child.kind()))?; + + let type_node = named_children(first).into_iter().find(|child| { + self.first_argument_receiver_type_node_kinds() + .contains(&child.kind()) + })?; + let name = named_children(first) + .into_iter() + .rev() + .find(|child| { + self.first_argument_receiver_name_node_kinds() + .contains(&child.kind()) + }) + .map(|child| node_text(child, source).to_string()) + .or_else(|| self.nested_receiver_name(first, source)) + .or_else(|| self.declarator_name(Some(first), source))?; + + Some((node_text(type_node, source).to_string(), name)) + } + + fn nested_receiver_name(&self, node: Node<'_>, source: &str) -> Option { + for child in named_children(node).into_iter().rev() { + let direct = named_children(child).into_iter().rev().find(|grandchild| { + self.first_argument_receiver_name_node_kinds() + .contains(&grandchild.kind()) + }); + if let Some(direct) = direct { + return Some(node_text(direct, 
source).to_string()); + } + } + None + } + + fn member_field_text(&self, field: Node<'_>, source: &str) -> Option { + if self.navigation_suffix_node_kinds().contains(&field.kind()) { + let suffix = field + .child_by_field_name("suffix") + .or_else(|| { + named_children(field).into_iter().find(|child| { + self.identifier_node_kinds().contains(&child.kind()) + || self.field_identifier_node_kinds().contains(&child.kind()) + }) + }) + .or_else(|| named_children(field).into_iter().last())?; + let text = node_text(suffix, source) + .trim_start_matches(['.', '?']) + .trim_start_matches("->"); + return (!text.is_empty()).then(|| text.to_string()); + } + + Some( + node_text(field, source) + .trim_start_matches(['.', '?']) + .trim_start_matches("->") + .to_string(), + ) + } + + fn method_param_types(&self, _lines: &[String]) -> BTreeMap> { + BTreeMap::new() + } + + fn immutable_struct_readers(&self, _lines: &[String]) -> BTreeMap> { + BTreeMap::new() + } + + fn immutable_struct_reader_types( + &self, + _lines: &[String], + ) -> BTreeMap> { + BTreeMap::new() + } + + fn type_aliases(&self, _lines: &[String]) -> BTreeMap { + BTreeMap::new() + } + + fn clone_candidates(&self, document: &Document) -> Vec { + default_clone_candidates(document) + } + + fn clone_fingerprint(&self, node: &RawNode) -> (String, usize) { + clone_fingerprint(node, &mut HashSet::new()) + } +} + +fn default_clone_candidates(document: &Document) -> Vec { + let mut out = Vec::new(); + let mut seen = HashSet::new(); + + for function in &document.function_defs { + let candidate = clone_candidate_for( + document, + &function.body, + Some("defn"), + Some(function.name.as_str()), + ); + clone_add_candidate(&mut out, &mut seen, candidate); + } + + let mut nodes = Vec::new(); + document.root.walk(&mut nodes); + for node in nodes { + if clone_candidate_node(node) { + let candidate = clone_candidate_for(document, node, None, None); + clone_add_candidate(&mut out, &mut seen, candidate); + } + } + + out +} + +fn 
clone_add_candidate( + out: &mut Vec, + seen: &mut HashSet, + candidate: Option, +) { + let Some(candidate) = candidate else { return }; + if clone_typed_struct_schema_text(&candidate.raw) { + return; + } + let key = format!( + "{}\0{}\0{:?}\0{}\0{}", + candidate.file, candidate.line, candidate.span, candidate.node_name, candidate.fingerprint + ); + if seen.insert(key) { + out.push(candidate); + } +} + +fn clone_candidate_for( + document: &Document, + node: &RawNode, + node_name: Option<&str>, + function_name: Option<&str>, +) -> Option { + let (fingerprint, mass) = clone_fingerprint(node, &mut HashSet::new()); + if fingerprint.is_empty() { + return None; + } + + let line = node.line(); + let method = clone_method_span_for(document, line); + let children = clone_fuzzy_children_for(node); + let mut child_fingerprints = Vec::new(); + let mut child_masses = Vec::new(); + for child in children { + let (child_fp, child_mass) = clone_fingerprint(child, &mut HashSet::new()); + if !child_fp.is_empty() && child_mass > 0 { + child_fingerprints.push(child_fp); + child_masses.push(child_mass); + } + } + + Some(CloneCandidate { + file: document.file.clone(), + line, + span: node.span, + method_name: function_name + .map(ToString::to_string) + .or_else(|| method.map(|function| function.name.clone())) + .unwrap_or_else(|| "(top-level)".to_string()), + node_name: node_name + .map(ToString::to_string) + .unwrap_or_else(|| clone_node_name(node).to_string()), + mass, + fingerprint, + raw: normalize_text(&node.text), + child_fingerprints, + child_masses, + }) +} + +fn clone_candidate_node(node: &RawNode) -> bool { + node.named + && !CLONE_SKIP_KINDS.contains(&node.kind.as_str()) + && CLONE_CANDIDATE_KINDS.contains(&node.kind.as_str()) + && !clone_typed_struct_schema_text(&node.text) + && !node.named_children().is_empty() +} + +fn clone_fuzzy_children_for(node: &RawNode) -> Vec<&RawNode> { + let source = clone_body_node(node).unwrap_or(node); + let mut children = 
source.named_children(); + if children.is_empty() { + children = node.named_children(); + } + children + .into_iter() + .filter(|child| { + !CLONE_SKIP_KINDS.contains(&child.kind.as_str()) + && !clone_typed_struct_schema_text(&child.text) + }) + .collect() +} + +fn clone_body_node(node: &RawNode) -> Option<&RawNode> { + node.children + .iter() + .find(|child| CLONE_BODY_KINDS.contains(&child.kind.as_str())) +} + +fn clone_fingerprint(node: &RawNode, active: &mut HashSet) -> (String, usize) { + let key = clone_node_key(node); + if active.contains(&key) || node.kind == "comment" { + return (String::new(), 0); + } + active.insert(key.clone()); + let out = + if CLONE_CALL_KINDS.contains(&node.kind.as_str()) && clone_call_message(node).is_some() { + clone_fingerprint_call(node, active) + } else if node.children.is_empty() { + let token = clone_terminal_token(node); + if token.is_empty() { + (String::new(), 0) + } else { + (token, 1) + } + } else { + let mut child_parts = Vec::new(); + let mut mass = 1; + for child in &node.children { + let (child_fp, child_mass) = clone_fingerprint(child, active); + if child_fp.is_empty() { + continue; + } + child_parts.push(child_fp); + mass += child_mass; + } + if child_parts.is_empty() { + (clone_terminal_token(node), 1) + } else { + (format!("{}({})", node.kind, child_parts.join(" ")), mass) + } + }; + active.remove(&key); + out +} + +fn clone_fingerprint_call(node: &RawNode, active: &mut HashSet) -> (String, usize) { + let message = clone_call_message(node).unwrap_or_default(); + let mut child_parts = Vec::new(); + let mut mass = 1; + for child in &node.children { + let (child_fp, child_mass) = clone_fingerprint(child, active); + if child_fp.is_empty() { + continue; + } + child_parts.push(child_fp); + mass += child_mass; + } + ( + format!("{}<{}>({})", node.kind, message, child_parts.join(" ")), + mass, + ) +} + +fn clone_call_message(node: &RawNode) -> Option { + if !node.children.iter().any(|child| { + matches!( + 
child.kind.as_str(), + "argument_list" | "arguments" | "call_suffix" + ) + }) { + return None; + } + let argument_start = node + .children + .iter() + .find(|child| { + matches!( + child.kind.as_str(), + "argument_list" | "arguments" | "call_suffix" + ) + }) + .map(|child| (child.span[0], child.span[1])); + let named_before_args = node + .named_children() + .into_iter() + .filter(|child| { + argument_start + .map(|start| (child.span[0], child.span[1]) < start) + .unwrap_or(true) + }) + .collect::>(); + named_before_args + .last() + .and_then(|callee| clone_callee_message(callee)) +} + +fn clone_callee_message(node: &RawNode) -> Option { + if CLONE_IDENTIFIER_KINDS.contains(&node.kind.as_str()) { + return Some(node.text.clone()); + } + if matches!( + node.kind.as_str(), + "navigation_expression" | "directly_assignable_expression" + ) { + return clone_navigation_suffix_message(node); + } + + node.named_children() + .into_iter() + .rev() + .find(|child| CLONE_IDENTIFIER_KINDS.contains(&child.kind.as_str())) + .map(|child| child.text.clone()) +} + +fn clone_navigation_suffix_message(node: &RawNode) -> Option { + let suffix = node + .named_children() + .into_iter() + .rev() + .find(|child| child.kind == "navigation_suffix")?; + suffix + .named_children() + .into_iter() + .rev() + .find(|child| CLONE_IDENTIFIER_KINDS.contains(&child.kind.as_str())) + .map(|child| child.text.clone()) +} + +fn clone_terminal_token(node: &RawNode) -> String { + let kind = node.kind.as_str(); + if CLONE_IDENTIFIER_KINDS.contains(&kind) { + return "id".to_string(); + } + if CLONE_LITERAL_KINDS.contains(&kind) { + return clone_literal_token(kind).to_string(); + } + let text = normalize_text(&node.text); + if text.is_empty() { + return String::new(); + } + if clone_identifier_text(&text) { + return "id".to_string(); + } + if clone_literal_text(&text) { + return "lit".to_string(); + } + format!("{kind}:{text}") +} + +fn clone_literal_token(kind: &str) -> &str { + match kind { + "true" | "false" 
=> "bool", + "nil" | "none" | "null" => "nil", + _ => "lit", + } +} + +fn clone_identifier_text(text: &str) -> bool { + let mut chars = text.chars(); + let Some(first) = chars.next() else { + return false; + }; + (first == '_' || first.is_ascii_alphabetic()) + && chars.all(|char| { + char == '_' || char == '!' || char == '?' || char == '=' || char.is_ascii_alphanumeric() + }) +} + +fn clone_literal_text(text: &str) -> bool { + if clone_symbol_literal_text(text) + || clone_quoted_literal_text(text, '"') + || clone_quoted_literal_text(text, '\'') + { + return true; + } + text.parse::().is_ok() +} + +fn clone_symbol_literal_text(text: &str) -> bool { + let mut chars = text.chars(); + if chars.next() != Some(':') { + return false; + } + let Some(first) = chars.next() else { + return false; + }; + (first == '_' || first.is_ascii_alphabetic()) + && chars.all(|char| char == '_' || char.is_ascii_alphanumeric()) +} + +fn clone_quoted_literal_text(text: &str, quote: char) -> bool { + text.len() >= 2 && text.starts_with(quote) && text.ends_with(quote) +} + +fn clone_node_name(node: &RawNode) -> &str { + match node.kind.as_str() { + "method" + | "function_definition" + | "function_declaration" + | "method_definition" + | "function_item" => "defn", + "singleton_method" => "defs", + other => other, + } +} + +fn clone_typed_struct_schema_text(text: &str) -> bool { + text.contains("< T::Struct") + || text.contains("( + document: &'a Document, + line_no: usize, +) -> Option<&'a super::super::FunctionDef> { + document + .function_defs + .iter() + .find(|function| function.span[0] <= line_no && line_no <= function.span[2]) +} + +fn clone_node_key(node: &RawNode) -> String { + format!( + "{}\0{}\0{}\0{}\0{}\0{}", + node.kind, + node.span[0], + node.span[1], + node.span[2], + node.span[3], + node.text.len() + ) +} + +fn snake_case_type_name(type_str: &str) -> String { + type_str + .split("::") + .last() + .unwrap_or(type_str) + .chars() + .enumerate() + .fold(String::new(), |mut acc, 
(index, ch)| { + if index > 0 && ch.is_ascii_uppercase() { + acc.push('_'); + } + acc.push(ch.to_ascii_lowercase()); + acc + }) +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs new file mode 100644 index 000000000..26f0836de --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs @@ -0,0 +1,122 @@ +use super::super::tree_sitter_adapter::normalize_type_owner; +use super::super::Language; +use super::base::LanguageProfile; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct CProfile; + +impl LanguageProfile for CProfile { + fn language(&self) -> Language { + Language::C + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_c::LANGUAGE.into() + } + + fn first_argument_receiver(&self) -> bool { + true + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_definition"] + } + + fn struct_owner_node_kinds(&self) -> &[&str] { + &["struct_specifier"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["parameter_list"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["compound_statement"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn field_identifier_node_kinds(&self) -> &[&str] { + &["field_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn receiver_parameter_node_kinds(&self) -> &[&str] { + &["parameter_declaration"] + } + + fn first_argument_receiver_type_node_kinds(&self) -> &[&str] { + &[ + "type_identifier", + "primitive_type", + "qualified_identifier", + "scoped_type_identifier", + ] + } + + fn first_argument_receiver_name_node_kinds(&self) -> &[&str] { + &["identifier", "field_identifier"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + 
&["binary_expression"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["case_statement"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_definition", "struct_specifier"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["case_statement", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["field_expression"] + } + + fn receiver_convention_owner_name(&self, node: Node<'_>, source: &str) -> Option { + if !self.first_argument_receiver() || node.kind() != "function_definition" { + return None; + } + + let (type_name, name) = self.first_argument_receiver_parameter(node, source)?; + (name == "self").then(|| normalize_type_owner(&type_name)) + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs new file mode 100644 index 000000000..8bc0ca830 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs @@ -0,0 +1,125 @@ +use super::super::Language; +use super::base::LanguageProfile; +use tree_sitter::Language as TreeSitterLanguage; + +pub(crate) struct CppProfile; + +impl LanguageProfile for CppProfile { + fn language(&self) -> Language { + Language::Cpp + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_cpp::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_definition"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_specifier"] + } + + fn struct_owner_node_kinds(&self) -> &[&str] { + &["struct_specifier"] + } + + fn parameter_list_node_kinds(&self) -> 
&[&str] { + &["parameter_list"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["compound_statement"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &[ + "identifier", + "type_identifier", + "qualified_identifier", + "namespace_identifier", + ] + } + + fn field_identifier_node_kinds(&self) -> &[&str] { + &["field_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn receiver_type_node_kinds(&self) -> &[&str] { + &[ + "type_identifier", + "qualified_identifier", + "scoped_type_identifier", + ] + } + + fn receiver_parameter_node_kinds(&self) -> &[&str] { + &["parameter_declaration"] + } + + fn first_argument_receiver_type_node_kinds(&self) -> &[&str] { + &[ + "type_identifier", + "primitive_type", + "qualified_identifier", + "scoped_type_identifier", + ] + } + + fn first_argument_receiver_name_node_kinds(&self) -> &[&str] { + &["identifier", "field_identifier"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["case_statement"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_definition", "class_specifier", "struct_specifier"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["case_statement", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["condition_clause", "parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["field_expression"] + } +} diff 
--git a/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs new file mode 100644 index 000000000..55fc8c99a --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs @@ -0,0 +1,91 @@ +use super::super::Language; +use super::base::LanguageProfile; +use tree_sitter::Language as TreeSitterLanguage; + +pub(crate) struct CSharpProfile; + +impl LanguageProfile for CSharpProfile { + fn language(&self) -> Language { + Language::CSharp + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_c_sharp::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["method_declaration"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["parameter_list"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["block", "declaration_list"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["invocation_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn declarator_node_kinds(&self) -> &[&str] { + &["variable_declaration", "variable_declarator"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["switch_section"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["method_declaration", "class_declaration"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["switch_section", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] 
{ + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["member_access_expression"] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/false_simplicity_lexicon.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/false_simplicity_lexicon.rs new file mode 100644 index 000000000..c646225f2 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/false_simplicity_lexicon.rs @@ -0,0 +1,673 @@ +use super::super::Language; + +#[derive(Clone, Copy)] +pub(crate) struct FalseSimplicityLexicon { + pub(crate) dispatch_mids: &'static [&'static str], + pub(crate) meta_mids: &'static [&'static str], + pub(crate) method_obj_mids: &'static [&'static str], + pub(crate) io_consts: &'static [&'static str], + pub(crate) io_bare: &'static [&'static str], + pub(crate) dir_context: &'static [&'static str], + pub(crate) context_pairs: &'static [(&'static str, &'static [&'static str])], + pub(crate) context_bare: &'static [&'static str], + pub(crate) callback_set: &'static [&'static str], + pub(crate) core_consts: &'static [&'static str], +} + +const EMPTY: &[&str] = &[]; +const EMPTY_PAIRS: &[(&str, &[&str])] = &[]; +const COMMON_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", +]; + +const RUBY_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("Time", &["now", "current"]), + ("Date", &["today", "current"]), + ("DateTime", &["now", "current"]), + ("Process", &["pid", "ppid", "uid", "gid", "euid"]), + ("Thread", &["current", "list", "main"]), + ("Fiber", &["current"]), + ("Random", &["rand", "bytes"]), + ("GC", &["stat", "count"]), + ("ObjectSpace", &["each_object", "count_objects"]), +]; +const PYTHON_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("time", &["time", "monotonic", "perf_counter"]), + ("datetime", &["now", "today", 
"utcnow"]), + ("random", &["random", "randint", "randrange", "choice"]), +]; +const JS_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("Date", &["now"]), + ("Math", &["random"]), + ("performance", &["now"]), +]; +const GO_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("time", &["Now", "Since", "Until"]), + ("rand", &["Int", "Intn", "Float64", "Read"]), +]; +const RUST_CONTEXT_PAIRS: &[(&str, &[&str])] = &[("SystemTime", &["now"]), ("Instant", &["now"])]; +const ZIG_CONTEXT_PAIRS: &[(&str, &[&str])] = + &[("time", &["timestamp", "nanoTimestamp", "milliTimestamp"])]; +const LUA_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("os", &["time", "clock", "date", "getenv"]), + ("math", &["random"]), +]; +const CPP_CONTEXT_PAIRS: &[(&str, &[&str])] = + &[("chrono", &["now"]), ("random_device", &["operator()"])]; +const CSHARP_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("DateTime", &["Now", "UtcNow", "Today"]), + ("Guid", &["NewGuid"]), + ("Random", &["Next", "NextDouble"]), +]; +const JAVA_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ( + "System", + &["currentTimeMillis", "nanoTime", "getenv", "getProperty"], + ), + ("Instant", &["now"]), + ("UUID", &["randomUUID"]), + ("Math", &["random"]), +]; +const SWIFT_CONTEXT_PAIRS: &[(&str, &[&str])] = &[("Date", &["now"]), ("UUID", &["init"])]; +const KOTLIN_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ( + "System", + &["currentTimeMillis", "nanoTime", "getenv", "getProperty"], + ), + ("Instant", &["now"]), + ("UUID", &["randomUUID"]), + ("Random", &["nextInt", "nextLong", "nextDouble"]), +]; + +const RUBY_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "reentrant", + "subscribe", + "callback", + "hook", +]; +const GO_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "Lock", + "Unlock", + "RLock", + "RUnlock", + "Do", + "Go", + "Add", + "Done", + "Wait", +]; +const 
RUST_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "read", + "write", + "spawn", + "await", +]; +const ZIG_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "spawn", + "wait", + "signal", +]; +const C_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "pthread_mutex_lock", + "pthread_mutex_unlock", +]; +const CPP_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "try_lock", + "wait", + "notify_one", + "notify_all", +]; +const CSHARP_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "Lock", + "Monitor", + "Enter", + "Exit", + "Wait", + "Pulse", +]; +const JAVA_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "wait", + "notify", + "notifyAll", + "submit", + "execute", +]; +const SWIFT_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "async", + "sync", +]; +const KOTLIN_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "synchronized", + "launch", + "async", + "await", +]; + +const RUBY_CORE_CONSTS: &[&str] = &[ + "String", + "Symbol", + "Integer", + "Float", + "Numeric", + "Rational", + "Complex", + "Array", + "Hash", + "Set", + "Range", + "Struct", + "Object", + "BasicObject", + 
"Kernel", + "Module", + "Class", + "Comparable", + "Enumerable", + "Enumerator", + "Proc", + "Method", + "UnboundMethod", + "NilClass", + "TrueClass", + "FalseClass", + "Exception", + "StandardError", + "RuntimeError", + "ArgumentError", + "TypeError", + "NameError", + "NoMethodError", + "IO", + "File", + "Dir", + "Time", + "Date", + "DateTime", + "Regexp", + "MatchData", + "Thread", + "Mutex", + "Fiber", + "Process", + "Math", + "GC", + "ObjectSpace", + "Marshal", + "Random", + "Encoding", +]; + +pub(crate) fn false_simplicity_lexicon(language: Language) -> FalseSimplicityLexicon { + match language { + Language::Ruby => FalseSimplicityLexicon { + dispatch_mids: &[ + "send", + "__send__", + "public_send", + "const_get", + "constantize", + "instance_variable_get", + ], + meta_mids: &[ + "define_method", + "define_singleton_method", + "alias_method", + "class_eval", + "module_eval", + "instance_eval", + "class_exec", + "module_exec", + "instance_exec", + "eval", + "const_set", + "instance_variable_set", + "remove_method", + "undef_method", + "prepend", + "singleton_class", + "binding", + ], + method_obj_mids: &["method", "public_method", "instance_method"], + io_consts: &[ + "File", + "IO", + "Dir", + "FileUtils", + "Open3", + "Socket", + "TCPSocket", + "UDPSocket", + "TCPServer", + "UNIXSocket", + "Tempfile", + "Pathname", + "Marshal", + ], + io_bare: &[ + "puts", + "print", + "warn", + "gets", + "readline", + "readlines", + "system", + "exec", + "spawn", + "fork", + "sleep", + "open", + "abort", + "exit", + "exit!", + ], + dir_context: &["pwd", "getwd", "home"], + context_pairs: RUBY_CONTEXT_PAIRS, + context_bare: &["rand", "srand"], + callback_set: RUBY_CALLBACK_SET, + core_consts: RUBY_CORE_CONSTS, + }, + Language::Python => FalseSimplicityLexicon { + dispatch_mids: &[ + "getattr", + "setattr", + "hasattr", + "__getattr__", + "__setattr__", + "import_module", + ], + meta_mids: &[ + "eval", "exec", "compile", "type", "globals", "locals", "vars", "setattr", + 
"delattr", + ], + method_obj_mids: &["method"], + io_consts: &[ + "Path", + "pathlib", + "os", + "sys", + "subprocess", + "socket", + "shutil", + ], + io_bare: &["print", "input", "open", "exec", "eval"], + dir_context: &["getcwd", "home"], + context_pairs: PYTHON_CONTEXT_PAIRS, + context_bare: &["random", "randint", "randrange"], + callback_set: COMMON_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::JavaScript | Language::TypeScript => FalseSimplicityLexicon { + dispatch_mids: &["eval", "Function", "call", "apply", "bind"], + meta_mids: &[ + "eval", + "Function", + "defineProperty", + "defineProperties", + "setPrototypeOf", + ], + method_obj_mids: &["method"], + io_consts: &["console", "Console", "fs", "process", "Deno", "Bun"], + io_bare: &["setTimeout", "setInterval", "fetch", "require", "import"], + dir_context: EMPTY, + context_pairs: JS_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: COMMON_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Go => FalseSimplicityLexicon { + dispatch_mids: &[ + "Call", + "CallSlice", + "Method", + "MethodByName", + "ValueOf", + "TypeOf", + ], + meta_mids: &["Call", "CallSlice", "MethodByName", "New", "MakeFunc"], + method_obj_mids: &["method"], + io_consts: &["os", "io", "ioutil", "fs", "net", "http", "exec", "syscall"], + io_bare: &["panic", "print", "println", "recover"], + dir_context: &["Getwd", "UserHomeDir"], + context_pairs: GO_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: GO_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Rust => FalseSimplicityLexicon { + dispatch_mids: &[ + "downcast", + "downcast_ref", + "downcast_mut", + "call", + "call_mut", + "call_once", + ], + meta_mids: &["transmute", "from_raw_parts", "from_raw_parts_mut"], + method_obj_mids: &["method"], + io_consts: &["std", "tokio", "fs", "env", "process", "net", "io"], + io_bare: &["panic", "todo", "unimplemented", "unreachable"], + dir_context: &["current_dir", "home_dir"], + context_pairs: RUST_CONTEXT_PAIRS, + context_bare: 
EMPTY, + callback_set: RUST_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Zig => FalseSimplicityLexicon { + dispatch_mids: &["field", "fieldParentPtr", "ptrCast", "alignCast", "call"], + meta_mids: &[ + "typeInfo", + "TypeOf", + "ptrCast", + "intFromPtr", + "ptrFromInt", + "eval", + ], + method_obj_mids: &["method"], + io_consts: &[ + "std", "os", "fs", "process", "net", "Thread", "Mutex", "Atomic", + ], + io_bare: &["panic", "unreachable"], + dir_context: EMPTY, + context_pairs: ZIG_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: ZIG_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Lua => FalseSimplicityLexicon { + dispatch_mids: &["load", "loadfile", "dofile", "require", "rawget", "rawset"], + meta_mids: &[ + "setmetatable", + "getmetatable", + "debug", + "eval", + "load", + "loadfile", + ], + method_obj_mids: &["method"], + io_consts: &["io", "os", "debug", "package"], + io_bare: &["print", "error", "assert", "require", "collectgarbage"], + dir_context: EMPTY, + context_pairs: LUA_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: COMMON_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::C => FalseSimplicityLexicon { + dispatch_mids: &["dlsym", "dlopen", "GetProcAddress"], + meta_mids: &["setjmp", "longjmp", "va_start", "va_arg"], + method_obj_mids: &["method"], + io_consts: &["FILE", "DIR", "pthread", "mutex", "atomic"], + io_bare: &[ + "printf", "fprintf", "fopen", "open", "read", "write", "close", "system", "exec", + "abort", "exit", "assert", + ], + dir_context: &["getcwd", "getenv"], + context_pairs: EMPTY_PAIRS, + context_bare: &["rand", "time", "clock"], + callback_set: C_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Cpp => FalseSimplicityLexicon { + dispatch_mids: &[ + "dynamic_cast", + "typeid", + "any_cast", + "get_if", + "visit", + "invoke", + ], + meta_mids: &["reinterpret_cast", "const_cast", "dlsym", "dlopen"], + method_obj_mids: &["method"], + io_consts: &[ + "std", + "filesystem", + "fstream", + "iostream", + 
"thread", + "mutex", + "atomic", + ], + io_bare: &["throw", "abort", "exit", "assert", "system"], + dir_context: &["current_path"], + context_pairs: CPP_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: CPP_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::CSharp => FalseSimplicityLexicon { + dispatch_mids: &[ + "Invoke", + "GetMethod", + "GetProperty", + "GetField", + "Activator", + "CreateInstance", + ], + meta_mids: &["Invoke", "GetType", "Reflection", "Emit", "DynamicMethod"], + method_obj_mids: &["method"], + io_consts: &[ + "Console", + "File", + "Directory", + "Path", + "Process", + "Socket", + "HttpClient", + "Environment", + ], + io_bare: &["throw"], + dir_context: &["CurrentDirectory", "GetEnvironmentVariable"], + context_pairs: CSHARP_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: CSHARP_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Java => FalseSimplicityLexicon { + dispatch_mids: &[ + "invoke", + "getMethod", + "getDeclaredMethod", + "getField", + "getDeclaredField", + "forName", + ], + meta_mids: &["invoke", "setAccessible", "newInstance", "Proxy"], + method_obj_mids: &["method"], + io_consts: &[ + "System", + "File", + "Files", + "Paths", + "ProcessBuilder", + "Socket", + "HttpClient", + "Thread", + "Lock", + "AtomicReference", + ], + io_bare: &["throw"], + dir_context: &["getProperty", "getenv"], + context_pairs: JAVA_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: JAVA_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Swift => FalseSimplicityLexicon { + dispatch_mids: &[ + "perform", + "value", + "setValue", + "selector", + "NSClassFromString", + ], + meta_mids: &[ + "Mirror", + "unsafeBitCast", + "withUnsafePointer", + "withUnsafeBytes", + ], + method_obj_mids: &["method"], + io_consts: &[ + "FileManager", + "Process", + "URLSession", + "DispatchQueue", + "Thread", + "Lock", + "NSLock", + ], + io_bare: &[ + "print", + "fatalError", + "preconditionFailure", + "assertionFailure", + ], + dir_context: 
&["currentDirectoryPath", "homeDirectoryForCurrentUser"], + context_pairs: SWIFT_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: SWIFT_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Kotlin => FalseSimplicityLexicon { + dispatch_mids: &[ + "invoke", + "call", + "callBy", + "memberProperties", + "declaredMemberFunctions", + ], + meta_mids: &[ + "reflection", + "javaClass", + "Class", + "forName", + "setAccessible", + ], + method_obj_mids: &["method"], + io_consts: &[ + "System", + "File", + "Files", + "Paths", + "ProcessBuilder", + "Socket", + "HttpClient", + "Thread", + "Mutex", + "AtomicReference", + ], + io_bare: &["println", "print", "error", "check", "require", "TODO"], + dir_context: &["getProperty", "getenv"], + context_pairs: KOTLIN_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: KOTLIN_CALLBACK_SET, + core_consts: EMPTY, + }, + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs new file mode 100644 index 000000000..a51f5f04d --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs @@ -0,0 +1,111 @@ +use super::super::Language; +use super::base::LanguageProfile; +use tree_sitter::Language as TreeSitterLanguage; + +pub(crate) struct GoProfile; + +impl LanguageProfile for GoProfile { + fn language(&self) -> Language { + Language::Go + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_go::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_declaration", "method_declaration"] + } + + fn generic_owner_node_kinds(&self) -> &[&str] { + &["type_spec"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["parameter_list"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["block", "statement_list"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier", "type_identifier"] + } + + fn 
field_identifier_node_kinds(&self) -> &[&str] { + &["field_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_statement", "short_var_declaration"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", ":=", "+=", "-=", "*=", "/=", "%="] + } + + fn receiver_type_node_kinds(&self) -> &[&str] { + &["pointer_type", "type_identifier"] + } + + fn receiver_parameter_node_kinds(&self) -> &[&str] { + &["parameter_declaration"] + } + + fn first_argument_receiver_type_node_kinds(&self) -> &[&str] { + &["type_identifier", "pointer_type"] + } + + fn first_argument_receiver_name_node_kinds(&self) -> &[&str] { + &["identifier", "field_identifier"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["expression_switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["expression_case"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_declaration", "method_declaration", "type_spec"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["expression_case", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn boolean_wrapper_node_kinds(&self) -> &[&str] { + &["expression_list"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["selector_expression"] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs new file mode 100644 index 000000000..acf69741e --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs @@ -0,0 +1,87 @@ +use super::super::Language; +use super::base::LanguageProfile; +use 
tree_sitter::Language as TreeSitterLanguage; + +pub(crate) struct JavaProfile; + +impl LanguageProfile for JavaProfile { + fn language(&self) -> Language { + Language::Java + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_java::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["method_declaration"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["formal_parameters"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["method_invocation"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier", "type_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_expression"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["switch_block_statement_group"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["method_declaration", "class_declaration"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["switch_block_statement_group", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["field_access"] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs new file mode 100644 index 000000000..2a36abe45 --- /dev/null +++ 
b/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs @@ -0,0 +1,95 @@ +use super::super::Language; +use super::base::LanguageProfile; +use tree_sitter::Language as TreeSitterLanguage; + +pub(crate) struct JavaScriptProfile; + +impl LanguageProfile for JavaScriptProfile { + fn language(&self) -> Language { + Language::JavaScript + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_javascript::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_declaration", "method_definition"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["formal_parameters"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["statement_block"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn field_identifier_node_kinds(&self) -> &[&str] { + &["property_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression", "augmented_assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%=", "&&=", "||="] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["switch_case"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &[ + "function_declaration", + "method_definition", + "class_declaration", + ] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["switch_case", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + 
&["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["member_expression"] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs new file mode 100644 index 000000000..e28347285 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs @@ -0,0 +1,109 @@ +use super::super::Language; +use super::base::LanguageProfile; +use tree_sitter::Language as TreeSitterLanguage; + +pub(crate) struct KotlinProfile; + +impl LanguageProfile for KotlinProfile { + fn language(&self) -> Language { + Language::Kotlin + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_kotlin_ng::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_declaration"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["function_value_parameters"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["function_body", "statements"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["simple_identifier", "type_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &[ + "equality_expression", + "comparison_expression", + "conjunction_expression", + "additive_expression", + "multiplicative_expression", + ] + } + + fn case_node_kinds(&self) -> &[&str] { + &["when_expression"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["when_entry"] + } + + fn case_pattern_node_kinds(&self) -> &[&str] { + &["when_condition", "pattern"] + } + + fn case_subject_node_kinds(&self) -> &[&str] { + &["when_subject"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_declaration", 
"class_declaration"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["when_entry", "else", "line_comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default", "else"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &[ + "conjunction_expression", + "equality_expression", + "comparison_expression", + ] + } + + fn boolean_wrapper_node_kinds(&self) -> &[&str] { + &["statements", "pattern"] + } + + fn navigation_suffix_node_kinds(&self) -> &[&str] { + &["navigation_suffix"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["navigation_expression", "directly_assignable_expression"] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs new file mode 100644 index 000000000..8dedf0062 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs @@ -0,0 +1,76 @@ +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::line; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct LuaProfile; + +impl LanguageProfile for LuaProfile { + fn language(&self) -> Language { + Language::Lua + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_lua::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["parameters"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["function_call", "method_call"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_statement"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn 
boolean_and_operators(&self) -> &[&str] { + &["and", "&&"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn boolean_wrapper_node_kinds(&self) -> &[&str] { + &["expression_list"] + } + + fn expression_list_node_kinds(&self) -> &[&str] { + &["expression_list"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["dot_index_expression", "variable_list"] + } + + fn generated_prelude(&self, node: Node<'_>, source: &str) -> bool { + if line(node) != 1 { + return false; + } + let first_line = source.lines().next().unwrap_or(""); + first_line.contains("_tl_compat") && first_line.contains("compat53.module") + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/mod.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/mod.rs new file mode 100644 index 000000000..6dc088f4e --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/mod.rs @@ -0,0 +1,68 @@ +pub(crate) mod base; +mod c; +mod cpp; +mod csharp; +pub(crate) mod false_simplicity_lexicon; +mod go; +mod java; +mod javascript; +mod kotlin; +mod lua; +mod python; +mod ruby; +mod rust; +mod swift; +mod typescript; +mod zig; + +pub(crate) use base::LanguageProfile; + +use super::Language; +use c::CProfile; +use cpp::CppProfile; +use csharp::CSharpProfile; +use go::GoProfile; +use java::JavaProfile; +use javascript::JavaScriptProfile; +use kotlin::KotlinProfile; +use lua::LuaProfile; +use python::PythonProfile; +use ruby::RubyProfile; +use rust::RustProfile; +use swift::SwiftProfile; +use typescript::TypeScriptProfile; +use zig::ZigProfile; + +static RUBY_PROFILE: RubyProfile = RubyProfile; +static PYTHON_PROFILE: PythonProfile = PythonProfile; +static JAVASCRIPT_PROFILE: JavaScriptProfile = JavaScriptProfile; +static JAVA_PROFILE: JavaProfile = JavaProfile; +static TYPESCRIPT_PROFILE: TypeScriptProfile = TypeScriptProfile; +static SWIFT_PROFILE: SwiftProfile = SwiftProfile; +static KOTLIN_PROFILE: KotlinProfile = KotlinProfile; +static 
GO_PROFILE: GoProfile = GoProfile; +static RUST_PROFILE: RustProfile = RustProfile; +static ZIG_PROFILE: ZigProfile = ZigProfile; +static LUA_PROFILE: LuaProfile = LuaProfile; +static C_PROFILE: CProfile = CProfile; +static CPP_PROFILE: CppProfile = CppProfile; +static CSHARP_PROFILE: CSharpProfile = CSharpProfile; + +pub(crate) fn language_profile(language: Language) -> &'static dyn LanguageProfile { + match language { + Language::Ruby => &RUBY_PROFILE, + Language::Python => &PYTHON_PROFILE, + Language::JavaScript => &JAVASCRIPT_PROFILE, + Language::Java => &JAVA_PROFILE, + Language::TypeScript => &TYPESCRIPT_PROFILE, + Language::Swift => &SWIFT_PROFILE, + Language::Kotlin => &KOTLIN_PROFILE, + Language::Go => &GO_PROFILE, + Language::Rust => &RUST_PROFILE, + Language::Zig => &ZIG_PROFILE, + Language::Lua => &LUA_PROFILE, + Language::C => &C_PROFILE, + Language::Cpp => &CPP_PROFILE, + Language::CSharp => &CSHARP_PROFILE, + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs new file mode 100644 index 000000000..e94e630b5 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs @@ -0,0 +1,95 @@ +use super::super::Language; +use super::base::LanguageProfile; +use tree_sitter::Language as TreeSitterLanguage; + +pub(crate) struct PythonProfile; + +impl LanguageProfile for PythonProfile { + fn language(&self) -> Language { + Language::Python + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_python::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_definition"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_definition"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["parameters"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn 
assignment_node_kinds(&self) -> &[&str] { + &["assignment", "augmented_assignment"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["comparison_operator", "binary_operator", "boolean_operator"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["match_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["case_clause"] + } + + fn case_pattern_node_kinds(&self) -> &[&str] { + &["case_pattern", "pattern"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_definition", "class_definition"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["case_clause", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["and", "&&"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_operator", "boolean_operator", "comparison_operator"] + } + + fn boolean_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["attribute"] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs new file mode 100644 index 000000000..c00a944a9 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs @@ -0,0 +1,396 @@ +use super::super::tree_sitter_adapter::{ + first_child_kind, first_named_text, named_children, next_sibling_raw_text, AssignmentTarget, + Target, +}; +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::{node_text, normalize_text}; +use regex::Regex; +use std::collections::{BTreeMap, BTreeSet}; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct RubyProfile; + +impl LanguageProfile for RubyProfile { + fn 
language(&self) -> Language { + Language::Ruby + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_ruby::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["method"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class"] + } + + fn module_owner_node_kinds(&self) -> &[&str] { + &["module"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["method_parameters"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["body_statement", "do_block"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier", "constant"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment", "operator_assignment"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%=", "&&=", "||="] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["case"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["when"] + } + + fn case_pattern_node_kinds(&self) -> &[&str] { + &["pattern"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["method", "class", "module"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["when", "else", "then", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default", "else"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary"] + } + + fn boolean_wrapper_node_kinds(&self) -> &[&str] { + &["body_statement", "pattern", "argument_list"] + } + + fn accessor_call_node_kinds(&self) -> &[&str] { + &["call"] + } + + fn function_name(&self, node: Node<'_>, source: &str) -> Option { + match node.kind() { + "singleton_method" => { + let name = node + .child_by_field_name("name") + .map(|name| node_text(name, source).to_string()) + .or_else(|| { + named_children(node) + .into_iter() + .rev() + 
.find(|child| { + matches!( + child.kind(), + "identifier" | "field_identifier" | "property_identifier" + ) + }) + .map(|child| node_text(child, source).to_string()) + })?; + Some(format!("self.{name}")) + } + "body_statement" if first_child_kind(node) == Some("def") => { + hidden_ruby_method_name(node, source) + } + "argument_list" if first_child_kind(node) == Some("def") => { + inline_def_name(node, source) + } + _ => self.default_function_name(node, source), + } + } + + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + if node.kind() == "body_statement" + && matches!(first_child_kind(node), Some("class" | "module")) + { + return first_named_text(node, source, &["constant", "identifier", "type_identifier"]); + } + self.default_owner_name_from_declaration(node, source) + } + + fn hidden_case(&self, node: Node<'_>) -> bool { + matches!( + node.kind(), + "body_statement" | "block_body" | "argument_list" + ) && first_child_kind(node) == Some("case") + } + + fn hidden_case_source_node<'tree>(&self, node: Node<'tree>) -> Option> { + let mut cursor = node.walk(); + let result = node + .children(&mut cursor) + .find(|child| child.kind() == "case"); + result + } + + fn predicate_less_case(&self, node: Node<'_>) -> bool { + (node.kind() == "case" || self.hidden_case(node)) && self.decision_subject(node).is_none() + } + + fn case_pattern_texts(&self, patterns: &[Node<'_>], source: &str) -> Vec { + ruby_case_pattern_texts(patterns, source) + } + + fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { + ruby_state_variable_target(lhs, source).or_else(|| self.default_state_target(lhs, source)) + } + + fn assignment_target<'tree>(&self, node: Node<'tree>) -> Option> { + self.default_assignment_target(node) + .or_else(|| match node.kind() { + "instance_variable" | "global_variable" if self.assignment_lhs_node(node) => { + Some(AssignmentTarget { + lhs: node, + source: node.parent().unwrap_or(node), + }) + } + _ => None, + }) + } + + fn 
skip_state_write_node(&self, node: Node<'_>) -> bool { + node.kind() == "operator_assignment" + || (self.assignment_lhs_node(node) + && next_sibling_raw_text(node).as_deref() != Some("=") + && node.kind() != "instance_variable") + } + + fn skip_state_write_target(&self, target: &Target) -> bool { + target.field == "[]" || target.field.starts_with('$') + } + + fn method_param_types(&self, lines: &[String]) -> BTreeMap> { + ruby_method_param_types(lines) + } + + fn immutable_struct_readers(&self, lines: &[String]) -> BTreeMap> { + ruby_immutable_struct_readers(lines) + } + + fn immutable_struct_reader_types( + &self, + lines: &[String], + ) -> BTreeMap> { + ruby_immutable_struct_reader_types(lines) + } + + fn type_aliases(&self, lines: &[String]) -> BTreeMap { + ruby_type_aliases(lines) + } +} + +fn hidden_ruby_method_name(node: Node<'_>, source: &str) -> Option { + let children = named_children(node); + let receiver_index = children + .iter() + .position(|child| matches!(child.kind(), "self" | "constant")); + let search: Vec> = if let Some(index) = receiver_index { + children.into_iter().skip(index + 1).collect() + } else { + children + }; + let name = search + .into_iter() + .find(|child| { + matches!( + child.kind(), + "identifier" | "field_identifier" | "property_identifier" + ) + }) + .map(|child| node_text(child, source).to_string())?; + if receiver_index.is_some() { + Some(format!("self.{name}")) + } else { + Some(name) + } +} + +fn inline_def_name(node: Node<'_>, source: &str) -> Option { + hidden_ruby_method_name(node, source) +} + +fn ruby_state_variable_target(node: Node<'_>, source: &str) -> Option { + matches!(node.kind(), "instance_variable" | "global_variable").then(|| Target { + receiver: "self".to_string(), + field: node_text(node, source).to_string(), + }) +} + +fn ruby_case_pattern_texts(patterns: &[Node<'_>], source: &str) -> Vec { + if patterns.is_empty() { + return Vec::new(); + } + let texts = patterns + .iter() + .map(|pattern| 
normalize_text(node_text(*pattern, source))) + .collect::>(); + if !texts.iter().any(|text| text.starts_with('*')) { + return texts; + } + + let mut out = Vec::new(); + let mut pending_plain = Vec::new(); + for (index, text) in texts.iter().enumerate() { + if text.starts_with('*') { + if !pending_plain.is_empty() { + out.push(pending_plain.join(", ")); + pending_plain.clear(); + } + if texts.len() == 1 || index > 0 { + out.push(text.trim_start_matches('*').to_string()); + } else { + out.push(text.clone()); + } + } else { + pending_plain.push(text.clone()); + } + } + if !pending_plain.is_empty() { + out.push(pending_plain.join(", ")); + } + out +} + +fn ruby_immutable_struct_readers(lines: &[String]) -> BTreeMap> { + let mut readers = BTreeMap::new(); + let mut class_stack = Vec::new(); + let class_struct_re = + Regex::new(r"^\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b").unwrap(); + let const_re = Regex::new(r"^\s*const\s+:([A-Za-z_]\w*)\b").unwrap(); + let end_re = Regex::new(r"^\s*end\s*(?:#.*)?$").unwrap(); + + for line in lines { + if let Some(caps) = class_struct_re.captures(line) { + class_stack.push(caps[1].to_string()); + continue; + } + if !class_stack.is_empty() { + if let Some(caps) = const_re.captures(line) { + readers + .entry(class_stack.last().unwrap().clone()) + .or_insert_with(BTreeSet::new) + .insert(caps[1].to_string()); + continue; + } + } + if end_re.is_match(line) { + class_stack.pop(); + } + } + readers +} + +fn ruby_immutable_struct_reader_types( + lines: &[String], +) -> BTreeMap> { + let mut reader_types = BTreeMap::new(); + let mut class_stack = Vec::new(); + let class_struct_re = + Regex::new(r"^\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b").unwrap(); + let const_type_re = + Regex::new(r"^\s*const\s+:([A-Za-z_]\w*)\s*,\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\b") + .unwrap(); + let end_re = Regex::new(r"^\s*end\s*(?:#.*)?$").unwrap(); + + for line in lines { + if let Some(caps) = 
class_struct_re.captures(line) { + class_stack.push(caps[1].to_string()); + continue; + } + if !class_stack.is_empty() { + if let Some(caps) = const_type_re.captures(line) { + reader_types + .entry(class_stack.last().unwrap().clone()) + .or_insert_with(BTreeMap::new) + .insert(caps[1].to_string(), caps[2].to_string()); + continue; + } + } + if end_re.is_match(line) { + class_stack.pop(); + } + } + reader_types +} + +fn ruby_type_aliases(lines: &[String]) -> BTreeMap { + let mut aliases = BTreeMap::new(); + let type_alias_re = + Regex::new(r"^\s*([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*([A-Z]\w*(?:::[A-Z]\w*)*)\s*\}") + .unwrap(); + let const_alias_re = Regex::new(r"^\s*([A-Z]\w*)\s*=\s*([A-Z]\w*(?:::[A-Z]\w*)*)\b").unwrap(); + + for line in lines { + if let Some(caps) = type_alias_re.captures(line) { + aliases.insert(caps[1].to_string(), caps[2].to_string()); + } else if let Some(caps) = const_alias_re.captures(line) { + aliases.insert(caps[1].to_string(), caps[2].to_string()); + } + } + aliases +} + +fn ruby_method_param_types(lines: &[String]) -> BTreeMap> { + let mut types_by_method = BTreeMap::new(); + let mut pending_sig = String::new(); + let def_re = Regex::new(r"^\s*def\s+([A-Za-z_]\w*[!?=]?)(?:\s|\(|$)").unwrap(); + + for line in lines { + if ruby_pending_sig_active(line, &pending_sig) { + pending_sig.push_str(line); + } + if let Some(caps) = def_re.captures(line) { + types_by_method.insert(caps[1].to_string(), ruby_sig_param_types(&pending_sig)); + pending_sig.clear(); + } + } + types_by_method +} + +fn ruby_pending_sig_active(line: &str, pending_sig: &str) -> bool { + !pending_sig.is_empty() || line.trim().starts_with("sig") +} + +fn ruby_sig_param_types(sig_source: &str) -> BTreeMap { + let params_re = Regex::new(r"params\s*\((.*?)\)").unwrap(); + let param_pair_re = + Regex::new(r"([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)").unwrap(); + let mut params = BTreeMap::new(); + if let Some(p_caps) = params_re.captures(sig_source) { + for pair in 
param_pair_re.captures_iter(&p_caps[1]) { + params.insert(pair[1].to_string(), pair[2].to_string()); + } + } + params +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs new file mode 100644 index 000000000..608a7c94f --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs @@ -0,0 +1,103 @@ +use super::super::Language; +use super::base::LanguageProfile; +use tree_sitter::Language as TreeSitterLanguage; + +pub(crate) struct RustProfile; + +impl LanguageProfile for RustProfile { + fn language(&self) -> Language { + Language::Rust + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_rust::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_item"] + } + + fn impl_owner_node_kinds(&self) -> &[&str] { + &["impl_item"] + } + + fn struct_owner_node_kinds(&self) -> &[&str] { + &["struct_item"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["parameters"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["block", "declaration_list"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier", "type_identifier"] + } + + fn field_identifier_node_kinds(&self) -> &[&str] { + &["field_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression", "compound_assignment_expr"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn receiver_type_node_kinds(&self) -> &[&str] { + &["type_identifier", "generic_type", "scoped_type_identifier"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["match_expression"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["match_arm"] + } + + fn case_pattern_node_kinds(&self) -> &[&str] { + &["match_pattern", "pattern"] + } + + fn 
case_container_stop_node_kinds(&self) -> &[&str] { + &["function_item", "impl_item", "struct_item"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["match_arm", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression", "tuple_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["field_expression", "scoped_identifier"] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs new file mode 100644 index 000000000..af73a5320 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs @@ -0,0 +1,105 @@ +use super::super::Language; +use super::base::LanguageProfile; +use tree_sitter::Language as TreeSitterLanguage; + +pub(crate) struct SwiftProfile; + +impl LanguageProfile for SwiftProfile { + fn language(&self) -> Language { + Language::Swift + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_swift::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_declaration"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["function_value_parameters"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["function_body", "statements"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["simple_identifier", "type_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &[ + 
"equality_expression", + "comparison_expression", + "conjunction_expression", + "additive_expression", + "multiplicative_expression", + ] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["switch_entry"] + } + + fn case_pattern_node_kinds(&self) -> &[&str] { + &["switch_pattern", "pattern"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_declaration", "class_declaration"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["switch_entry", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &[ + "conjunction_expression", + "equality_expression", + "comparison_expression", + ] + } + + fn boolean_wrapper_node_kinds(&self) -> &[&str] { + &["statements", "pattern"] + } + + fn navigation_suffix_node_kinds(&self) -> &[&str] { + &["navigation_suffix"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["navigation_expression", "directly_assignable_expression"] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs new file mode 100644 index 000000000..183a2755f --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs @@ -0,0 +1,95 @@ +use super::super::Language; +use super::base::LanguageProfile; +use tree_sitter::Language as TreeSitterLanguage; + +pub(crate) struct TypeScriptProfile; + +impl LanguageProfile for TypeScriptProfile { + fn language(&self) -> Language { + Language::TypeScript + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_declaration", "method_definition"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_declaration"] 
+ } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["formal_parameters"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["statement_block"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn field_identifier_node_kinds(&self) -> &[&str] { + &["property_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression", "augmented_assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%=", "&&=", "||="] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["switch_case"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &[ + "function_declaration", + "method_definition", + "class_declaration", + ] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["switch_case", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["member_expression"] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs new file mode 100644 index 000000000..66c9723ea --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs @@ -0,0 +1,79 @@ +use super::super::Language; +use super::base::LanguageProfile; +use tree_sitter::Language as TreeSitterLanguage; + +pub(crate) struct ZigProfile; + +impl LanguageProfile for ZigProfile { + fn language(&self) -> Language { + Language::Zig + } 
+ + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_zig::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["parameters"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["block", "block_expression"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_expression"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["switch_case"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_declaration", "struct_declaration"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["switch_case", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default", "else"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["and", "&&"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["field_expression"] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 41931fba0..465e71140 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -1,4 +1,5 @@ use super::{ + adapters::{language_profile, LanguageProfile}, ComparisonUse, DecisionSite, Document, FunctionDef, Language, PredicateAlias, StateWrite, }; use crate::decomplex::ast::{line, node_text, normalize_text, span, RawNode}; @@ -6,295 +7,7 @@ use anyhow::{Context, 
Result}; use std::collections::HashSet; use std::fs; use std::path::{Path, PathBuf}; -use tree_sitter::{Language as TreeSitterLanguage, Node, Parser}; - -trait LanguageProfile { - fn language(&self) -> Language; - fn grammar(&self) -> TreeSitterLanguage; - - fn first_argument_receiver(&self) -> bool { - false - } - - fn function_name(&self, node: Node<'_>, source: &str) -> Option { - generic_function_name(node, source) - } - - fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { - generic_owner_name_from_declaration(node, source) - } - - fn generated_prelude(&self, _node: Node<'_>, _source: &str) -> bool { - false - } - - fn hidden_case(&self, _node: Node<'_>) -> bool { - false - } - - fn predicate_less_case(&self, node: Node<'_>) -> bool { - node.kind() == "case" && decision_subject(node).is_none() - } - - fn case_pattern_texts(&self, patterns: &[Node<'_>], source: &str) -> Vec { - patterns - .iter() - .map(|pattern| normalize_text(node_text(*pattern, source))) - .collect() - } - - fn receiver_convention_owner_name(&self, node: Node<'_>, source: &str) -> Option { - if !self.first_argument_receiver() || node.kind() != "function_definition" { - return None; - } - - let (type_name, _) = first_argument_receiver_parameter(node, source)?; - let type_name = normalize_type_owner(&type_name); - let name = self.function_name(node, source)?; - - if name.starts_with(&snake_case_type_name(&type_name)) { - Some(type_name) - } else if type_name.ends_with("_t") - && name.starts_with(type_name.strip_suffix("_t").unwrap()) - { - Some(type_name) - } else { - None - } - } - - fn function_receiver_name(&self, node: Node<'_>, source: &str) -> Option { - if self.first_argument_receiver() && node.kind() == "function_definition" { - if let Some((_, name)) = first_argument_receiver_parameter(node, source) { - return Some(name); - } - } - None - } - - fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { - generic_state_target(lhs, source) - } - - fn 
assignment_target<'tree>(&self, node: Node<'tree>) -> Option> { - generic_assignment_target(node) - } - - fn skip_state_write_node(&self, _node: Node<'_>) -> bool { - false - } - - fn skip_state_write_target(&self, target: &Target) -> bool { - target.field == "[]" - } - - fn state_write_source_node<'tree>( - &self, - _node: Node<'tree>, - assignment: &AssignmentTarget<'tree>, - ) -> Node<'tree> { - assignment.source - } -} - -macro_rules! default_profile { - ($name:ident, $language:ident, $grammar:expr) => { - struct $name; - - impl LanguageProfile for $name { - fn language(&self) -> Language { - Language::$language - } - - fn grammar(&self) -> TreeSitterLanguage { - $grammar.into() - } - } - }; -} - -struct RubyProfile; - -impl LanguageProfile for RubyProfile { - fn language(&self) -> Language { - Language::Ruby - } - - fn grammar(&self) -> TreeSitterLanguage { - tree_sitter_ruby::LANGUAGE.into() - } - - fn function_name(&self, node: Node<'_>, source: &str) -> Option { - match node.kind() { - "singleton_method" => { - let name = node - .child_by_field_name("name") - .map(|name| node_text(name, source).to_string()) - .or_else(|| { - named_children(node) - .into_iter() - .rev() - .find(|child| { - matches!( - child.kind(), - "identifier" | "field_identifier" | "property_identifier" - ) - }) - .map(|child| node_text(child, source).to_string()) - })?; - Some(format!("self.{name}")) - } - "body_statement" if first_child_kind(node) == Some("def") => { - hidden_ruby_method_name(node, source) - } - "argument_list" if first_child_kind(node) == Some("def") => { - inline_def_name(node, source) - } - _ => generic_function_name(node, source), - } - } - - fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { - if node.kind() == "body_statement" - && matches!(first_child_kind(node), Some("class" | "module")) - { - return first_named_text(node, source, &["constant", "identifier", "type_identifier"]); - } - generic_owner_name_from_declaration(node, 
source) - } - - fn hidden_case(&self, node: Node<'_>) -> bool { - matches!( - node.kind(), - "body_statement" | "block_body" | "argument_list" - ) && first_child_kind(node) == Some("case") - } - - fn predicate_less_case(&self, node: Node<'_>) -> bool { - (node.kind() == "case" || self.hidden_case(node)) && decision_subject(node).is_none() - } - - fn case_pattern_texts(&self, patterns: &[Node<'_>], source: &str) -> Vec { - ruby_case_pattern_texts(patterns, source) - } - - fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { - ruby_state_variable_target(lhs, source).or_else(|| generic_state_target(lhs, source)) - } - - fn assignment_target<'tree>(&self, node: Node<'tree>) -> Option> { - generic_assignment_target(node).or_else(|| match node.kind() { - "instance_variable" | "global_variable" if assignment_lhs_node(node) => { - Some(AssignmentTarget { - lhs: node, - source: node.parent().unwrap_or(node), - }) - } - _ => None, - }) - } - - fn skip_state_write_node(&self, node: Node<'_>) -> bool { - node.kind() == "operator_assignment" - || (assignment_lhs_node(node) - && next_sibling_raw_text(node).as_deref() != Some("=") - && node.kind() != "instance_variable") - } - - fn skip_state_write_target(&self, target: &Target) -> bool { - target.field == "[]" || target.field.starts_with('$') - } -} - -struct CProfile; - -impl LanguageProfile for CProfile { - fn language(&self) -> Language { - Language::C - } - - fn grammar(&self) -> TreeSitterLanguage { - tree_sitter_c::LANGUAGE.into() - } - - fn first_argument_receiver(&self) -> bool { - true - } -} - -struct LuaProfile; - -impl LanguageProfile for LuaProfile { - fn language(&self) -> Language { - Language::Lua - } - - fn grammar(&self) -> TreeSitterLanguage { - tree_sitter_lua::LANGUAGE.into() - } - - fn generated_prelude(&self, node: Node<'_>, source: &str) -> bool { - if line(node) != 1 { - return false; - } - let first_line = source.lines().next().unwrap_or(""); - first_line.contains("_tl_compat") && 
first_line.contains("compat53.module") - } -} - -default_profile!(PythonProfile, Python, tree_sitter_python::LANGUAGE); -default_profile!( - JavaScriptProfile, - JavaScript, - tree_sitter_javascript::LANGUAGE -); -default_profile!(JavaProfile, Java, tree_sitter_java::LANGUAGE); -default_profile!( - TypeScriptProfile, - TypeScript, - tree_sitter_typescript::LANGUAGE_TYPESCRIPT -); -default_profile!(SwiftProfile, Swift, tree_sitter_swift::LANGUAGE); -default_profile!(KotlinProfile, Kotlin, tree_sitter_kotlin_ng::LANGUAGE); -default_profile!(GoProfile, Go, tree_sitter_go::LANGUAGE); -default_profile!(RustProfile, Rust, tree_sitter_rust::LANGUAGE); -default_profile!(ZigProfile, Zig, tree_sitter_zig::LANGUAGE); -default_profile!(CppProfile, Cpp, tree_sitter_cpp::LANGUAGE); -default_profile!(CSharpProfile, CSharp, tree_sitter_c_sharp::LANGUAGE); - -static RUBY_PROFILE: RubyProfile = RubyProfile; -static PYTHON_PROFILE: PythonProfile = PythonProfile; -static JAVASCRIPT_PROFILE: JavaScriptProfile = JavaScriptProfile; -static JAVA_PROFILE: JavaProfile = JavaProfile; -static TYPESCRIPT_PROFILE: TypeScriptProfile = TypeScriptProfile; -static SWIFT_PROFILE: SwiftProfile = SwiftProfile; -static KOTLIN_PROFILE: KotlinProfile = KotlinProfile; -static GO_PROFILE: GoProfile = GoProfile; -static RUST_PROFILE: RustProfile = RustProfile; -static ZIG_PROFILE: ZigProfile = ZigProfile; -static LUA_PROFILE: LuaProfile = LuaProfile; -static C_PROFILE: CProfile = CProfile; -static CPP_PROFILE: CppProfile = CppProfile; -static CSHARP_PROFILE: CSharpProfile = CSharpProfile; - -fn language_profile(language: Language) -> &'static dyn LanguageProfile { - match language { - Language::Ruby => &RUBY_PROFILE, - Language::Python => &PYTHON_PROFILE, - Language::JavaScript => &JAVASCRIPT_PROFILE, - Language::Java => &JAVA_PROFILE, - Language::TypeScript => &TYPESCRIPT_PROFILE, - Language::Swift => &SWIFT_PROFILE, - Language::Kotlin => &KOTLIN_PROFILE, - Language::Go => &GO_PROFILE, - Language::Rust => 
&RUST_PROFILE, - Language::Zig => &ZIG_PROFILE, - Language::Lua => &LUA_PROFILE, - Language::C => &C_PROFILE, - Language::Cpp => &CPP_PROFILE, - Language::CSharp => &CSHARP_PROFILE, - } -} +use tree_sitter::{Node, Parser}; pub fn parse_file(file: PathBuf, language: Language) -> Result { let parsed = ParsedDocument::parse(file, language)?; @@ -494,13 +207,11 @@ fn record_predicate_alias( language: Language, out: &mut Vec, ) { - if !matches!(node.kind(), "method" | "function_definition") { - return; - } - let Some(name) = language_profile(language).function_name(node, source) else { + let profile = language_profile(language); + let Some(name) = profile.function_name(node, source) else { return; }; - let Some(body) = method_single_expression_body(node) else { + let Some(body) = profile.single_expression_body(node) else { return; }; let text = normalize_text(node_text(body, source)); @@ -526,7 +237,7 @@ fn record_comparison_use( context: &ContextState, out: &mut Vec, ) { - if !comparison_node(node, source) { + if !comparison_node(language_profile(_language), node, source) { return; } let raw = normalize_text(node_text(node, source)); @@ -540,14 +251,13 @@ fn record_comparison_use( }); } -fn comparison_node(node: Node<'_>, source: &str) -> bool { - if matches!(node.kind(), "binary" | "binary_expression") { - return matches!( - direct_operator_from_source(node, source).as_str(), - "==" | "!=" - ); +fn comparison_node(profile: &dyn LanguageProfile, node: Node<'_>, source: &str) -> bool { + if profile.comparison_node_kinds().contains(&node.kind()) { + return profile + .comparison_operators() + .contains(&direct_operator_from_source(node, source).as_str()); } - if node.kind() != "call" { + if !profile.call_node_kinds().contains(&node.kind()) { return false; } node.child_by_field_name("method") @@ -569,13 +279,13 @@ fn record_decision_site( return; } - if boolean_container(node) && boolean_and(node, source) { - record_conjunction_decision(node, source, file, context, out, 
seen); + if profile.boolean_container(node) && boolean_and(profile, node, source) { + record_conjunction_decision(profile, node, source, file, context, out, seen); return; } - if case_node(node) || profile.hidden_case(node) { - let decision_node = case_source_node(node, profile); + if case_node(profile, node) || profile.hidden_case(node) { + let decision_node = profile.case_source_node(node); if profile.predicate_less_case(decision_node) { return; } @@ -593,13 +303,14 @@ fn record_decision_site( function: context.current_function(), line: line(decision_node), span: span(decision_node), - predicate: decision_predicate(decision_node, source), + predicate: decision_predicate(profile, decision_node, source), }, ); } } fn record_conjunction_decision( + profile: &dyn LanguageProfile, mut node: Node<'_>, source: &str, file: &Path, @@ -607,11 +318,11 @@ fn record_conjunction_decision( out: &mut Vec, seen: &mut HashSet, ) { - let from_wrapper = parenthesized_wrapper(node); + let from_wrapper = profile.parenthesized_wrapper(node); if from_wrapper && node .parent() - .map(|parent| boolean_container(parent) && boolean_and(parent, source)) + .map(|parent| profile.boolean_container(parent) && boolean_and(profile, parent, source)) .unwrap_or(false) { return; @@ -627,8 +338,8 @@ fn record_conjunction_decision( && node .parent() .map(|parent| { - boolean_container(parent) - && boolean_and(parent, source) + profile.boolean_container(parent) + && boolean_and(profile, parent, source) && span(parent) != span(node) }) .unwrap_or(false) @@ -636,7 +347,7 @@ fn record_conjunction_decision( return; } - let mut members = flatten_boolean_and(node, source) + let mut members = flatten_boolean_and(profile, node, source) .into_iter() .map(|child| decision_member_text(child, source)) .collect::>(); @@ -676,29 +387,6 @@ fn push_decision_site(out: &mut Vec, seen: &mut HashSet, s } } -fn method_single_expression_body(node: Node<'_>) -> Option> { - let mut cursor = node.walk(); - if node.children(&mut 
cursor).any(|child| child.kind() == "=") { - let named = named_children(node); - return named.last().copied(); - } - - let body = node.child_by_field_name("body").or_else(|| { - named_children(node) - .into_iter() - .find(|child| child.kind() == "body_statement") - })?; - let statements: Vec> = named_children(body) - .into_iter() - .filter(|child| !matches!(child.kind(), "comment" | "heredoc_body")) - .collect(); - if statements.len() == 1 { - statements.first().copied() - } else { - None - } -} - fn push_owner_context( node: Node<'_>, source: &str, @@ -794,169 +482,18 @@ fn record_state_write( } #[derive(Clone, Debug, Eq, PartialEq)] -struct AssignmentTarget<'tree> { - lhs: Node<'tree>, - source: Node<'tree>, +pub(crate) struct AssignmentTarget<'tree> { + pub(crate) lhs: Node<'tree>, + pub(crate) source: Node<'tree>, } #[derive(Clone, Debug, Eq, PartialEq)] -struct Target { - receiver: String, - field: String, -} - -fn generic_assignment_target(node: Node<'_>) -> Option> { - match node.kind() { - "assignment" | "assignment_expression" | "assignment_statement" => { - let lhs = node - .child_by_field_name("left") - .or_else(|| first_named_child(node))?; - Some(AssignmentTarget { lhs, source: node }) - } - _ => None, - } +pub(crate) struct Target { + pub(crate) receiver: String, + pub(crate) field: String, } -fn assignment_lhs_node(node: Node<'_>) -> bool { - if previous_sibling_raw_text(node).as_deref() == Some(":") { - return false; - } - matches!( - next_sibling_raw_text(node).as_deref(), - Some("=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&&=" | "||=") - ) -} - -fn generic_state_target(lhs: Node<'_>, source: &str) -> Option { - if previous_sibling_text(lhs, source).as_deref() == Some(":") { - return None; - } - - match lhs.kind() { - "call" => { - let receiver = lhs.child_by_field_name("receiver")?; - let method = lhs.child_by_field_name("method")?; - Some(Target { - receiver: normalize_text(node_text(receiver, source)), - field: 
strip_assignment_suffix(node_text(method, source)), - }) - } - "field" - | "field_access" - | "selector_expression" - | "member_expression" - | "member_access_expression" - | "attribute" - | "field_expression" - | "navigation_expression" - | "directly_assignable_expression" - | "expression_list" => { - let object = lhs - .child_by_field_name("object") - .or_else(|| lhs.child_by_field_name("receiver")) - .or_else(|| lhs.child_by_field_name("expression")) - .or_else(|| lhs.child_by_field_name("operand")) - .or_else(|| lhs.child_by_field_name("value")) - .or_else(|| lhs.child_by_field_name("argument")) - .or_else(|| first_named_child_except(lhs, "navigation_suffix"))?; - let field = lhs - .child_by_field_name("field") - .or_else(|| lhs.child_by_field_name("property")) - .or_else(|| lhs.child_by_field_name("name")) - .or_else(|| lhs.child_by_field_name("suffix")) - .or_else(|| first_named_child_with_kind(lhs, "navigation_suffix")) - .or_else(|| last_named_child(lhs))?; - let field_text = member_field_text(field, source)?; - Some(Target { - receiver: normalize_text(node_text(object, source)), - field: strip_assignment_suffix(&field_text), - }) - } - _ => None, - } -} - -fn ruby_state_variable_target(node: Node<'_>, source: &str) -> Option { - matches!(node.kind(), "instance_variable" | "global_variable").then(|| Target { - receiver: "self".to_string(), - field: node_text(node, source).to_string(), - }) -} - -fn generic_function_name(node: Node<'_>, source: &str) -> Option { - match node.kind() { - "method" - | "function_definition" - | "function_declaration" - | "method_definition" - | "function_item" => node - .child_by_field_name("name") - .map(|name| node_text(name, source).to_string()) - .or_else(|| declarator_name(node.child_by_field_name("declarator"), source)) - .or_else(|| { - first_named_text( - node, - source, - &["identifier", "constant", "property_identifier"], - ) - }), - "method_declaration" => node - .child_by_field_name("name") - .map(|name| 
node_text(name, source).to_string()) - .or_else(|| first_named_text(node, source, &["field_identifier", "identifier"])), - _ => None, - } -} - -fn declarator_name(node: Option>, source: &str) -> Option { - let mut pending = vec![node?]; - let mut seen = HashSet::new(); - while let Some(current) = pending.pop() { - let key = format!("{:?}\0{}", span(current), current.kind()); - if !seen.insert(key) { - continue; - } - if matches!( - current.kind(), - "identifier" | "simple_identifier" | "field_identifier" | "property_identifier" - ) { - return Some(node_text(current, source).to_string()); - } - let mut children = named_children(current); - children.reverse(); - pending.extend(children); - } - None -} - -fn generic_owner_name_from_declaration(node: Node<'_>, source: &str) -> Option { - match node.kind() { - "class" | "module" | "class_definition" | "class_declaration" | "class_specifier" => node - .child_by_field_name("name") - .map(|name| node_text(name, source).to_string()) - .or_else(|| { - first_named_text(node, source, &["constant", "identifier", "type_identifier"]) - }), - "impl_item" | "impl_block" => impl_owner_name(node, source), - "struct_item" | "struct_spec" | "struct_specifier" | "type_spec" | "type_declaration" => { - node.child_by_field_name("name") - .map(|name| node_text(name, source).to_string()) - .or_else(|| first_named_text(node, source, &["type_identifier", "identifier"])) - } - _ => None, - } -} - -fn impl_owner_name(node: Node<'_>, source: &str) -> Option { - let r#type = node.child_by_field_name("type").or_else(|| { - named_children(node) - .into_iter() - .find(|child| child.kind().contains("type") || child.kind().contains("identifier")) - })?; - Some(normalize_type_owner(node_text(r#type, source))) -} - -fn normalize_type_owner(text: &str) -> String { +pub(crate) fn normalize_type_owner(text: &str) -> String { let value = text.trim(); let value = value.trim_start_matches(['&', '*']); let value = value @@ -968,36 +505,6 @@ fn 
normalize_type_owner(text: &str) -> String { value.split('.').last().unwrap_or("").to_string() } -fn hidden_ruby_method_name(node: Node<'_>, source: &str) -> Option { - let children = named_children(node); - let receiver_index = children - .iter() - .position(|child| matches!(child.kind(), "self" | "constant")); - let search: Vec> = if let Some(index) = receiver_index { - children.into_iter().skip(index + 1).collect() - } else { - children - }; - let name = search - .into_iter() - .find(|child| { - matches!( - child.kind(), - "identifier" | "field_identifier" | "property_identifier" - ) - }) - .map(|child| node_text(child, source).to_string())?; - if receiver_index.is_some() { - Some(format!("self.{name}")) - } else { - Some(name) - } -} - -fn inline_def_name(node: Node<'_>, source: &str) -> Option { - hidden_ruby_method_name(node, source) -} - fn file_owner(file: &Path) -> String { file.file_stem() .and_then(|stem| stem.to_str()) @@ -1006,161 +513,92 @@ fn file_owner(file: &Path) -> String { .to_string() } -fn first_named_text(node: Node<'_>, source: &str, kinds: &[&str]) -> Option { +pub(crate) fn first_named_text(node: Node<'_>, source: &str, kinds: &[&str]) -> Option { named_children(node) .into_iter() .find(|child| kinds.iter().any(|kind| *kind == child.kind())) .map(|child| node_text(child, source).to_string()) } -fn first_named_child(node: Node<'_>) -> Option> { +pub(crate) fn first_named_child(node: Node<'_>) -> Option> { let mut cursor = node.walk(); let child = node.named_children(&mut cursor).next(); child } -fn last_named_child(node: Node<'_>) -> Option> { - named_children(node).into_iter().last() -} - -fn first_named_child_except<'tree>(node: Node<'tree>, excluded_kind: &str) -> Option> { +pub(crate) fn first_named_child_except<'tree>( + node: Node<'tree>, + excluded_kind: &str, +) -> Option> { named_children(node) .into_iter() .find(|child| child.kind() != excluded_kind) } -fn first_named_child_with_kind<'tree>(node: Node<'tree>, kind: &str) -> 
Option> { +pub(crate) fn first_named_child_with_kind<'tree>( + node: Node<'tree>, + kind: &str, +) -> Option> { named_children(node) .into_iter() .find(|child| child.kind() == kind) } -fn named_children(node: Node<'_>) -> Vec> { +pub(crate) fn named_children(node: Node<'_>) -> Vec> { let mut cursor = node.walk(); node.named_children(&mut cursor).collect() } -fn first_child_kind(node: Node<'_>) -> Option<&str> { +pub(crate) fn first_child_kind(node: Node<'_>) -> Option<&str> { let mut cursor = node.walk(); let kind = node.children(&mut cursor).next().map(|child| child.kind()); kind } -fn previous_sibling_text(node: Node<'_>, source: &str) -> Option { +pub(crate) fn previous_sibling_text(node: Node<'_>, source: &str) -> Option { node.prev_sibling() .map(|sibling| node_text(sibling, source).to_string()) } -fn previous_sibling_raw_text(node: Node<'_>) -> Option { +pub(crate) fn previous_sibling_raw_text(node: Node<'_>) -> Option { node.prev_sibling() .map(|sibling| sibling.kind().to_string()) } -fn next_sibling_raw_text(node: Node<'_>) -> Option { +pub(crate) fn next_sibling_raw_text(node: Node<'_>) -> Option { node.next_sibling() .map(|sibling| sibling.kind().to_string()) } -fn member_field_text(field: Node<'_>, source: &str) -> Option { - if field.kind() == "navigation_suffix" { - let suffix = field - .child_by_field_name("suffix") - .or_else(|| { - named_children(field).into_iter().find(|child| { - matches!( - child.kind(), - "identifier" - | "simple_identifier" - | "field_identifier" - | "property_identifier" - ) - }) - }) - .or_else(|| last_named_child(field))?; - let text = node_text(suffix, source) - .trim_start_matches(['.', '?']) - .trim_start_matches("->"); - return (!text.is_empty()).then(|| text.to_string()); - } - - Some( - node_text(field, source) - .trim_start_matches(['.', '?']) - .trim_start_matches("->") - .to_string(), - ) -} - -fn strip_assignment_suffix(text: &str) -> String { +pub(crate) fn strip_assignment_suffix(text: &str) -> String { 
text.strip_suffix('=').unwrap_or(text).to_string() } -fn case_node(node: Node<'_>) -> bool { - matches!( - node.kind(), - "case" - | "when_expression" - | "switch_statement" - | "switch_expression" - | "match_statement" - | "match_expression" - ) -} - -fn case_source_node<'tree>(node: Node<'tree>, profile: &dyn LanguageProfile) -> Node<'tree> { - if !profile.hidden_case(node) { - return node; - } - let mut cursor = node.walk(); - let result = node - .children(&mut cursor) - .find(|child| child.kind() == "case") - .unwrap_or(node); - result +fn case_node(profile: &dyn LanguageProfile, node: Node<'_>) -> bool { + profile.case_node_kinds().contains(&node.kind()) } fn case_patterns(node: Node<'_>, source: &str, profile: &dyn LanguageProfile) -> Vec { - let mut out = case_arms(node) + let mut out = case_arms(profile, node) .into_iter() .flat_map(|arm| case_arm_patterns(arm, source, profile)) - .filter(|pattern| !default_case_pattern(pattern)) + .filter(|pattern| !default_case_pattern(profile, pattern)) .collect::>(); out.sort(); out.dedup(); out } -fn case_arms(node: Node<'_>) -> Vec> { +fn case_arms<'tree>(profile: &dyn LanguageProfile, node: Node<'tree>) -> Vec> { let mut arms = Vec::new(); let mut stack = named_children(node); while let Some(child) = stack.pop() { - if matches!( - child.kind(), - "when" - | "switch_case" - | "case_clause" - | "expression_case" - | "case_statement" - | "switch_section" - | "switch_block_statement_group" - | "switch_entry" - | "when_entry" - | "match_arm" - ) { + if profile.case_arm_node_kinds().contains(&child.kind()) { arms.push(child); - } else if !matches!( - child.kind(), - "method" - | "function_definition" - | "function_declaration" - | "method_definition" - | "method_declaration" - | "function_item" - | "class" - | "module" - | "class_definition" - | "class_declaration" - ) { + } else if !profile + .case_container_stop_node_kinds() + .contains(&child.kind()) + { stack.extend(named_children(child)); } } @@ -1169,96 +607,44 @@ fn 
case_arms(node: Node<'_>) -> Vec> { } fn case_arm_patterns(child: Node<'_>, source: &str, profile: &dyn LanguageProfile) -> Vec { - match child.kind() { - "when" | "match_arm" => { - let mut patterns = named_children(child) - .into_iter() - .filter(|node| matches!(node.kind(), "pattern" | "case_pattern" | "match_pattern")) - .collect::>(); - if patterns.is_empty() { - patterns = child - .child_by_field_name("pattern") - .or_else(|| first_named_child(child)) - .into_iter() - .collect(); - } - profile.case_pattern_texts(&patterns, source) - } - "switch_case" - | "case_clause" - | "expression_case" - | "case_statement" - | "switch_section" - | "switch_block_statement_group" - | "switch_entry" - | "when_entry" => { - if node_text(child, source).trim_start().starts_with("else") { - return Vec::new(); - } - let value = child - .child_by_field_name("value") - .or_else(|| child.child_by_field_name("pattern")) - .or_else(|| { - named_children(child) - .into_iter() - .find(|candidate| candidate.kind() == "when_condition") - }) - .or_else(|| { - named_children(child) - .into_iter() - .find(|candidate| candidate.kind() == "switch_pattern") - }) - .or_else(|| first_named_child(child)); - value - .filter(|node| !node.kind().contains("statement") && !node.kind().contains("block")) - .map(|node| vec![normalize_text(node_text(node, source))]) - .unwrap_or_default() - } - _ => Vec::new(), + if !profile.case_arm_node_kinds().contains(&child.kind()) { + return Vec::new(); } -} - -fn ruby_case_pattern_texts(patterns: &[Node<'_>], source: &str) -> Vec { - if patterns.is_empty() { + if node_text(child, source).trim_start().starts_with("else") { return Vec::new(); } - let texts = patterns - .iter() - .map(|pattern| normalize_text(node_text(*pattern, source))) + + let patterns = named_children(child) + .into_iter() + .filter(|node| profile.case_pattern_node_kinds().contains(&node.kind())) .collect::>(); - if !texts.iter().any(|text| text.starts_with('*')) { - return texts; + if 
!patterns.is_empty() { + return profile.case_pattern_texts(&patterns, source); } - let mut out = Vec::new(); - let mut pending_plain = Vec::new(); - for (index, text) in texts.iter().enumerate() { - if text.starts_with('*') { - if !pending_plain.is_empty() { - out.push(pending_plain.join(", ")); - pending_plain.clear(); - } - if texts.len() == 1 || index > 0 { - out.push(text.trim_start_matches('*').to_string()); - } else { - out.push(text.clone()); - } - } else { - pending_plain.push(text.clone()); - } - } - if !pending_plain.is_empty() { - out.push(pending_plain.join(", ")); - } - out + let value = child + .child_by_field_name("value") + .or_else(|| child.child_by_field_name("pattern")) + .or_else(|| { + named_children(child).into_iter().find(|candidate| { + profile + .case_pattern_node_kinds() + .contains(&candidate.kind()) + }) + }) + .or_else(|| first_named_child(child)); + value + .filter(|node| !node.kind().contains("statement") && !node.kind().contains("block")) + .map(|node| vec![normalize_text(node_text(node, source))]) + .unwrap_or_default() } -fn default_case_pattern(text: &str) -> bool { - matches!(text, "" | "_" | "default") +fn default_case_pattern(profile: &dyn LanguageProfile, text: &str) -> bool { + text.is_empty() || profile.default_case_patterns().contains(&text) } -fn decision_predicate(node: Node<'_>, source: &str) -> String { - let target = decision_subject(node); +fn decision_predicate(profile: &dyn LanguageProfile, node: Node<'_>, source: &str) -> String { + let target = profile.decision_subject(node); normalize_text( target .map(|child| node_text(child, source)) @@ -1266,102 +652,36 @@ fn decision_predicate(node: Node<'_>, source: &str) -> String { ) } -fn decision_subject(node: Node<'_>) -> Option> { - node.child_by_field_name("value") - .or_else(|| node.child_by_field_name("subject")) - .or_else(|| { - named_children(node) - .into_iter() - .find(|child| child.kind() == "when_subject") - }) - .or_else(|| 
node.child_by_field_name("condition")) - .or_else(|| { - named_children(node).into_iter().find(|child| { - !matches!( - child.kind(), - "when" - | "switch_case" - | "case_clause" - | "expression_case" - | "case_statement" - | "switch_section" - | "switch_block_statement_group" - | "switch_entry" - | "when_entry" - | "match_arm" - | "else" - | "then" - | "comment" - ) - }) - }) -} - -fn boolean_container(node: Node<'_>) -> bool { - if matches!( - node.kind(), - "binary" | "binary_expression" | "boolean_operator" - ) { - return true; - } - if parenthesized_wrapper(node) { +fn boolean_and(profile: &dyn LanguageProfile, node: Node<'_>, source: &str) -> bool { + if profile.parenthesized_wrapper(node) { return first_named_child(node) - .map(boolean_container) + .map(|child| boolean_and(profile, child, source)) .unwrap_or(false); } - if !matches!( - node.kind(), - "body_statement" | "block_body" | "statement" | "pattern" | "argument_list" - ) { - return false; - } - if !matches!(direct_operator(node).as_str(), "&&" | "and") { - return false; - } - if named_children(node).len() < 2 { - return false; - } - let mut cursor = node.walk(); - let result = node - .children(&mut cursor) - .all(|child| child.is_named() || matches!(child.kind(), "&&" | "and" | "(" | ")")); - result + profile + .boolean_and_operators() + .contains(&direct_operator_from_source(node, source).as_str()) } -fn boolean_and(node: Node<'_>, source: &str) -> bool { - if parenthesized_wrapper(node) { - return first_named_child(node) - .map(|child| boolean_and(child, source)) - .unwrap_or(false); - } - matches!( - direct_operator_from_source(node, source).as_str(), - "&&" | "and" - ) -} - -fn flatten_boolean_and<'tree>(node: Node<'tree>, source: &str) -> Vec> { - if !(boolean_container(node) && boolean_and(node, source)) { +fn flatten_boolean_and<'tree>( + profile: &dyn LanguageProfile, + node: Node<'tree>, + source: &str, +) -> Vec> { + if !(profile.boolean_container(node) && boolean_and(profile, node, 
source)) { return vec![node]; } - if parenthesized_wrapper(node) { + if profile.parenthesized_wrapper(node) { return first_named_child(node) - .map(|child| flatten_boolean_and(child, source)) + .map(|child| flatten_boolean_and(profile, child, source)) .unwrap_or_else(|| vec![node]); } named_children(node) .into_iter() - .flat_map(|child| flatten_boolean_and(child, source)) + .flat_map(|child| flatten_boolean_and(profile, child, source)) .collect() } -fn parenthesized_wrapper(node: Node<'_>) -> bool { - matches!( - node.kind(), - "parenthesized_statements" | "parenthesized_expression" - ) && named_children(node).len() == 1 -} - fn conjunction_span(node: Node<'_>) -> [usize; 4] { let mut base = span(node); if node.kind() == "pattern" && node.start_position().column > 0 { @@ -1405,7 +725,7 @@ fn enclosing_parentheses_wrap_all(text: &str) -> bool { depth == 0 } -fn direct_operator(node: Node<'_>) -> String { +pub(crate) fn direct_operator(node: Node<'_>) -> String { let mut cursor = node.walk(); let result = node .children(&mut cursor) @@ -1537,7 +857,7 @@ mod c_tests { #[test] fn test_c_assignment() { let mut file = NamedTempFile::new().unwrap(); - file.write_all(b"typedef struct Node { int storage; } Node; void node_set(Node* node) { node->storage = 1; }") + file.write_all(b"typedef struct Node { int storage; } Node; void node_set(Node* self) { self->storage = 1; }") .unwrap(); let doc = parse_file(file.path().to_path_buf(), Language::C).unwrap(); assert_eq!(doc.function_defs[0].owner, "Node"); @@ -1546,47 +866,6 @@ mod c_tests { } } -fn first_argument_receiver_parameter(node: Node<'_>, source: &str) -> Option<(String, String)> { - let params = node - .child_by_field_name("declarator") - .and_then(|d| d.child_by_field_name("parameters")) - .or_else(|| node.child_by_field_name("parameters")) - .or_else(|| first_named_child_with_kind(node, "parameter_list")) - .or_else(|| { - node.child_by_field_name("declarator") - .and_then(|d| first_named_child_with_kind(d, 
"parameter_list")) - })?; - - let first = first_named_child_with_kind(params, "parameter_declaration")?; - - let type_node = named_children(first).into_iter().find(|child| { - matches!( - child.kind(), - "type_identifier" - | "primitive_type" - | "qualified_identifier" - | "scoped_type_identifier" - ) - })?; - - let name = named_children(first) - .into_iter() - .rev() - .find(|child| matches!(child.kind(), "identifier" | "field_identifier")) - .map(|child| node_text(child, source).to_string()) - .or_else(|| declarator_name(Some(first), source))?; - - Some((node_text(type_node, source).to_string(), name)) -} - -fn snake_case_type_name(type_str: &str) -> String { - let mut parts = type_str.split("::"); - let mut last = parts.last().unwrap_or(type_str).to_string(); - // Simplified snake casing logic - last.make_ascii_lowercase(); - last -} - fn normalize_target_receiver(mut target: Target, context: &ContextState) -> Target { if let Some(current_receiver) = &context.receiver { if &target.receiver == current_receiver { diff --git a/gems/decomplex/test/ast_test.rb b/gems/decomplex/test/ast_test.rb index d487e51d4..6a479bd58 100644 --- a/gems/decomplex/test/ast_test.rb +++ b/gems/decomplex/test/ast_test.rb @@ -400,7 +400,8 @@ def test_tree_sitter_normalizer_selects_language_specific_normalization_adapters python: Decomplex::Ast::PythonTreeSitterNormalizationAdapter, lua: Decomplex::Ast::LuaTreeSitterNormalizationAdapter, typescript: Decomplex::Ast::TypeScriptTreeSitterNormalizationAdapter, - javascript: Decomplex::Ast::TypeScriptTreeSitterNormalizationAdapter + javascript: Decomplex::Ast::TypeScriptTreeSitterNormalizationAdapter, + rust: Decomplex::Ast::RustTreeSitterNormalizationAdapter }.each do |language, adapter_class| assert_instance_of adapter_class, Decomplex::Ast::TreeSitterNormalizationAdapter.for(fake_document(language)) end From 33eb75e0042a745a9ecad50657f368558274ed9c Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Fri, 19 Jun 2026 20:09:21 +0000 Subject: 
[PATCH 30/52] Add Rust PHP support for Decomplex --- .../decomplex/lib/decomplex/native/command.rb | 1 + gems/decomplex/rust/Cargo.lock | 13 +- gems/decomplex/rust/Cargo.toml | 1 + gems/decomplex/rust/src/bin/dump_ast.rs | 1 + .../rust/src/decomplex/architecture_test.rs | 4 + gems/decomplex/rust/src/decomplex/ast-test.rs | 1 + gems/decomplex/rust/src/decomplex/ast.rs | 261 ++++++++++++------ .../rust/src/decomplex/ast/adapters/base.rs | 52 ++++ .../rust/src/decomplex/ast/adapters/mod.rs | 4 + .../rust/src/decomplex/ast/adapters/php.rs | 181 ++++++++++++ .../decomplex/detectors/decision_pressure.rs | 12 +- .../rust/src/decomplex/detectors/fat_union.rs | 32 ++- .../src/decomplex/detectors/local_flow.rs | 3 +- .../detectors/operational_discontinuity.rs | 3 +- .../detectors/redundant_nil_guard.rs | 15 +- .../src/decomplex/detectors/semantic_alias.rs | 33 ++- gems/decomplex/rust/src/decomplex/syntax.rs | 2 + .../src/decomplex/syntax/adapters/base.rs | 4 + .../adapters/false_simplicity_lexicon.rs | 41 +++ .../rust/src/decomplex/syntax/adapters/mod.rs | 4 + .../rust/src/decomplex/syntax/adapters/php.rs | 199 +++++++++++++ .../decomplex/syntax/tree_sitter_adapter.rs | 16 +- 22 files changed, 777 insertions(+), 106 deletions(-) create mode 100644 gems/decomplex/rust/src/decomplex/ast/adapters/php.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs diff --git a/gems/decomplex/lib/decomplex/native/command.rb b/gems/decomplex/lib/decomplex/native/command.rb index 16829d427..5adb4dbac 100644 --- a/gems/decomplex/lib/decomplex/native/command.rb +++ b/gems/decomplex/lib/decomplex/native/command.rb @@ -54,6 +54,7 @@ def language_for(path) when ".c", ".h" then "c" when ".cpp", ".cc", ".cxx", ".hpp", ".hh", ".hxx" then "cpp" when ".cs" then "csharp" + when ".php" then "php" else "ruby" end end diff --git a/gems/decomplex/rust/Cargo.lock b/gems/decomplex/rust/Cargo.lock index 92cf25461..00787e223 100644 --- a/gems/decomplex/rust/Cargo.lock +++ 
b/gems/decomplex/rust/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "aho-corasick" @@ -58,6 +58,7 @@ dependencies = [ "tree-sitter-kotlin-ng", "tree-sitter-language", "tree-sitter-lua", + "tree-sitter-php", "tree-sitter-python", "tree-sitter-ruby", "tree-sitter-rust", @@ -373,6 +374,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-php" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c17c3ab69052c5eeaa7ff5cd972dd1bc25d1b97ee779fec391ad3b5df5592" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-python" version = "0.25.0" diff --git a/gems/decomplex/rust/Cargo.toml b/gems/decomplex/rust/Cargo.toml index f1ae79567..818495631 100644 --- a/gems/decomplex/rust/Cargo.toml +++ b/gems/decomplex/rust/Cargo.toml @@ -30,6 +30,7 @@ tree-sitter-cpp = "0.23.4" tree-sitter-c-sharp = "=0.23.5" tree-sitter-swift = "=0.7.1" tree-sitter-kotlin-ng = "1.1.0" +tree-sitter-php = "=0.24.2" [dev-dependencies] tempfile = "=3.10.1" diff --git a/gems/decomplex/rust/src/bin/dump_ast.rs b/gems/decomplex/rust/src/bin/dump_ast.rs index c798831fd..abacb80d3 100644 --- a/gems/decomplex/rust/src/bin/dump_ast.rs +++ b/gems/decomplex/rust/src/bin/dump_ast.rs @@ -99,5 +99,6 @@ fn language_grammar(language: Language) -> TreeSitterLanguage { Language::C => tree_sitter_c::LANGUAGE.into(), Language::Cpp => tree_sitter_cpp::LANGUAGE.into(), Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(), + Language::Php => tree_sitter_php::LANGUAGE_PHP.into(), } } diff --git a/gems/decomplex/rust/src/decomplex/architecture_test.rs b/gems/decomplex/rust/src/decomplex/architecture_test.rs index 9c379865d..20e0bd185 100644 --- a/gems/decomplex/rust/src/decomplex/architecture_test.rs +++ b/gems/decomplex/rust/src/decomplex/architecture_test.rs @@ -17,6 +17,7 @@ fn 
every_supported_language_has_a_syntax_adapter_file() { "javascript.rs", "kotlin.rs", "lua.rs", + "php.rs", "python.rs", "ruby.rs", "rust.rs", @@ -46,6 +47,7 @@ fn every_supported_language_has_an_ast_adapter_file() { "javascript.rs", "kotlin.rs", "lua.rs", + "php.rs", "python.rs", "ruby.rs", "rust.rs", @@ -83,6 +85,7 @@ fn tree_sitter_adapter_does_not_define_concrete_language_profiles() { "struct CProfile", "struct CppProfile", "struct CSharpProfile", + "struct PhpProfile", ]; for pattern in forbidden { @@ -242,6 +245,7 @@ fn ast_normalizer_does_not_branch_on_language_after_parser_setup() { "Language::C", "Language::Cpp", "Language::CSharp", + "Language::Php", "Self::Ruby", "Self::Python", "Self::Lua", diff --git a/gems/decomplex/rust/src/decomplex/ast-test.rs b/gems/decomplex/rust/src/decomplex/ast-test.rs index 630e7a6b4..48a12e4f8 100644 --- a/gems/decomplex/rust/src/decomplex/ast-test.rs +++ b/gems/decomplex/rust/src/decomplex/ast-test.rs @@ -129,6 +129,7 @@ fn ruby_language_name(language: Language) -> &'static str { Language::C => "c", Language::Cpp => "cpp", Language::CSharp => "csharp", + Language::Php => "php", } } diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs index 3f23318a9..5e020c165 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -275,6 +275,7 @@ fn language_grammar(language: Language) -> TreeSitterLanguage { Language::C => tree_sitter_c::LANGUAGE.into(), Language::Cpp => tree_sitter_cpp::LANGUAGE.into(), Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(), + Language::Php => tree_sitter_php::LANGUAGE_PHP.into(), } } @@ -832,7 +833,7 @@ impl<'source> TreeSitterNormalizer<'source> { if self.leading_loop_statement(node) { return self.normalize_leading_loop_statement(node); } - if let Some(loop_type) = loop_kind(node.kind()) { + if let Some(loop_type) = self.loop_node_type(node.kind()) { return self.normalize_loop(node, loop_type); } if 
self.case_kind(node.kind()) || self.hidden_match(node) { @@ -920,6 +921,21 @@ impl<'source> TreeSitterNormalizer<'source> { if self.global_variable(node) { return Some(self.normalize_global_variable(node)); } + if self.self_identifier(node) { + return Some(self.wrap("SELF", Vec::new(), node)); + } + if let Some(name) = self + .normalization_adapter + .local_identifier_text(node, self.source) + { + return Some(self.normalize_identifier_with_name(node, name)); + } + if let Some(name) = self + .normalization_adapter + .constant_identifier_text(node, self.source) + { + return Some(self.wrap("CONST", vec![Child::Symbol(name)], node)); + } if self.class_node(node) { return self.normalize_class(node); } @@ -963,9 +979,7 @@ impl<'source> TreeSitterNormalizer<'source> { "expression_list" if self.single_short_var_lhs(node) => { Some(self.wrap(&kind_type(node.kind()), Vec::new(), node)) } - "call" | "call_expression" | "method_call" | "method_call_expression" => { - self.normalize_call(node) - } + _ if self.call_node(node) => self.normalize_call(node), _ if self.member_read_node(node) => self.normalize_member_read(node), _ if self.unwrap_node(node) => self .named_children(node) @@ -1557,9 +1571,18 @@ impl<'source> TreeSitterNormalizer<'source> { fn normalize_when(&mut self, node: TreeSitterNode<'_>) -> Option { let patterns = self.normalize_patterns(node); - let body = self - .when_body(node) - .and_then(|body| self.normalize_body(body)); + let body = if let Some(body_nodes) = self + .normalization_adapter + .case_arm_body_nodes(node, self.source) + { + body_nodes + .first() + .copied() + .and_then(|source| self.normalize_body_nodes(body_nodes, source)) + } else { + self.when_body(node) + .and_then(|body| self.normalize_body(body)) + }; Some(self.wrap( "WHEN", vec![ @@ -1820,7 +1843,7 @@ impl<'source> TreeSitterNormalizer<'source> { } let children = self.named_children(node); if children.len() == 1 - && self.call_kind(children[0].kind()) + && self.call_node(children[0]) && 
node_text(children[0], self.source) == node_text(node, self.source) { if let Some(call) = self.normalize_return_value_call(children[0]) { @@ -1828,7 +1851,10 @@ impl<'source> TreeSitterNormalizer<'source> { } } if let (Some(function), Some(nested_args)) = (children.first(), children.get(1)) { - if self.identifier_kind(function.kind()) && nested_args.kind() == "argument_list" { + if let Some(function_name) = self + .identifier_text(*function) + .filter(|_| nested_args.kind() == "argument_list") + { let args = self .named_children(*nested_args) .into_iter() @@ -1844,10 +1870,7 @@ impl<'source> TreeSitterNormalizer<'source> { }; return Some(self.wrap( "FCALL", - vec![ - Child::Symbol(node_text(*function, self.source).to_string()), - args_child, - ], + vec![Child::Symbol(function_name), args_child], node, )); } @@ -1871,9 +1894,9 @@ impl<'source> TreeSitterNormalizer<'source> { .named_field(node, "function") .or_else(|| self.named_field(node, "call")) .or_else(|| self.named_children(node).into_iter().next())?; - if !self.identifier_kind(function.kind()) { + let Some(function_name) = self.identifier_text(function) else { return None; - } + }; let args_node = self .named_field(node, "arguments") @@ -1907,10 +1930,7 @@ impl<'source> TreeSitterNormalizer<'source> { Some(self.wrap( "FCALL", - vec![ - Child::Symbol(node_text(function, self.source).to_string()), - args_child, - ], + vec![Child::Symbol(function_name), args_child], node, )) } @@ -2352,7 +2372,7 @@ impl<'source> TreeSitterNormalizer<'source> { { return None; } - if !self.identifier_kind(left.kind()) { + if self.identifier_text(left).is_none() { return None; } let name = self.target_name(left); @@ -2399,12 +2419,8 @@ impl<'source> TreeSitterNormalizer<'source> { } fn assignment_receiver(&mut self, left: TreeSitterNode<'_>) -> Option { - if self.identifier_kind(left.kind()) { - return Some(self.wrap( - "LVAR", - vec![Child::String(node_text(left, self.source).to_string())], - left, - )); + if let Some(name) = 
self.identifier_text(left) { + return Some(self.wrap("LVAR", vec![Child::String(name)], left)); } if self.instance_variable(left) { return Some(self.wrap( @@ -2509,6 +2525,12 @@ impl<'source> TreeSitterNormalizer<'source> { } fn normalize_member_read(&mut self, node: TreeSitterNode<'_>) -> Option { + if let Some(field) = self + .normalization_adapter + .state_field_name(node, self.source) + { + return Some(self.wrap("IVAR", vec![Child::String(field)], node)); + } let Some((receiver, method)) = self.member_parts(node) else { let children = self.normalize_children(node); return Some(self.wrap(&kind_type(node.kind()), children, node)); @@ -2734,6 +2756,29 @@ impl<'source> TreeSitterNormalizer<'source> { block: Option>, ) -> Option { let call_source = block.map(|block| self.source_before_child(node, block)); + if let Some(name) = self + .normalization_adapter + .intrinsic_call_name(node, self.source) + { + let args = self.call_arguments(node, None); + let node_type = if block.is_some() || !args.is_empty() { + "FCALL" + } else { + "VCALL" + }; + let children = vec![ + Child::Symbol(name.to_string()), + if let Some(source) = call_source.as_ref() { + self.list_or_nil_from_source_node(args, source) + } else { + list_or_nil(args, node, self) + }, + ]; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node(node_type, children, source)); + } + return Some(self.wrap(node_type, children, node)); + } if self.dotted_call(node) { let (receiver, method) = self.dotted_call_parts(node, block)?; let args = self.call_arguments(node, None); @@ -2767,14 +2812,14 @@ impl<'source> TreeSitterNormalizer<'source> { .find(|child| Some(*child) != block) })?; let args = self.call_arguments(node, Some(function)); - if self.identifier_kind(function.kind()) { + if let Some(function_name) = self.identifier_text(function) { let node_type = if block.is_some() || !args.is_empty() { "FCALL" } else { "VCALL" }; let children = vec![ - Child::Symbol(node_text(function, 
self.source).to_string()), + Child::Symbol(function_name), if let Some(source) = call_source.as_ref() { self.list_or_nil_from_source_node(args, source) } else { @@ -3772,7 +3817,13 @@ impl<'source> TreeSitterNormalizer<'source> { } fn normalize_identifier(&mut self, node: TreeSitterNode<'_>) -> Node { - let name = node_text(node, self.source).to_string(); + let name = self + .identifier_text(node) + .unwrap_or_else(|| node_text(node, self.source).to_string()); + self.normalize_identifier_with_name(node, name) + } + + fn normalize_identifier_with_name(&mut self, node: TreeSitterNode<'_>, name: String) -> Node { if self.ruby_vcall_identifier(node, &name) || self.vcall_identifier(node, &name) { self.wrap("VCALL", vec![Child::Symbol(name)], node) } else { @@ -3950,7 +4001,7 @@ impl<'source> TreeSitterNormalizer<'source> { first_column: node_span[1], last_lineno: node_span[2], last_column: node_span[3], - text: node_text(source, self.source).to_string(), + text: self.source_text(node_text(source, self.source)), } } @@ -3975,7 +4026,7 @@ impl<'source> TreeSitterNormalizer<'source> { first_column: first_span[1], last_lineno: last_span[2], last_column: last_span[3], - text, + text: self.source_text(&text), } } @@ -3987,7 +4038,7 @@ impl<'source> TreeSitterNormalizer<'source> { first_column: source.first_column, last_lineno: source.last_lineno, last_column: source.last_column, - text: source.text.clone(), + text: self.source_text(&source.text), } } @@ -4005,7 +4056,7 @@ impl<'source> TreeSitterNormalizer<'source> { first_column: node_span[1], last_lineno: node_span[2], last_column: node_span[3], - text: text.to_string(), + text: self.source_text(text), } } @@ -4093,12 +4144,8 @@ impl<'source> TreeSitterNormalizer<'source> { node: TreeSitterNode<'_>, locals: &mut BTreeSet, ) { - if self.identifier_kind(node.kind()) { - locals.insert( - node_text(node, self.source) - .trim_start_matches('*') - .to_string(), - ); + if let Some(name) = self.identifier_text(node) { + 
locals.insert(name); return; } if matches!( @@ -4116,18 +4163,8 @@ impl<'source> TreeSitterNormalizer<'source> { } fn collect_identifier_names(&self, node: TreeSitterNode<'_>, locals: &mut BTreeSet) { - if self.identifier_kind(node.kind()) { - locals.insert( - node_text(node, self.source) - .trim_start_matches('*') - .to_string(), - ); - } - if self - .normalization_adapter - .identifier_text_node(node, self.source) - { - locals.insert(node_text(node, self.source).to_string()); + if let Some(name) = self.identifier_text(node) { + locals.insert(name); } for child in self.raw_named_children(node) { self.collect_identifier_names(child, locals); @@ -4142,13 +4179,9 @@ impl<'source> TreeSitterNormalizer<'source> { if let Some(name) = self .named_children(node) .into_iter() - .find(|child| self.identifier_kind(child.kind())) + .find_map(|child| self.identifier_text(child)) { - locals.insert( - node_text(name, self.source) - .trim_start_matches('*') - .to_string(), - ); + locals.insert(name); } } @@ -5189,7 +5222,7 @@ impl<'source> TreeSitterNormalizer<'source> { let children = self.named_children(child_source); children.into_iter().find(|child| { - Some(*child) != block && (self.call_kind(child.kind()) || self.member_read_node(*child)) + Some(*child) != block && (self.call_node(*child) || self.member_read_node(*child)) }) } @@ -5293,7 +5326,7 @@ impl<'source> TreeSitterNormalizer<'source> { .named_children(target) .into_iter() .next() - .map(|child| self.identifier_kind(child.kind())) + .map(|child| self.identifier_text(child).is_some()) .unwrap_or(false) } @@ -5308,7 +5341,7 @@ impl<'source> TreeSitterNormalizer<'source> { if !node .children(&mut node.walk()) - .any(|child| matches!(node_text(child, self.source), "." 
| "&.")) + .any(|child| self.member_access_operator(node_text(child, self.source))) { return false; } @@ -5362,6 +5395,9 @@ impl<'source> TreeSitterNormalizer<'source> { .collect::>(); let receiver = *callable.first()?; let method = node_text(*callable.get(1)?, self.source) + .trim_start_matches("::") + .trim_start_matches("->") + .trim_start_matches(['.', '?']) .trim_end_matches('=') .to_string(); Some((receiver, method)) @@ -5455,6 +5491,8 @@ impl<'source> TreeSitterNormalizer<'source> { return suffix .map(|suffix| { node_text(suffix, self.source) + .trim_start_matches("::") + .trim_start_matches("->") .trim_start_matches(['.', '?']) .to_string() }) @@ -5462,6 +5500,8 @@ impl<'source> TreeSitterNormalizer<'source> { } node_text(node, self.source) + .trim_start_matches("::") + .trim_start_matches("->") .trim_start_matches(['.', '?']) .to_string() } @@ -5545,7 +5585,7 @@ impl<'source> TreeSitterNormalizer<'source> { return self.normalize_dotted_expression(args).into_iter().collect(); } if children.len() == 1 - && self.call_kind(children[0].kind()) + && self.call_node(children[0]) && self.call_block(children[0]).is_some() { return self @@ -5853,6 +5893,16 @@ impl<'source> TreeSitterNormalizer<'source> { right: Option, source: TreeSitterNode<'_>, ) -> Option { + if let Some(field) = self + .normalization_adapter + .state_field_name(left, self.source) + { + return Some(self.wrap( + "IASGN", + vec![Child::String(field), optional_node(right)], + source, + )); + } if self.instance_variable(left) { return Some(self.wrap( "IASGN", @@ -5938,9 +5988,9 @@ impl<'source> TreeSitterNormalizer<'source> { fn target_name(&self, node: TreeSitterNode<'_>) -> String { let text = node_text(node, self.source); - if self.identifier_kind(node.kind()) - || matches!(node.kind(), "splat" | "splat_parameter" | "rest_assignment") - { + if let Some(name) = self.identifier_text(node) { + name + } else if matches!(node.kind(), "splat" | "splat_parameter" | "rest_assignment") { 
text.trim_start_matches('*').to_string() } else { text.to_string() @@ -5956,10 +6006,13 @@ impl<'source> TreeSitterNormalizer<'source> { self.named_field(node, "name") .or_else(|| { self.named_children(node).into_iter().find(|child| { - self.identifier_kind(child.kind()) || child.kind() == "constant" + self.identifier_text(*child).is_some() || child.kind() == "constant" }) }) - .map(|name| node_text(name, self.source).to_string()) + .map(|name| { + self.identifier_text(name) + .unwrap_or_else(|| node_text(name, self.source).to_string()) + }) .unwrap_or_default(), ) } @@ -5978,7 +6031,7 @@ impl<'source> TreeSitterNormalizer<'source> { .iter() .rev() .copied() - .find(|child| self.identifier_kind(child.kind())) + .find(|child| self.identifier_text(*child).is_some()) }); let parameters = self.named_field(node, "parameters"); let body = self @@ -6004,7 +6057,7 @@ impl<'source> TreeSitterNormalizer<'source> { self.named_children(node) .into_iter() .rev() - .find(|child| self.identifier_kind(child.kind())) + .find(|child| self.identifier_text(*child).is_some()) }) .map(|name| node_text(name, self.source).to_string()) .unwrap_or_default() @@ -6163,7 +6216,7 @@ impl<'source> TreeSitterNormalizer<'source> { first_column: first_span[1], last_lineno, last_column, - text, + text: self.source_text(&text), } } @@ -6222,7 +6275,7 @@ impl<'source> TreeSitterNormalizer<'source> { first_column: first_node.first_column, last_lineno: last_node.last_lineno, last_column: last_node.last_column, - text, + text: self.source_text(&text), } } @@ -6298,6 +6351,41 @@ impl<'source> TreeSitterNormalizer<'source> { identifier_kind_name(kind) } + fn identifier_text(&self, node: TreeSitterNode<'_>) -> Option { + if self.identifier_kind(node.kind()) { + return Some( + node_text(node, self.source) + .trim_start_matches('*') + .to_string(), + ); + } + self.normalization_adapter + .local_identifier_text(node, self.source) + } + + fn self_identifier(&self, node: TreeSitterNode<'_>) -> bool { + 
self.normalization_adapter + .self_identifier(node, self.source) + } + + fn call_node(&self, node: TreeSitterNode<'_>) -> bool { + self.call_kind(node.kind()) || self.normalization_adapter.call_node(node, self.source) + } + + fn loop_node_type(&self, kind: &str) -> Option<&'static str> { + self.normalization_adapter + .loop_node_type(kind) + .or_else(|| loop_kind(kind)) + } + + fn member_access_operator(&self, text: &str) -> bool { + self.normalization_adapter.member_access_operator(text) + } + + fn source_text(&self, text: &str) -> String { + self.normalization_adapter.source_text(text) + } + fn const_kind(&self, kind: &str) -> bool { matches!( kind, @@ -6313,21 +6401,22 @@ impl<'source> TreeSitterNormalizer<'source> { } fn block_kind(&self, kind: &str) -> bool { - matches!( - kind, - "block" - | "body_statement" - | "statement_block" - | "statement_list" - | "class_body" - | "switch_body" - | "match_block" - | "then" - | "block_body" - | "control_structure_body" - | "function_body" - | "statements" - ) + self.normalization_adapter.block_node_kind(kind) + || matches!( + kind, + "block" + | "body_statement" + | "statement_block" + | "statement_list" + | "class_body" + | "switch_body" + | "match_block" + | "then" + | "block_body" + | "control_structure_body" + | "function_body" + | "statements" + ) } fn case_kind(&self, kind: &str) -> bool { @@ -6375,7 +6464,7 @@ impl<'source> TreeSitterNormalizer<'source> { node: TreeSitterNode<'tree>, ) -> Option> { for child in self.named_children(node) { - if self.call_kind(child.kind()) && self.dotted_call(child) { + if self.call_node(child) && self.dotted_call(child) { return Some(child); } if let Some(found) = self.first_dotted_call_descendant(child) { diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/base.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/base.rs index cfa23654b..2a44dc455 100644 --- a/gems/decomplex/rust/src/decomplex/ast/adapters/base.rs +++ 
b/gems/decomplex/rust/src/decomplex/ast/adapters/base.rs @@ -72,6 +72,14 @@ pub(crate) trait AstNormalizationAdapter: Sync { CASE_ARGUMENT_WHEN_KINDS.contains(&node.kind()) && !self.case_else_arm(node, source) } + fn case_arm_body_nodes<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option>> { + None + } + fn case_else_node<'tree>( &self, node: TreeSitterNode<'tree>, @@ -702,6 +710,50 @@ pub(crate) trait AstNormalizationAdapter: Sync { false } + fn local_identifier_text(&self, _node: TreeSitterNode<'_>, _source: &str) -> Option { + None + } + + fn constant_identifier_text(&self, _node: TreeSitterNode<'_>, _source: &str) -> Option { + None + } + + fn self_identifier(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn call_node(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn intrinsic_call_name( + &self, + _node: TreeSitterNode<'_>, + _source: &str, + ) -> Option<&'static str> { + None + } + + fn block_node_kind(&self, _kind: &str) -> bool { + false + } + + fn loop_node_type(&self, _kind: &str) -> Option<&'static str> { + None + } + + fn member_access_operator(&self, text: &str) -> bool { + matches!(text, "." 
| "&.") + } + + fn source_text(&self, text: &str) -> String { + text.to_string() + } + + fn state_field_name(&self, _node: TreeSitterNode<'_>, _source: &str) -> Option { + None + } + fn member_assignment_target(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { false } diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/mod.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/mod.rs index 8e4a05b6d..90d7a0a69 100644 --- a/gems/decomplex/rust/src/decomplex/ast/adapters/mod.rs +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/mod.rs @@ -7,6 +7,7 @@ mod java; mod javascript; mod kotlin; mod lua; +mod php; mod python; mod ruby; mod rust; @@ -25,6 +26,7 @@ use java::JavaAstAdapter; use javascript::JavaScriptAstAdapter; use kotlin::KotlinAstAdapter; use lua::LuaAstAdapter; +use php::PhpAstAdapter; use python::PythonAstAdapter; use ruby::RubyAstAdapter; use rust::RustAstAdapter; @@ -46,6 +48,7 @@ static KOTLIN: KotlinAstAdapter = KotlinAstAdapter; static RUST: RustAstAdapter = RustAstAdapter; static SWIFT: SwiftAstAdapter = SwiftAstAdapter; static ZIG: ZigAstAdapter = ZigAstAdapter; +static PHP: PhpAstAdapter = PhpAstAdapter; pub(crate) fn normalization_adapter(language: Language) -> &'static dyn AstNormalizationAdapter { match language { @@ -63,5 +66,6 @@ pub(crate) fn normalization_adapter(language: Language) -> &'static dyn AstNorma Language::Rust => &RUST, Language::Swift => &SWIFT, Language::Zig => &ZIG, + Language::Php => &PHP, } } diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/php.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/php.rs new file mode 100644 index 000000000..63758c9e2 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/php.rs @@ -0,0 +1,181 @@ +use super::super::node_text; +use super::base::{AstNormalizationAdapter, COMMON_ASSIGNMENT_OPERATORS}; +use tree_sitter::Node as TreeSitterNode; + +pub(crate) struct PhpAstAdapter; + +impl AstNormalizationAdapter for PhpAstAdapter { + fn 
local_identifier_text(&self, node: TreeSitterNode<'_>, source: &str) -> Option { + if !matches!(node.kind(), "name" | "qualified_name" | "variable_name") { + return None; + } + let text = php_identifier_text(node_text(node, source)); + if matches!(node.kind(), "name" | "qualified_name") && php_constant_identifier(&text) { + return None; + } + (!text.is_empty()).then_some(text) + } + + fn constant_identifier_text(&self, node: TreeSitterNode<'_>, source: &str) -> Option { + if !matches!(node.kind(), "name" | "qualified_name") { + return None; + } + let text = php_identifier_text(node_text(node, source)); + php_constant_identifier(&text).then_some(text) + } + + fn self_identifier(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + node.kind() == "variable_name" && php_identifier_text(node_text(node, source)) == "this" + } + + fn call_node(&self, node: TreeSitterNode<'_>, _source: &str) -> bool { + matches!( + node.kind(), + "function_call_expression" + | "member_call_expression" + | "scoped_call_expression" + | "print_intrinsic" + ) + } + + fn intrinsic_call_name(&self, node: TreeSitterNode<'_>, _source: &str) -> Option<&'static str> { + (node.kind() == "print_intrinsic").then_some("print") + } + + fn block_node_kind(&self, kind: &str) -> bool { + matches!(kind, "compound_statement" | "declaration_list") + } + + fn loop_node_type(&self, kind: &str) -> Option<&'static str> { + (kind == "foreach_statement").then_some("FOR") + } + + fn member_access_operator(&self, text: &str) -> bool { + matches!(text, "." | "&." 
| "->" | "::") + } + + fn source_text(&self, text: &str) -> String { + php_normalize_source(text) + } + + fn state_field_name(&self, node: TreeSitterNode<'_>, source: &str) -> Option { + if node.kind() != "member_access_expression" { + return None; + } + let receiver = php_member_receiver(node)?; + if !matches!( + php_identifier_text(node_text(receiver, source)).as_str(), + "this" | "self" + ) { + return None; + } + let field = php_member_name(node)?; + let field = php_identifier_text(node_text(field, source)); + (!field.is_empty()).then_some(field) + } + + fn class_node(&self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "class_declaration" + } + + fn member_assignment_target(&self, node: TreeSitterNode<'_>, _source: &str) -> bool { + node.kind() == "member_access_expression" + } + + fn member_read_excluded(&self, node: TreeSitterNode<'_>) -> bool { + node.parent() + .map(|parent| { + matches!( + parent.kind(), + "member_call_expression" | "scoped_call_expression" + ) + }) + .unwrap_or(false) + } + + fn named_children_action<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + children: &[TreeSitterNode<'tree>], + ) -> super::base::NamedChildrenAction<'tree> { + if matches!(node.kind(), "compound_statement" | "declaration_list") + && children.len() == 1 + && node_text(node, source) == node_text(children[0], source) + { + return super::base::NamedChildrenAction::Recurse(children[0]); + } + + super::base::NamedChildrenAction::Default + } + + fn case_arm_body_nodes<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option>> { + if node.kind() != "case_statement" { + return None; + } + let mut children = php_named_children(node).into_iter(); + children.next()?; + let mut body = Vec::new(); + for child in children { + if child.kind() == "case_statement" { + break; + } + body.push(child); + } + Some(body) + } + + fn assignment_operators(&self) -> &'static [&'static str] { + COMMON_ASSIGNMENT_OPERATORS + } +} + +fn 
php_named_children<'tree>(node: TreeSitterNode<'tree>) -> Vec> { + let mut cursor = node.walk(); + node.named_children(&mut cursor).collect() +} + +fn php_member_receiver<'tree>(node: TreeSitterNode<'tree>) -> Option> { + node.child_by_field_name("object") + .or_else(|| node.child_by_field_name("receiver")) + .or_else(|| node.child_by_field_name("expression")) + .or_else(|| php_named_children(node).into_iter().next()) +} + +fn php_member_name<'tree>(node: TreeSitterNode<'tree>) -> Option> { + node.child_by_field_name("name") + .or_else(|| node.child_by_field_name("field")) + .or_else(|| php_named_children(node).into_iter().rev().next()) +} + +fn php_identifier_text(text: &str) -> String { + text.trim().trim_start_matches('$').to_string() +} + +fn php_constant_identifier(text: &str) -> bool { + text.chars() + .next() + .map(|ch| ch == '_' || ch.is_ascii_uppercase()) + .unwrap_or(false) +} + +fn php_normalize_source(source: &str) -> String { + let mut out = String::new(); + let mut chars = source.chars().peekable(); + while let Some(ch) = chars.next() { + if ch == '$' + && chars + .peek() + .map(|next| *next == '_' || next.is_ascii_alphabetic()) + .unwrap_or(false) + { + continue; + } + out.push(ch); + } + out.replace("->", ".").replace("::", ".") +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs index cfa3177c7..4a72f1057 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs @@ -5,7 +5,17 @@ use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; use std::path::PathBuf; -const GUARD_MIDS: &[&str] = &["is_a?", "kind_of?", "instance_of?", "nil?", "respond_to?"]; +const GUARD_MIDS: &[&str] = &[ + "is_a?", + "kind_of?", + "instance_of?", + "nil?", + "respond_to?", + "is_none", + "is_some", + "is_null", + "isNull", +]; const TRANSIENT_NOARG_MIDS: &[&str] = &["pop", 
"shift"]; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] diff --git a/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs index 2649caefb..d7a61bccf 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs @@ -14,6 +14,10 @@ pub struct FatUnionReport { pub struct FatUnionRow { pub name: String, pub common: Vec, + pub variant: Vec, + pub degenerate: bool, + pub support: usize, + pub scatter: usize, pub variant_set: Vec, pub at: String, pub spans: BTreeMap, @@ -111,13 +115,17 @@ impl FatUnion { current_when = when_node.children.get(2).and_then(ast::node); } - if variants.len() < 2 { + if variants.len() < 3 { return; } let mut common = None; + let mut member_counts: BTreeMap = BTreeMap::new(); for v in variants.values() { let names: BTreeSet<_> = v.reads.iter().map(|r| r.name.clone()).collect(); + for name in &names { + *member_counts.entry(name.clone()).or_insert(0) += 1; + } match common { None => common = Some(names), Some(ref mut c) => { @@ -127,7 +135,17 @@ impl FatUnion { } let common = common.unwrap_or_default(); - if common.is_empty() { + if common.len() < 2 { + return; + } + let variant: BTreeSet<_> = member_counts + .iter() + .filter_map(|(name, count)| { + (*count == 1 && !common.contains(name)).then(|| name.clone()) + }) + .collect(); + let total = common.len() + variant.len(); + if total == 0 || (common.len() as f64 / total as f64) < 0.6 { return; } @@ -150,10 +168,16 @@ impl FatUnion { variant_set.sort(); let mut common_vec: Vec<_> = common.into_iter().collect(); common_vec.sort(); + let mut variant_vec: Vec<_> = variant.into_iter().collect(); + variant_vec.sort(); self.reports.push(FatUnionRow { name: subject_name, common: common_vec, + variant: variant_vec.clone(), + degenerate: variant_vec.is_empty(), + support: 1, + scatter: 1, variant_set, at, spans, @@ -167,7 +191,9 @@ impl FatUnion { node }; match 
n.r#type.as_str() { - "CONSTANT" | "SCOPE_RESOLUTION" => Some(ast::slice(n, &self.lines)), + "CONST" | "CONSTANT" | "COLON2" | "COLON3" | "SCOPE_RESOLUTION" => { + Some(ast::slice(n, &self.lines)) + } _ => None, } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index 2bc476eb2..f933e39c3 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -189,7 +189,8 @@ impl LocalFlow { .map(|s| s.as_str()) .unwrap_or(""); let stripped = text.trim(); - if stripped.starts_with('#') { + if stripped.starts_with('#') || stripped.starts_with("//") || stripped.starts_with("--") + { return Some(RawBoundary { line: line_number, kind: "comment".to_string(), diff --git a/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs b/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs index 4fd2685d8..5cd9e6a32 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs @@ -164,7 +164,8 @@ impl OperationalDiscontinuity { fn phase_marker(&self, reset: &ResetPoint) -> bool { let re = - regex::Regex::new(r"(?i)^\#\s*(?:\d+[a-z]?\s*[.)]|(?:phase|step|stage)\b)").unwrap(); + regex::Regex::new(r"(?i)^(?:#|//|--)\s*(?:\d+[a-z]?\s*[.)]|(?:phase|step|stage)\b)") + .unwrap(); re.is_match(&reset.text) } diff --git a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs index fb71bd95b..c130a27c5 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs @@ -60,6 +60,8 @@ impl Finding { } const TERMINATING_CALLS: &[&str] = &["raise", "fail", "abort", "exit", "exit!"]; +const NIL_PREDICATE_MIDS: &[&str] = &["nil?", "isNull", 
"is_null", "nil", "is_none"]; +const NON_NIL_PREDICATE_MIDS: &[&str] = &["isSome", "is_some", "present"]; pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let mut findings = Vec::new(); @@ -285,13 +287,24 @@ impl RedundantNilGuard { _ => return None, }; let args = node.children.get(2); - if mid == "nil?" && (args.is_none() || matches!(args, Some(Child::Nil))) { + if NIL_PREDICATE_MIDS.contains(&mid.as_str()) + && (args.is_none() || matches!(args, Some(Child::Nil))) + { let subject = self.subject_key(recv)?; return Some(NilFact { local: subject, non_nil_when_true: false, }); } + if NON_NIL_PREDICATE_MIDS.contains(&mid.as_str()) + && (args.is_none() || matches!(args, Some(Child::Nil))) + { + let subject = self.subject_key(recv)?; + return Some(NilFact { + local: subject, + non_nil_when_true: true, + }); + } None } "OPCALL" => { diff --git a/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs index c1823ca49..6de5233e8 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs @@ -140,18 +140,21 @@ impl SemanticAlias { fn record_pred(&mut self, node: &Node) { if let Some(Child::Symbol(name)) = node.children.first() { - if !name.ends_with('?') { - return; - } - let stmts = ast::body_stmts(node); if stmts.len() != 1 { return; } + let Some(body) = self.predicate_body(stmts[0]) else { + return; + }; + let body_source = ast::slice(body, &self.lines); + if !self.semantic_predicate_definition(name, &body_source) { + return; + } self.preds.push(Pred { name: name.clone(), - canon: self.canon(&ast::slice(stmts[0], &self.lines)), + canon: self.canon(&body_source), file: self.file.clone(), line: node.first_lineno, span: [ @@ -163,6 +166,24 @@ impl SemanticAlias { }); } } + + fn predicate_body<'a>(&self, node: &'a Node) -> Option<&'a Node> { + if node.r#type == "RETURN" { + 
node.children.iter().filter_map(ast::node).next() + } else { + Some(node) + } + } + + fn semantic_predicate_definition(&self, name: &str, body: &str) -> bool { + name.ends_with('?') + || body.contains("==") + || body.contains("!=") + || body.contains("&&") + || body.contains("||") + || body.contains(" and ") + || body.contains(" or ") + } } struct Report { @@ -230,7 +251,7 @@ impl Report { if ps.is_empty() { continue; } - if u.defn.ends_with('?') && ps.iter().any(|p| p.name == u.defn) { + if ps.iter().any(|p| p.name == u.defn) { continue; } diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index cc0d92a2d..b74305fb4 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -24,6 +24,7 @@ pub enum Language { C, Cpp, CSharp, + Php, } impl Language { @@ -43,6 +44,7 @@ impl Language { "c" => Ok(Self::C), "cpp" => Ok(Self::Cpp), "csharp" => Ok(Self::CSharp), + "php" => Ok(Self::Php), _ => bail!("unsupported Decomplex native language: {value}"), } } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs index 2a26fe302..414d4630d 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs @@ -318,6 +318,10 @@ pub(crate) trait LanguageProfile { false } + fn normalize_source_text(&self, text: &str) -> String { + normalize_text(text) + } + fn hidden_case(&self, _node: Node<'_>) -> bool { false } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/false_simplicity_lexicon.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/false_simplicity_lexicon.rs index c646225f2..d15b5ee2d 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/false_simplicity_lexicon.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/false_simplicity_lexicon.rs @@ -87,6 +87,11 @@ const KOTLIN_CONTEXT_PAIRS: &[(&str, 
&[&str])] = &[ ("UUID", &["randomUUID"]), ("Random", &["nextInt", "nextLong", "nextDouble"]), ]; +const PHP_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("DateTime", &["createFromFormat"]), + ("DateTimeImmutable", &["createFromFormat"]), + ("random_int", &["call"]), +]; const RUBY_CALLBACK_SET: &[&str] = &[ "transaction", @@ -669,5 +674,41 @@ pub(crate) fn false_simplicity_lexicon(language: Language) -> FalseSimplicityLex callback_set: KOTLIN_CALLBACK_SET, core_consts: EMPTY, }, + Language::Php => FalseSimplicityLexicon { + dispatch_mids: &[ + "call_user_func", + "call_user_func_array", + "__call", + "__callStatic", + ], + meta_mids: &[ + "eval", + "ReflectionClass", + "ReflectionMethod", + "ReflectionFunction", + "class_alias", + ], + method_obj_mids: &["Closure", "fromCallable"], + io_consts: &["FilesystemIterator", "DirectoryIterator", "PDO", "mysqli"], + io_bare: &[ + "print", + "printf", + "fopen", + "file_get_contents", + "file_put_contents", + "exec", + "shell_exec", + "system", + "passthru", + "die", + "exit", + "trigger_error", + ], + dir_context: &["getcwd", "getenv"], + context_pairs: PHP_CONTEXT_PAIRS, + context_bare: &["time", "microtime", "random_int", "rand", "mt_rand"], + callback_set: COMMON_CALLBACK_SET, + core_consts: EMPTY, + }, } } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/mod.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/mod.rs index 6dc088f4e..ec6e69829 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/mod.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/mod.rs @@ -8,6 +8,7 @@ mod java; mod javascript; mod kotlin; mod lua; +mod php; mod python; mod ruby; mod rust; @@ -26,6 +27,7 @@ use java::JavaProfile; use javascript::JavaScriptProfile; use kotlin::KotlinProfile; use lua::LuaProfile; +use php::PhpProfile; use python::PythonProfile; use ruby::RubyProfile; use rust::RustProfile; @@ -47,6 +49,7 @@ static LUA_PROFILE: LuaProfile = LuaProfile; static C_PROFILE: CProfile = CProfile; static 
CPP_PROFILE: CppProfile = CppProfile; static CSHARP_PROFILE: CSharpProfile = CSharpProfile; +static PHP_PROFILE: PhpProfile = PhpProfile; pub(crate) fn language_profile(language: Language) -> &'static dyn LanguageProfile { match language { @@ -64,5 +67,6 @@ pub(crate) fn language_profile(language: Language) -> &'static dyn LanguageProfi Language::C => &C_PROFILE, Language::Cpp => &CPP_PROFILE, Language::CSharp => &CSHARP_PROFILE, + Language::Php => &PHP_PROFILE, } } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs new file mode 100644 index 000000000..89a50a142 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs @@ -0,0 +1,199 @@ +use super::super::tree_sitter_adapter::{named_children, AssignmentTarget, Target}; +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::{node_text, normalize_text}; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct PhpProfile; + +impl LanguageProfile for PhpProfile { + fn language(&self) -> Language { + Language::Php + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_php::LANGUAGE_PHP.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_definition", "method_declaration"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["formal_parameters"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["compound_statement", "declaration_list"] + } + + fn call_node_kinds(&self) -> &[&str] { + &[ + "function_call_expression", + "member_call_expression", + "scoped_call_expression", + "print_intrinsic", + ] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["name", "variable_name"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression", "augmented_assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> 
&[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn comparison_operators(&self) -> &[&str] { + &["==", "!=", "===", "!==", "<", "<=", ">", ">="] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["case_statement"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &[ + "function_definition", + "method_declaration", + "class_declaration", + ] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["case_statement", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &[ + "member_access_expression", + "member_call_expression", + "class_constant_access_expression", + ] + } + + fn normalize_source_text(&self, text: &str) -> String { + normalize_text(&php_normalize_source(text)) + } + + fn function_name(&self, node: Node<'_>, source: &str) -> Option { + if self.function_node_kinds().contains(&node.kind()) { + return node + .child_by_field_name("name") + .or_else(|| php_first_name_node(node)) + .and_then(|name| php_name_text(name, source)); + } + self.default_function_name(node, source) + } + + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + if node.kind() == "class_declaration" { + return node + .child_by_field_name("name") + .or_else(|| php_first_name_node(node)) + .and_then(|name| php_name_text(name, source)); + } + self.default_owner_name_from_declaration(node, source) + } + + fn assignment_target<'tree>(&self, node: Node<'tree>) -> Option> { + self.default_assignment_target(node) + } + + fn state_target(&self, lhs: 
Node<'_>, source: &str) -> Option { + let target = self.default_state_target(lhs, source)?; + Some(Target { + receiver: php_normalize_receiver(&target.receiver), + field: php_identifier_text_value(&target.field), + }) + } + + fn member_field_text(&self, field: Node<'_>, source: &str) -> Option { + php_name_text(field, source) + } + + fn case_pattern_texts(&self, patterns: &[Node<'_>], source: &str) -> Vec { + patterns + .iter() + .map(|pattern| normalize_text(&php_normalize_source(node_text(*pattern, source)))) + .collect() + } +} + +fn php_first_name_node<'tree>(node: Node<'tree>) -> Option> { + named_children(node) + .into_iter() + .find(|child| php_name_node(*child)) +} + +fn php_name_node(node: Node<'_>) -> bool { + matches!(node.kind(), "name" | "qualified_name" | "variable_name") +} + +fn php_name_text(node: Node<'_>, source: &str) -> Option { + let text = php_identifier_text_value(node_text(node, source)); + (!text.is_empty()).then_some(text) +} + +fn php_identifier_text_value(text: &str) -> String { + text.trim().trim_start_matches('$').to_string() +} + +fn php_normalize_receiver(receiver: &str) -> String { + let value = php_identifier_text_value(receiver); + if value == "this" { + "self".to_string() + } else { + value + } +} + +fn php_normalize_source(source: &str) -> String { + let mut out = String::new(); + let mut chars = source.chars().peekable(); + while let Some(ch) = chars.next() { + if ch == '$' { + if chars + .peek() + .map(|next| *next == '_' || next.is_ascii_alphabetic()) + .unwrap_or(false) + { + continue; + } + } + out.push(ch); + } + out.replace("->", ".").replace("::", ".") +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 465e71140..8d82bedb7 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -214,7 +214,7 @@ fn record_predicate_alias( let 
Some(body) = profile.single_expression_body(node) else { return; }; - let text = normalize_text(node_text(body, source)); + let text = profile.normalize_source_text(node_text(body, source)); if text.is_empty() || text == "nil" || text.len() > 200 { return; } @@ -240,7 +240,7 @@ fn record_comparison_use( if !comparison_node(language_profile(_language), node, source) { return; } - let raw = normalize_text(node_text(node, source)); + let raw = language_profile(_language).normalize_source_text(node_text(node, source)); out.push(ComparisonUse { canon_source: raw.clone(), raw, @@ -303,7 +303,11 @@ fn record_decision_site( function: context.current_function(), line: line(decision_node), span: span(decision_node), - predicate: decision_predicate(profile, decision_node, source), + predicate: profile.normalize_source_text(&decision_predicate( + profile, + decision_node, + source, + )), }, ); } @@ -349,7 +353,7 @@ fn record_conjunction_decision( let mut members = flatten_boolean_and(profile, node, source) .into_iter() - .map(|child| decision_member_text(child, source)) + .map(|child| profile.normalize_source_text(&decision_member_text(child, source))) .collect::>(); members.sort(); members.dedup(); @@ -367,7 +371,7 @@ fn record_conjunction_decision( function: context.current_function(), line: conjunction_span(node)[0], span: conjunction_span(node), - predicate: normalize_text(node_text(node, source)), + predicate: profile.normalize_source_text(node_text(node, source)), }, ); } @@ -635,7 +639,7 @@ fn case_arm_patterns(child: Node<'_>, source: &str, profile: &dyn LanguageProfil .or_else(|| first_named_child(child)); value .filter(|node| !node.kind().contains("statement") && !node.kind().contains("block")) - .map(|node| vec![normalize_text(node_text(node, source))]) + .map(|node| vec![profile.normalize_source_text(node_text(node, source))]) .unwrap_or_default() } From 15140bb50e743f37e0fa454318f83a80baea4e40 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Fri, 19 Jun 2026 
22:06:42 +0000 Subject: [PATCH 31/52] Fix decomplex Rust facts indexing architecture --- gems/decomplex/exe/decomplex | 96 +++ gems/decomplex/lib/decomplex.rb | 1 + .../lib/decomplex/native/path_condition.rb | 3 +- .../lib/decomplex/native/report_facts.rb | 17 + gems/decomplex/lib/decomplex/report.rb | 171 ++--- gems/decomplex/lib/decomplex/report_facts.rb | 270 ++++++++ .../rust/src/decomplex/architecture_test.rs | 10 + gems/decomplex/rust/src/decomplex/ast.rs | 6 +- .../rust/src/decomplex/detectors/co_update.rs | 49 +- .../decomplex/detectors/decision_pressure.rs | 16 +- .../src/decomplex/detectors/derived_state.rs | 16 +- .../decomplex/detectors/false_simplicity.rs | 21 +- .../rust/src/decomplex/detectors/fat_union.rs | 16 +- .../src/decomplex/detectors/function_lcom.rs | 12 +- .../detectors/implicit_control_flow.rs | 39 +- .../detectors/inconsistent_rename_clone.rs | 16 +- .../src/decomplex/detectors/local_flow.rs | 20 +- .../src/decomplex/detectors/locality_drag.rs | 12 +- .../rust/src/decomplex/detectors/miner.rs | 12 +- .../detectors/operational_discontinuity.rs | 16 +- .../detectors/oversized_predicate.rs | 19 +- .../src/decomplex/detectors/path_condition.rs | 66 +- .../decomplex/detectors/predicate_alias.rs | 85 +-- .../detectors/redundant_nil_guard.rs | 16 +- .../src/decomplex/detectors/semantic_alias.rs | 174 ++--- .../src/decomplex/detectors/sequence_mine.rs | 16 +- .../detectors/state_branch_density.rs | 43 +- .../src/decomplex/detectors/state_mesh.rs | 50 +- .../detectors/structural_topology.rs | 24 +- .../detectors/temporal_ordering_pressure.rs | 17 +- .../weighted_inlined_cognitive_complexity.rs | 23 +- gems/decomplex/rust/src/decomplex/mod.rs | 1 + .../rust/src/decomplex/report_facts.rs | 634 ++++++++++++++++++ gems/decomplex/rust/src/decomplex/syntax.rs | 46 +- .../decomplex/syntax/tree_sitter_adapter.rs | 3 +- gems/decomplex/rust/src/main.rs | 213 +++++- gems/decomplex/test/report_test.rb | 29 + 37 files changed, 1779 insertions(+), 499 
deletions(-) create mode 100644 gems/decomplex/lib/decomplex/native/report_facts.rb create mode 100644 gems/decomplex/lib/decomplex/report_facts.rb create mode 100644 gems/decomplex/rust/src/decomplex/report_facts.rs diff --git a/gems/decomplex/exe/decomplex b/gems/decomplex/exe/decomplex index 551493426..67fbce8cc 100755 --- a/gems/decomplex/exe/decomplex +++ b/gems/decomplex/exe/decomplex @@ -60,6 +60,97 @@ if ARGV[0] == "delta" exit 0 end +if ARGV[0] == "facts" + require_relative "../lib/decomplex/report_facts" + args = ARGV[1..] + engine = "ruby" + out_path = nil + jobs = nil + benchmark = false + args = args.reject do |arg| + case arg + when /\A--engine=(.+)\z/ + engine = Regexp.last_match(1) + true + when /\A--output=(.+)\z/ + out_path = Regexp.last_match(1) + true + when /\A--jobs=(\d+)\z/ + jobs = Integer(Regexp.last_match(1)) + true + when "--benchmark" + benchmark = true + true + else + false + end + end + files = collect_files(args) + abort no_files_message if files.empty? + started = Process.clock_gettime(Process::CLOCK_MONOTONIC) + facts = Decomplex::ReportFacts.from_files(files, engine: engine, jobs: jobs) + elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started + output = Decomplex::ReportFacts.to_json(facts) + if out_path + File.write(out_path, output) + warn "wrote #{out_path} (#{files.size} files)" + else + puts output + end + if benchmark + warn format("decomplex facts engine=%s files=%d elapsed=%.6fs", + engine, files.size, elapsed) + end + exit 0 +end + +if ARGV[0] == "render-report" + require_relative "../lib/decomplex/report" + args = ARGV[1..] 
+ format = "markdown" + out_path = nil + input_path = nil + args = args.reject do |arg| + case arg + when "--from-stdin" + true + when /\A--input=(.+)\z/ + input_path = Regexp.last_match(1) + true + when /\A--format=(.+)\z/ + format = Regexp.last_match(1) + true + when /\A--output=(.+)\z/ + out_path = Regexp.last_match(1) + true + else + false + end + end + abort "usage: decomplex render-report --from-stdin|--input=FILE [--format=markdown|sarif|json] [--output=FILE]" unless args.empty? + + payload = input_path ? File.read(input_path) : STDIN.read + abort "render-report requires facts JSON on stdin or --input=FILE" if payload.to_s.strip.empty? + + report = Decomplex::Report.from_facts(payload) + output = + case format.to_s + when "markdown", "md" + report.to_markdown + when "sarif", "json" + report.to_sarif + else + abort "unsupported render-report format: #{format}" + end + if out_path + File.write(out_path, output) + warn "wrote #{out_path}" + else + puts output + end + exit 0 +end + if ARGV[0] == "detector" args = ARGV[1..] detector = args&.shift @@ -300,6 +391,8 @@ if ARGV.empty? || ARGV[0] == "-h" || ARGV[0] == "--help" decomplex FILE_OR_DIR [FILE_OR_DIR ...] decomplex report [--output=FILE] [--emit-json=FILE] [--sarif=FILE] [--exclude=GLOB] FILE_OR_DIR ... + decomplex facts [--engine=ruby|rust] [--output=FILE] [--benchmark] [--jobs=N] FILE_OR_DIR ... + decomplex render-report --from-stdin|--input=FILE [--format=markdown|sarif|json] [--output=FILE] decomplex detector DETECTOR --engine=ruby|rust --json [--benchmark] [--mass=N] [--fuzzy=N] [--jobs=N] FILE_OR_DIR ... decomplex detector DETECTOR --compare-engines [--mass=N] [--fuzzy=N] [--jobs=N] FILE_OR_DIR ... decomplex state-mesh [--output=FILE] [--exclude=GLOB] FILE_OR_DIR ... @@ -315,6 +408,9 @@ if ARGV.empty? 
|| ARGV[0] == "-h" || ARGV[0] == "--help" Subcommands: report Full markdown report with all detectors + facts Emit report-ready detector facts before convergence/root-cause/rendering + render-report + Render a precomputed facts payload without parsing source or running detectors detector Single-detector canonical JSON for migration/benchmarking state-mesh JSON graph of reader/writer hierarchy by field state-branches diff --git a/gems/decomplex/lib/decomplex.rb b/gems/decomplex/lib/decomplex.rb index dd72c819e..c993004a5 100644 --- a/gems/decomplex/lib/decomplex.rb +++ b/gems/decomplex/lib/decomplex.rb @@ -33,6 +33,7 @@ require_relative "decomplex/convergence" require_relative "decomplex/root_cause" require_relative "decomplex/delta" +require_relative "decomplex/report_facts" # Decomplex: decision-level duplication + neglected-condition detector. # See decomplex.gemspec for the rationale. v0 scope is exact-match diff --git a/gems/decomplex/lib/decomplex/native/path_condition.rb b/gems/decomplex/lib/decomplex/native/path_condition.rb index 71dd0f5a6..aa0090f7c 100644 --- a/gems/decomplex/lib/decomplex/native/path_condition.rb +++ b/gems/decomplex/lib/decomplex/native/path_condition.rb @@ -11,7 +11,8 @@ module PathCondition def scan(files, jobs: nil) paths = Array(files).map(&:to_s) language = Command.language_for(paths.first) - JSON.parse(Command.run("path-condition", "--language", language, *Command.jobs_args(jobs), *paths)) + payload = JSON.parse(Command.run("path-condition", "--language", language, *Command.jobs_args(jobs), *paths)) + { "neglected" => payload.fetch("neglected", []) } end end diff --git a/gems/decomplex/lib/decomplex/native/report_facts.rb b/gems/decomplex/lib/decomplex/native/report_facts.rb new file mode 100644 index 000000000..332a77f10 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/report_facts.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module 
ReportFacts + module_function + + def collect(files, jobs: nil) + paths = Array(files).map(&:to_s) + JSON.parse(Command.run("facts", *Command.jobs_args(jobs), *paths)) + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/report.rb b/gems/decomplex/lib/decomplex/report.rb index 53bd92902..a842f6b93 100644 --- a/gems/decomplex/lib/decomplex/report.rb +++ b/gems/decomplex/lib/decomplex/report.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative "../decomplex" +require_relative "report_facts" module Decomplex # Aggregates every detector over a file set and renders a single @@ -8,134 +9,68 @@ module Decomplex # prioritisation, per-detector sections, run summary). Every number # is a ranked CANDIDATE count, never a verdict. class Report - def initialize(files) + def initialize(files, facts: nil) @files = files - run + facts ? apply_facts(facts) : run + end + + def self.from_facts(facts) + normalized = ReportFacts.normalize(facts) + new(normalized.fetch(:files), facts: normalized) end def run - m = Miner.scan(@files) - @miss = m.missing_abstractions - @negc = m.neglected_conditions - cu = CoUpdate.scan(@files) - @negu = cu.neglected_updates - @copair = cu.co_written_pairs - pa = PredicateAlias.scan(@files) - @palias = pa.alias_clusters - sa = SemanticAlias.scan(@files) - @salias = sa.alias_clusters - @reif = sa.reification_misses - pc = PathCondition.scan(@files) - @pcneg = pc.neglected - @pcsc = pc.scattered - sm = SequenceMine.scan(@files) - @broken = sm.broken_protocol - icf = ImplicitControlFlow.scan(@files) - @implicit_control_flow = icf.ordered_protocols( - min_support: Integer(ENV.fetch("DECOMPLEX_ICF_MIN_SUPPORT", "1")) - ) - @derived = DerivedState.scan(@files) - @rename_clones = InconsistentRenameClone.scan(@files) - @similarity = FlaySimilarity.scan( - @files, - mass: Integer(ENV.fetch("DECOMPLEX_SIMILARITY_MASS", - ENV.fetch("DECOMPLEX_FLAY_MASS", FlaySimilarity::DEFAULT_MASS))), - fuzzy: Integer(ENV.fetch("DECOMPLEX_SIMILARITY_FUZZY", 
- ENV.fetch("DECOMPLEX_FLAY_FUZZY", FlaySimilarity::DEFAULT_FUZZY))) - ) - @pressure = DecisionPressure.scan(@files).ranked - @redundant_nil = RedundantNilGuard.scan(@files) - @fsimple = FalseSimplicity.scan(@files).findings - @oversized_predicates = OversizedPredicate.scan(@files).findings - @fatu = FatUnion.scan(@files).fat_unions - state_mesh = StateMesh.scan(@files, min_writes: 1) - state_mesh.run - @state_heat = state_mesh.findings - @state_branch = StateBranchDensity.scan(@files).findings - @temporal_ordering = TemporalOrderingPressure.scan(@files) - @weighted_inlined_complexity = WeightedInlinedCognitiveComplexity.scan( - @files, - min_score: Float(ENV.fetch( - "DECOMPLEX_WICC_MIN_SCORE", - WeightedInlinedCognitiveComplexity::DEFAULT_MIN_SCORE - )), - min_hidden: Float(ENV.fetch( - "DECOMPLEX_WICC_MIN_HIDDEN", - WeightedInlinedCognitiveComplexity::DEFAULT_MIN_HIDDEN - )), - max_depth: Integer(ENV.fetch( - "DECOMPLEX_WICC_MAX_DEPTH", - WeightedInlinedCognitiveComplexity::DEFAULT_MAX_DEPTH - )) - ) - @locality_drag = LocalityDrag.scan( - @files, - min_unrelated_statements: Integer(ENV.fetch( - "DECOMPLEX_LOCALITY_DRAG_MIN_UNRELATED_STATEMENTS", - LocalityDrag::DEFAULT_MIN_UNRELATED_STATEMENTS - )), - min_gap_lines: Integer(ENV.fetch( - "DECOMPLEX_LOCALITY_DRAG_MIN_GAP_LINES", - LocalityDrag::DEFAULT_MIN_GAP_LINES - )), - min_local_complexity: Float(ENV.fetch( - "DECOMPLEX_LOCALITY_DRAG_MIN_LOCAL_COMPLEXITY", - LocalityDrag::DEFAULT_MIN_LOCAL_COMPLEXITY - )), - min_score: Integer(ENV.fetch( - "DECOMPLEX_LOCALITY_DRAG_MIN_SCORE", - LocalityDrag::DEFAULT_MIN_SCORE - )), - max_findings_per_method: Integer(ENV.fetch( - "DECOMPLEX_LOCALITY_DRAG_MAX_FINDINGS_PER_METHOD", - LocalityDrag::DEFAULT_MAX_FINDINGS_PER_METHOD - )) - ) - @function_lcom = FunctionLCOM.scan( - @files, - min_components: Integer(ENV.fetch( - "DECOMPLEX_FUNCTION_LCOM_MIN_COMPONENTS", - FunctionLCOM::DEFAULT_MIN_COMPONENTS - )), - min_locals: Integer(ENV.fetch( - 
"DECOMPLEX_FUNCTION_LCOM_MIN_LOCALS", - FunctionLCOM::DEFAULT_MIN_LOCALS - )), - min_statements: Integer(ENV.fetch( - "DECOMPLEX_FUNCTION_LCOM_MIN_STATEMENTS", - FunctionLCOM::DEFAULT_MIN_STATEMENTS - )), - min_score: Integer(ENV.fetch( - "DECOMPLEX_FUNCTION_LCOM_MIN_SCORE", - FunctionLCOM::DEFAULT_MIN_SCORE - )) - ) - operational_discontinuity = OperationalDiscontinuity.scan( - @files, - min_dead: Integer(ENV.fetch( - "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MIN_DEAD", - OperationalDiscontinuity::DEFAULT_MIN_DEAD - )), - min_new: Integer(ENV.fetch( - "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MIN_NEW", - OperationalDiscontinuity::DEFAULT_MIN_NEW - )), - max_continuing: Integer(ENV.fetch( - "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MAX_CONTINUING", - OperationalDiscontinuity::DEFAULT_MAX_CONTINUING - )), - min_score: Integer(ENV.fetch( - "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MIN_SCORE", - OperationalDiscontinuity::DEFAULT_MIN_SCORE - )) - ) + apply_facts(ReportFacts.from_files(@files, engine: "ruby")) + end + + def apply_facts(facts) + normalized = ReportFacts.normalize(facts) + @files = normalized.fetch(:files) + detectors = normalized.fetch(:detectors) + + miner = detectors.fetch(:miner) + @miss = miner.fetch(:missing_abstractions, []) + @negc = miner.fetch(:neglected_conditions, []) + + co_update = detectors.fetch(:co_update) + @negu = co_update.fetch(:neglected_updates, []) + @copair = co_update.fetch(:co_written_pairs, []) + + @palias = detectors.fetch(:predicate_alias).fetch(:alias_clusters, []) + + semantic_alias = detectors.fetch(:semantic_alias) + @salias = semantic_alias.fetch(:alias_clusters, []) + @reif = semantic_alias.fetch(:reification_misses, []) + + path_condition = detectors.fetch(:path_condition) + @pcneg = path_condition.fetch(:neglected, []) + @pcsc = path_condition.fetch(:scattered, []) + + @broken = detectors.fetch(:sequence_mine).fetch(:broken_protocol, []) + @implicit_control_flow = detectors.fetch(:implicit_control_flow).fetch(:ordered_protocols, []) + 
@derived = detectors.fetch(:derived_state, []) + @rename_clones = detectors.fetch(:inconsistent_rename_clone, []) + @similarity = detectors.fetch(:flay_similarity, []) + @pressure = detectors.fetch(:decision_pressure, []) + @redundant_nil = detectors.fetch(:redundant_nil_guard, []) + @fsimple = detectors.fetch(:false_simplicity, []) + @oversized_predicates = detectors.fetch(:oversized_predicate, []) + @fatu = detectors.fetch(:fat_union).fetch(:fat_unions, []) + @state_heat = detectors.fetch(:state_heatmap, []) + @state_branch = detectors.fetch(:state_branch_density, []) + @temporal_ordering = detectors.fetch(:temporal_ordering_pressure, []) + @weighted_inlined_complexity = detectors.fetch(:weighted_inlined_complexity, []) + @locality_drag = detectors.fetch(:locality_drag, []) + @function_lcom = detectors.fetch(:function_lcom, []) + operational_discontinuity = detectors.fetch(:operational_discontinuity, []) @operational_discontinuity_high_confidence, @operational_discontinuity = - operational_discontinuity.partition { |finding| OperationalDiscontinuity.high_confidence?(finding) } + operational_discontinuity.partition { |finding| finding[:confidence].to_s == "high" } # sections_data also asserts the span contract -- running it on # the normal report path keeps that tripwire live. 
sd = sections_data @convergence = Convergence.rollup(sd) @root = RootCause.cluster(sd) + self end # tier = signal quality (1 = highest signal / lowest false-positive, diff --git a/gems/decomplex/lib/decomplex/report_facts.rb b/gems/decomplex/lib/decomplex/report_facts.rb new file mode 100644 index 000000000..d8871d64c --- /dev/null +++ b/gems/decomplex/lib/decomplex/report_facts.rb @@ -0,0 +1,270 @@ +# frozen_string_literal: true + +require "json" +require_relative "miner" +require_relative "co_update" +require_relative "predicate_alias" +require_relative "semantic_alias" +require_relative "path_condition" +require_relative "sequence_mine" +require_relative "ordered_protocol_mine" +require_relative "derived_state" +require_relative "inconsistent_rename_clone" +require_relative "flay_similarity" +require_relative "decision_pressure" +require_relative "redundant_nil_guard" +require_relative "false_simplicity" +require_relative "oversized_predicate" +require_relative "fat_union" +require_relative "state_mesh" +require_relative "state_branch_density" +require_relative "temporal_ordering_pressure" +require_relative "weighted_inlined_cognitive_complexity" +require_relative "locality_drag" +require_relative "function_lcom" +require_relative "operational_discontinuity" +require_relative "native/report_facts" + +module Decomplex + # Stable boundary between analysis and reporting. + # + # ReportFacts contains the report-ready detector outputs before + # Convergence, RootCause, Markdown, or SARIF post-processing runs. 
+ module ReportFacts + FORMAT = "decomplex.report-facts.v1" + ENUM_KEYS = %i[kind mode confidence clone_type].freeze + + module_function + + def from_files(files, engine: "ruby", jobs: nil) + paths = Array(files).map(&:to_s) + case engine.to_s + when "ruby" + { + "format" => FORMAT, + "files" => paths, + "detectors" => json_safe(ruby_detector_facts(paths)) + } + when "rust" + Native::ReportFacts.collect(paths, jobs: jobs) + else + raise ArgumentError, "unsupported decomplex facts engine: #{engine}" + end + end + + def to_json(facts, pretty: true) + pretty ? JSON.pretty_generate(json_safe(facts)) : JSON.generate(json_safe(facts)) + end + + def normalize(payload) + raw = payload.is_a?(String) ? JSON.parse(payload) : payload + deep_hydrate(raw) + end + + def json_safe(value) + case value + when Hash + value.to_h { |key, child| [key.to_s, json_safe(child)] } + when Array + value.map { |child| json_safe(child) } + when Symbol + value.to_s + else + value + end + end + + def state_heatmap_findings_from_graph(graph, limit_sites: 12) + fields = graph.fetch("fields", {}) + fields.map do |field, row| + writers = Array(row["writers"]) + readers = Array(row["readers"]) + re_derivations = Array(row["re_derivations"]) + metrics = row.fetch("metrics", {}) + sites = site_locations(writers + readers) + + re_derivations.map { |site| site_location(site) } + spans = (writers + readers).each_with_object({}) do |site, out| + out[site_location(site)] = site["span"] + end + + { + "at" => sites.first, + "field" => field, + "writes" => metrics.fetch("writes", 0), + "reads" => metrics.fetch("reads", 0), + "re_derivations" => metrics.fetch("re_derivations", 0), + "scatter" => metrics.fetch("scatter", 0), + "write_scatter" => metrics.fetch("write_scatter", 0), + "read_scatter" => metrics.fetch("read_scatter", 0), + "receiver_types" => metrics.fetch("receiver_types", 0), + "messiness" => row.fetch("messiness", 0), + "pressure" => metrics.fetch("pressure", 0), + "top_writers" => 
site_locations(writers.first(4)), + "top_readers" => site_locations(readers.first(4)), + "sites" => sites.first(limit_sites), + "spans" => spans + } + end + end + + def ruby_detector_facts(files) + m = Miner.scan(files) + cu = CoUpdate.scan(files) + pa = PredicateAlias.scan(files) + sa = SemanticAlias.scan(files) + pc = PathCondition.scan(files) + sm = SequenceMine.scan(files) + icf = ImplicitControlFlow.scan(files) + state_mesh = StateMesh.scan(files, min_writes: 1) + state_mesh.run + operational_discontinuity = OperationalDiscontinuity.scan( + files, + min_dead: Integer(ENV.fetch( + "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MIN_DEAD", + OperationalDiscontinuity::DEFAULT_MIN_DEAD + )), + min_new: Integer(ENV.fetch( + "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MIN_NEW", + OperationalDiscontinuity::DEFAULT_MIN_NEW + )), + max_continuing: Integer(ENV.fetch( + "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MAX_CONTINUING", + OperationalDiscontinuity::DEFAULT_MAX_CONTINUING + )), + min_score: Integer(ENV.fetch( + "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MIN_SCORE", + OperationalDiscontinuity::DEFAULT_MIN_SCORE + )) + ) + + { + miner: { + missing_abstractions: m.missing_abstractions, + neglected_conditions: m.neglected_conditions + }, + co_update: { + co_written_pairs: cu.co_written_pairs, + neglected_updates: cu.neglected_updates + }, + predicate_alias: { alias_clusters: pa.alias_clusters }, + semantic_alias: { + alias_clusters: sa.alias_clusters, + reification_misses: sa.reification_misses + }, + path_condition: { + neglected: pc.neglected, + scattered: pc.scattered + }, + sequence_mine: { broken_protocol: sm.broken_protocol }, + implicit_control_flow: { + ordered_protocols: icf.ordered_protocols( + min_support: Integer(ENV.fetch("DECOMPLEX_ICF_MIN_SUPPORT", "1")) + ) + }, + derived_state: DerivedState.scan(files), + inconsistent_rename_clone: InconsistentRenameClone.scan(files), + flay_similarity: FlaySimilarity.scan( + files, + mass: Integer(ENV.fetch( + "DECOMPLEX_SIMILARITY_MASS", + 
ENV.fetch("DECOMPLEX_FLAY_MASS", FlaySimilarity::DEFAULT_MASS) + )), + fuzzy: Integer(ENV.fetch( + "DECOMPLEX_SIMILARITY_FUZZY", + ENV.fetch("DECOMPLEX_FLAY_FUZZY", FlaySimilarity::DEFAULT_FUZZY) + )) + ), + decision_pressure: DecisionPressure.scan(files).ranked, + redundant_nil_guard: RedundantNilGuard.scan(files), + false_simplicity: FalseSimplicity.scan(files).findings, + oversized_predicate: OversizedPredicate.scan(files).findings, + fat_union: { fat_unions: FatUnion.scan(files).fat_unions }, + state_heatmap: state_mesh.findings, + state_branch_density: StateBranchDensity.scan(files).findings, + temporal_ordering_pressure: TemporalOrderingPressure.scan(files), + weighted_inlined_complexity: WeightedInlinedCognitiveComplexity.scan( + files, + min_score: Float(ENV.fetch( + "DECOMPLEX_WICC_MIN_SCORE", + WeightedInlinedCognitiveComplexity::DEFAULT_MIN_SCORE + )), + min_hidden: Float(ENV.fetch( + "DECOMPLEX_WICC_MIN_HIDDEN", + WeightedInlinedCognitiveComplexity::DEFAULT_MIN_HIDDEN + )), + max_depth: Integer(ENV.fetch( + "DECOMPLEX_WICC_MAX_DEPTH", + WeightedInlinedCognitiveComplexity::DEFAULT_MAX_DEPTH + )) + ), + locality_drag: LocalityDrag.scan( + files, + min_unrelated_statements: Integer(ENV.fetch( + "DECOMPLEX_LOCALITY_DRAG_MIN_UNRELATED_STATEMENTS", + LocalityDrag::DEFAULT_MIN_UNRELATED_STATEMENTS + )), + min_gap_lines: Integer(ENV.fetch( + "DECOMPLEX_LOCALITY_DRAG_MIN_GAP_LINES", + LocalityDrag::DEFAULT_MIN_GAP_LINES + )), + min_local_complexity: Float(ENV.fetch( + "DECOMPLEX_LOCALITY_DRAG_MIN_LOCAL_COMPLEXITY", + LocalityDrag::DEFAULT_MIN_LOCAL_COMPLEXITY + )), + min_score: Integer(ENV.fetch( + "DECOMPLEX_LOCALITY_DRAG_MIN_SCORE", + LocalityDrag::DEFAULT_MIN_SCORE + )), + max_findings_per_method: Integer(ENV.fetch( + "DECOMPLEX_LOCALITY_DRAG_MAX_FINDINGS_PER_METHOD", + LocalityDrag::DEFAULT_MAX_FINDINGS_PER_METHOD + )) + ), + function_lcom: FunctionLCOM.scan( + files, + min_components: Integer(ENV.fetch( + "DECOMPLEX_FUNCTION_LCOM_MIN_COMPONENTS", + 
FunctionLCOM::DEFAULT_MIN_COMPONENTS + )), + min_locals: Integer(ENV.fetch( + "DECOMPLEX_FUNCTION_LCOM_MIN_LOCALS", + FunctionLCOM::DEFAULT_MIN_LOCALS + )), + min_statements: Integer(ENV.fetch( + "DECOMPLEX_FUNCTION_LCOM_MIN_STATEMENTS", + FunctionLCOM::DEFAULT_MIN_STATEMENTS + )), + min_score: Integer(ENV.fetch( + "DECOMPLEX_FUNCTION_LCOM_MIN_SCORE", + FunctionLCOM::DEFAULT_MIN_SCORE + )) + ), + operational_discontinuity: operational_discontinuity + } + end + + def deep_hydrate(value, key: nil) + case value + when Hash + value.each_with_object({}) do |(child_key, child), out| + hydrated_key = key == :spans ? child_key.to_s : child_key.to_s.to_sym + out[hydrated_key] = deep_hydrate(child, key: hydrated_key) + end + when Array + value.map { |child| deep_hydrate(child, key: key) } + when String + ENUM_KEYS.include?(key) ? value.to_sym : value + else + value + end + end + + def site_locations(sites) + Array(sites).map { |site| site_location(site) } + end + + def site_location(site) + "#{site.fetch('file')}:#{site.fetch('defn')}:#{site.fetch('line')}" + end + end +end diff --git a/gems/decomplex/rust/src/decomplex/architecture_test.rs b/gems/decomplex/rust/src/decomplex/architecture_test.rs index 20e0bd185..fc14b60d7 100644 --- a/gems/decomplex/rust/src/decomplex/architecture_test.rs +++ b/gems/decomplex/rust/src/decomplex/architecture_test.rs @@ -157,6 +157,16 @@ fn detectors_do_not_import_tree_sitter_directly() { } } +#[test] +fn report_facts_uses_document_detector_apis() { + let path = crate_src().join("report_facts.rs"); + let source = fs::read_to_string(&path).expect("read report_facts.rs"); + assert!( + !source.contains("::scan_files("), + "report_facts.rs must build shared documents once and call detector scan_documents APIs" + ); +} + #[test] fn false_simplicity_detector_does_not_own_language_lexicons() { let path = crate_src().join("detectors/false_simplicity.rs"); diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs 
index 5e020c165..3fe25bd9b 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -254,11 +254,15 @@ pub fn parse_with_language(file: &Path, language: Language) -> Result<(Node, Vec let tree = parser .parse(&source, None) .with_context(|| format!("tree-sitter produced no tree for {}", file.display()))?; - let root = TreeSitterNormalizer::new(&source, language).normalize(tree.root_node()); + let root = normalize_tree(tree.root_node(), &source, language); let lines = source.lines().map(ToString::to_string).collect(); Ok((root, lines)) } +pub fn normalize_tree(root: TreeSitterNode<'_>, source: &str, language: Language) -> Node { + TreeSitterNormalizer::new(source, language).normalize(root) +} + fn language_grammar(language: Language) -> TreeSitterLanguage { match language { Language::Ruby => tree_sitter_ruby::LANGUAGE.into(), diff --git a/gems/decomplex/rust/src/decomplex/detectors/co_update.rs b/gems/decomplex/rust/src/decomplex/detectors/co_update.rs index e09d02e4f..8b0527e9d 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/co_update.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/co_update.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::Span; -use crate::decomplex::syntax::{self, Language, StateWrite}; +use crate::decomplex::syntax::{self, Document, Language, StateWrite}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -40,34 +40,45 @@ struct Write { } pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> CoUpdateReport { let mut writes = Vec::new(); - for file in files { - let doc = syntax::parse_file(file.clone(), language)?; - for w in doc.state_writes { - writes.push(Write { - attr: w.field, - recv: w.receiver, - file: w.file, - defn: w.function, - line: w.line, - span: w.span, - }); + for doc in 
documents { + for w in &doc.state_writes { + writes.push(write_from_state_write(w)); } } let report = Report::new(writes); - Ok(CoUpdateReport { + CoUpdateReport { co_written_pairs: report.co_written_pairs(3), neglected_updates: report.neglected_updates(3), - }) + } +} + +pub fn state_writes_for_documents(documents: &[Document]) -> Vec { + documents + .iter() + .flat_map(|document| document.state_writes.clone()) + .collect() } pub fn state_writes_for_files(files: &[PathBuf], language: Language) -> Result> { - let mut out = Vec::new(); - for file in files { - let doc = syntax::parse_file(file.clone(), language)?; - out.extend(doc.state_writes); + let documents = syntax::parse_files(files, language)?; + Ok(state_writes_for_documents(&documents)) +} + +fn write_from_state_write(w: &StateWrite) -> Write { + Write { + attr: w.field.clone(), + recv: w.receiver.clone(), + file: w.file.clone(), + defn: w.function.clone(), + line: w.line, + span: w.span, } - Ok(out) } struct Report { diff --git a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs index 4a72f1057..b02ccec76 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -38,18 +38,22 @@ struct Hit { } pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { let mut guard = Vec::new(); let mut dispatch = Vec::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; - let mut detector = 
DecisionPressure::new(file.to_string_lossy().to_string(), lines); - detector.walk(&root, &Vec::new(), &BTreeMap::new()); + for document in documents { + let mut detector = DecisionPressure::new(document.file.clone(), document.lines.clone()); + detector.walk(&document.normalized_root, &Vec::new(), &BTreeMap::new()); guard.extend(detector.guard_hits); dispatch.extend(detector.dispatch_hits); } - Ok(Report::new(guard, dispatch).ranked()) + Report::new(guard, dispatch).ranked() } struct DecisionPressure { diff --git a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs index 04f9514af..662311b4b 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -27,16 +27,20 @@ struct Asgn { } pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { let mut out = Vec::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; - let detector = DerivedState::new(file.to_string_lossy().to_string(), lines); - detector.each_method(&root, &mut |file, defn, stmts| { + for document in documents { + let detector = DerivedState::new(document.file.clone(), document.lines.clone()); + detector.each_method(&document.normalized_root, &mut |file, defn, stmts| { out.extend(analyze(file, defn, stmts)); }); } out.sort_by(|a, b| b.gap.cmp(&a.gap)); - Ok(out) + out } struct DerivedState { diff --git a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs 
b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs index d9f9820ff..aab87205c 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs @@ -2,7 +2,7 @@ use crate::decomplex::ast::{self, Child, Node, Span}; use crate::decomplex::syntax::adapters::false_simplicity_lexicon::{ false_simplicity_lexicon, FalseSimplicityLexicon, }; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -39,17 +39,24 @@ struct ClassRec { } pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { let mut hits = Vec::new(); let mut classrecs = Vec::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; - let mut detector = - FalseSimplicity::new(file.to_string_lossy().to_string(), lines, language); - detector.walk(&root, &[], &[]); + for document in documents { + let mut detector = FalseSimplicity::new( + document.file.clone(), + document.lines.clone(), + document.language, + ); + detector.walk(&document.normalized_root, &[], &[]); hits.extend(detector.hits); classrecs.extend(detector.classrecs); } - Ok(Report::new(hits, classrecs).findings()) + Report::new(hits, classrecs).findings() } struct FalseSimplicity { diff --git a/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs index d7a61bccf..040f5d583 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, 
Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -35,11 +35,15 @@ struct VariantReads { } pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> FatUnionReport { let mut out = Vec::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; - let mut detector = FatUnion::new(file.to_string_lossy().to_string(), lines); - detector.walk(&root, &Vec::new()); + for document in documents { + let mut detector = FatUnion::new(document.file.clone(), document.lines.clone()); + detector.walk(&document.normalized_root, &Vec::new()); out.extend(detector.findings()); } out.sort_by(|a, b| { @@ -48,7 +52,7 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result Result> { let summaries = local_flow::scan_files(files, language)?; - Ok(FunctionLcom::new(summaries).findings()) + Ok(scan_summaries(summaries)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + scan_summaries(local_flow::scan_documents(documents)) +} + +pub fn scan_summaries(summaries: Vec) -> Vec { + FunctionLcom::new(summaries).findings() } struct FunctionLcom { diff --git a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs index 7c3a52718..7be958b60 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -167,27 +167,25 @@ const NON_MUTATING_OPERATOR_MIDS: &[&str] = &["!", "!=", "!~"]; const MUTATING_SUFFIXES: 
&[&str] = &["!"]; pub fn scan_files(files: &[PathBuf], language: Language) -> Result { - let mut parsed = BTreeMap::new(); - for file in files { - parsed.insert( - file.to_string_lossy().to_string(), - ast::parse_with_language(file, language)?, - ); - } + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} - let effect_index = EffectIndex::build(&parsed); +pub fn scan_documents(documents: &[Document]) -> ImplicitControlFlowReport { + let effect_index = EffectIndex::build_documents(documents); let mut sequences = Vec::new(); - for (file, (root, lines)) in &parsed { - let mut miner = ImplicitControlFlow::new(file.clone(), lines.clone(), &effect_index); - miner.walk(root, &Vec::new()); + for document in documents { + let mut miner = + ImplicitControlFlow::new(document.file.clone(), document.lines.clone(), &effect_index); + miner.walk(&document.normalized_root, &Vec::new()); sequences.extend(miner.sequences); } let report = Report::new(sequences); - Ok(ImplicitControlFlowReport { + ImplicitControlFlowReport { ordered_protocols: report.ordered_protocols(1), order_drift: report.drift(4, 0.75), - }) + } } struct ImplicitControlFlow<'a> { @@ -501,11 +499,18 @@ struct EffectIndex { } impl EffectIndex { - fn build(parsed: &BTreeMap)>) -> Self { + fn build_documents(documents: &[Document]) -> Self { let mut effects = Vec::new(); - for (file, (root, lines)) in parsed { - effects.extend(EffectCollector::new(file.clone(), lines.clone()).scan(root)); + for document in documents { + effects.extend( + EffectCollector::new(document.file.clone(), document.lines.clone()) + .scan(&document.normalized_root), + ); } + Self::from_effects(effects) + } + + fn from_effects(effects: Vec) -> Self { let mut by_owner_name = BTreeMap::new(); let mut by_name = BTreeMap::new(); for e in effects { diff --git a/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs b/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs index 
4317e1932..4f0817655 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -44,13 +44,17 @@ pub fn scan_files( files: &[PathBuf], language: Language, ) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { let mut blocks = Vec::new(); - for file in files { - let (root, _lines) = ast::parse_with_language(file, language)?; - let detector = InconsistentRenameClone::new(file.to_string_lossy().to_string()); - detector.collect(&root, &Vec::new(), &mut blocks); + for document in documents { + let detector = InconsistentRenameClone::new(document.file.clone()); + detector.collect(&document.normalized_root, &Vec::new(), &mut blocks); } - Ok(Report::new(blocks).inconsistent_renames()) + Report::new(blocks).inconsistent_renames() } struct InconsistentRenameClone { diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index f933e39c3..9ffaa0637 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::BTreeSet; @@ -53,13 +53,21 @@ const LOCAL_READ_TYPES: &[&str] = &["LVAR", "DVAR"]; const LOCAL_WRITE_TYPES: &[&str] = &["LASGN", "DASGN"]; pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let documents = 
syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { let mut out = Vec::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; - let mut detector = LocalFlow::new(file.to_string_lossy().to_string(), lines, language); - out.extend(detector.scan(&root)); + for document in documents { + let mut detector = LocalFlow::new( + document.file.clone(), + document.lines.clone(), + document.language, + ); + out.extend(detector.scan(&document.normalized_root)); } - Ok(out) + out } struct LocalFlow { diff --git a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs index d8769054f..7310c785b 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs @@ -1,6 +1,6 @@ use crate::decomplex::ast::Span; use crate::decomplex::detectors::{local_flow, weighted_inlined_cognitive_complexity}; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -47,8 +47,16 @@ pub struct BoundaryInfo { pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let summaries = local_flow::scan_files(files, language)?; + Ok(scan_summaries(summaries)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + scan_summaries(local_flow::scan_documents(documents)) +} + +pub fn scan_summaries(summaries: Vec) -> Vec { let mut detector = LocalityDrag::new(summaries); - Ok(detector.findings()) + detector.findings() } struct LocalityDrag { diff --git a/gems/decomplex/rust/src/decomplex/detectors/miner.rs b/gems/decomplex/rust/src/decomplex/detectors/miner.rs index 8653c69e3..9dea61398 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/miner.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/miner.rs 
@@ -1,5 +1,5 @@ use crate::decomplex::ast::Span; -use crate::decomplex::syntax::{self, DecisionSite, Language}; +use crate::decomplex::syntax::{self, DecisionSite, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -33,15 +33,19 @@ pub struct NeglectedCondition { pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> MinerReport { let mut sites = Vec::new(); for doc in documents { - sites.extend(doc.decision_sites); + sites.extend(doc.decision_sites.clone()); } let m = Miner::new(sites); - Ok(MinerReport { + MinerReport { missing_abstractions: m.missing_abstractions(2), neglected_conditions: m.neglected_conditions(3), - }) + } } struct Miner { diff --git a/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs b/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs index 5cd9e6a32..002045b0d 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs @@ -1,6 +1,6 @@ use crate::decomplex::ast::Span; use crate::decomplex::detectors::local_flow; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -46,8 +46,18 @@ pub fn scan_files( language: Language, ) -> Result> { let summaries = local_flow::scan_files(files, language)?; + Ok(scan_summaries(summaries)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + scan_summaries(local_flow::scan_documents(documents)) +} + +pub fn scan_summaries( + summaries: Vec, +) -> Vec { let detector = OperationalDiscontinuity::new(summaries); - Ok(detector.findings()) + detector.findings() } struct OperationalDiscontinuity { @@ -104,7 +114,7 @@ impl 
OperationalDiscontinuity { let score = resets .iter() - .map(|r| (r.dead.len() as isize + r.new.len() as isize - r.continuing.len() as isize)) + .map(|r| r.dead.len() as isize + r.new.len() as isize - r.continuing.len() as isize) .sum::() + (resets.len() as isize * 8); if score < self.min_score { diff --git a/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs b/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs index f31282df6..45f7599c8 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs @@ -1,8 +1,8 @@ use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::BTreeMap; use std::path::PathBuf; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] @@ -23,14 +23,19 @@ const LIMIT: usize = 3; const PREDICATE_NODES: &[&str] = &["IF", "WHILE", "UNTIL"]; pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> ResultReport { let mut findings = Vec::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; - let mut scanner = OversizedPredicate::new(file.to_string_lossy().to_string(), lines, LIMIT); - scanner.walk(&root, &Vec::new()); + for document in documents { + let mut scanner = + OversizedPredicate::new(document.file.clone(), document.lines.clone(), LIMIT); + scanner.walk(&document.normalized_root, &Vec::new()); findings.extend(scanner.findings); } - Ok(ResultReport { findings }) + ResultReport { findings } } struct OversizedPredicate { diff --git a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs 
b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs index c227d7def..a8fe23d8e 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -8,6 +8,7 @@ use std::path::PathBuf; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct PathConditionReport { pub neglected: Vec, + pub scattered: Vec, } #[derive(Clone, Debug, Eq, PartialEq, Serialize)] @@ -20,6 +21,16 @@ pub struct NeglectedPathCondition { pub action: String, } +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct ScatteredPathCondition { + pub guards: Vec, + pub support: usize, + pub scatter: usize, + pub rank: usize, + pub sites: Vec, + pub spans: BTreeMap, +} + #[derive(Clone, Debug)] struct Site { guards: Vec, @@ -31,14 +42,18 @@ struct Site { } pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> PathConditionReport { let mut sites = Vec::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; - let mut pc = PathCondition::new(file.to_string_lossy().to_string(), lines); - pc.walk(&root, &Vec::new(), &Vec::new()); + for document in documents { + let mut pc = PathCondition::new(document.file.clone(), document.lines.clone()); + pc.walk(&document.normalized_root, &Vec::new(), &Vec::new()); sites.extend(pc.sites); } - Ok(Report::new(sites).findings()) + Report::new(sites).findings() } struct PathCondition { @@ -220,9 +235,48 @@ impl Report { fn findings(&self) -> PathConditionReport { PathConditionReport { neglected: self.neglected(3), + scattered: 
self.scattered(2), } } + fn scattered(&self, min_scatter: usize) -> Vec { + let mut out = Vec::new(); + for (guards, sites) in &self.groups { + let scatter = sites + .iter() + .map(|site| (site.file.clone(), site.defn.clone())) + .collect::>() + .len(); + if scatter < min_scatter { + continue; + } + + let locations = sites + .iter() + .map(|site| format!("{}:{}:{}", site.file, site.defn, site.line)) + .collect::>(); + let spans = sites + .iter() + .map(|site| { + ( + format!("{}:{}:{}", site.file, site.defn, site.line), + site.span, + ) + }) + .collect::>(); + out.push(ScatteredPathCondition { + guards: guards.clone(), + support: sites.len(), + scatter, + rank: sites.len() * scatter, + sites: locations, + spans, + }); + } + out.sort_by(|a, b| b.rank.cmp(&a.rank).then_with(|| a.guards.cmp(&b.guards))); + out + } + fn neglected(&self, min_support: usize) -> Vec { let popular: Vec<_> = self .groups diff --git a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs index 5c2073953..095115cfb 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs @@ -1,5 +1,5 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::Language; +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -29,76 +29,23 @@ struct Pred { } pub fn scan_files(files: &[PathBuf], language: Language) -> Result { - let mut preds = Vec::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; - let mut p = PredicateAlias::new(file.to_string_lossy().to_string(), lines); - p.walk(&root); - preds.extend(p.preds); - } - Ok(Report::new(preds).findings()) + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) } -struct 
PredicateAlias { - file: String, - lines: Vec, - preds: Vec, -} - -impl PredicateAlias { - fn new(file: String, lines: Vec) -> Self { - Self { - file, - lines, - preds: Vec::new(), - } - } - - fn walk(&mut self, node: &Node) { - if node.r#type == "DEFN" { - self.record_def(node); - } - for child in node.children.iter().filter_map(ast::node) { - self.walk(child); - } - } - - fn record_def(&mut self, node: &Node) { - let name = match node.children.get(0) { - Some(Child::Symbol(s)) => s.clone(), - _ => return, - }; - let scope = node.children.get(1).and_then(ast::node); - let Some(scope) = scope else { return }; - if scope.r#type != "SCOPE" { - return; - }; - - let body = scope.children.get(2).and_then(ast::node); - let Some(body) = body else { return }; - if body.r#type == "BLOCK" { - return; - }; - - let txt = ast::slice(body, &self.lines); - if txt.is_empty() || txt.len() > 200 { - return; - }; - - self.preds.push(Pred { - name: name.clone(), - body: txt, - file: self.file.clone(), - defn: name, - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - }); +pub fn scan_documents(documents: &[Document]) -> PredicateAliasReport { + let mut preds = Vec::new(); + for document in documents { + preds.extend(document.predicate_aliases.iter().map(|predicate| Pred { + name: predicate.name.clone(), + body: predicate.body.clone(), + file: predicate.file.clone(), + defn: predicate.defn.clone(), + line: predicate.line, + span: predicate.span, + })); } + Report::new(preds).findings() } struct Report { diff --git a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs index c130a27c5..1b5c346f0 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::{self, Child, Node, Span}; -use 
crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -64,11 +64,15 @@ const NIL_PREDICATE_MIDS: &[&str] = &["nil?", "isNull", "is_null", "nil", "is_no const NON_NIL_PREDICATE_MIDS: &[&str] = &["isSome", "is_some", "present"]; pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { let mut findings = Vec::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; - let mut scanner = RedundantNilGuard::new(file.to_string_lossy().to_string(), lines); - scanner.walk(&root, &Vec::new()); + for document in documents { + let mut scanner = RedundantNilGuard::new(document.file.clone(), document.lines.clone()); + scanner.walk(&document.normalized_root, &Vec::new()); findings.extend(scanner.findings); } let mut out: Vec<_> = findings.into_iter().map(|f| f.to_h()).collect(); @@ -79,7 +83,7 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result Result { - let mut preds = Vec::new(); - let mut uses = Vec::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; - let mut scanner = SemanticAlias::new(file.to_string_lossy().to_string(), lines); - scanner.walk(&root, &Vec::new()); - preds.extend(scanner.preds); - uses.extend(scanner.uses); - } - Ok(Report::new(preds, uses).findings()) -} - -struct SemanticAlias { - file: String, - lines: Vec, - preds: Vec, - uses: Vec, + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) } -impl SemanticAlias { - fn new(file: String, lines: Vec) -> Self { - Self { - file, - lines, - preds: Vec::new(), - uses: Vec::new(), - } - } - - fn walk(&mut self, node: &Node, defstack: &[String]) { - let mut next_defstack = defstack.to_vec(); - if 
matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; - if let Some(Child::Symbol(name)) = node.children.get(name_index) { - next_defstack.push(name.clone()); +pub fn scan_documents(documents: &[Document]) -> SemanticAliasReport { + let mut preds = Vec::new(); + let mut uses = Vec::new(); + for document in documents { + for predicate in &document.predicate_aliases { + if !semantic_predicate_definition(&predicate.name, &predicate.body) { + continue; } - } - - if node.r#type == "DEFN" { - self.record_pred(node); - } - - if matches!(node.r#type.as_str(), "CALL" | "OPCALL") && self.comparison(node) { - let c = self.canon(&ast::slice(node, &self.lines)); - self.uses.push(Use { - canon: c, - file: self.file.clone(), - defn: next_defstack - .last() - .cloned() - .unwrap_or_else(|| "(top-level)".to_string()), - line: node.first_lineno, - raw: ast::slice(node, &self.lines), - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], + preds.push(Pred { + name: predicate.name.clone(), + canon: canon(&predicate.body), + file: predicate.file.clone(), + line: predicate.line, + span: predicate.span, }); } - - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, &next_defstack); - } + uses.extend(document.comparison_uses.iter().map(|comparison| Use { + canon: canon(&comparison.raw), + file: comparison.file.clone(), + defn: comparison.function.clone(), + line: comparison.line, + raw: comparison.raw.clone(), + span: comparison.span, + })); } + Report::new(preds, uses).findings() +} - fn canon(&self, text: &str) -> String { - let (mut t, _) = ast::canon_polarity(text); - t = t.strip_prefix("self.").unwrap_or(&t).to_string(); - t = t.strip_prefix('@').unwrap_or(&t).to_string(); - - // Ruby: t = t.sub(/\A[A-Za-z_]\w*(?:\([^)]*\))?\.(?=[A-Za-z_]\w*\s*(==|!=|\.))/, "") - let re = regex::Regex::new( - r"^[A-Za-z_]\w*(?:\([^)]*\))?\.(?P[A-Za-z_]\w*\s*(?:==|!=|\.))", - ) - 
.unwrap(); - t = re.replace(&t, "$rest").to_string(); - - t.split_whitespace().collect::>().join(" ") - } +fn canon(text: &str) -> String { + let (mut t, _) = ast::canon_polarity(text); + t = t.strip_prefix("self.").unwrap_or(&t).to_string(); + t = t.strip_prefix('@').unwrap_or(&t).to_string(); - fn comparison(&self, node: &Node) -> bool { - let mid = node.children.get(1); - match mid { - Some(Child::Symbol(s)) => matches!(s.as_str(), "==" | "!=" | "nil?"), - _ => false, - } - } + // Ruby: t = t.sub(/\A[A-Za-z_]\w*(?:\([^)]*\))?\.(?=[A-Za-z_]\w*\s*(==|!=|\.))/, "") + let re = + regex::Regex::new(r"^[A-Za-z_]\w*(?:\([^)]*\))?\.(?P[A-Za-z_]\w*\s*(?:==|!=|\.))") + .unwrap(); + t = re.replace(&t, "$rest").to_string(); - fn record_pred(&mut self, node: &Node) { - if let Some(Child::Symbol(name)) = node.children.first() { - let stmts = ast::body_stmts(node); - if stmts.len() != 1 { - return; - } - let Some(body) = self.predicate_body(stmts[0]) else { - return; - }; - let body_source = ast::slice(body, &self.lines); - if !self.semantic_predicate_definition(name, &body_source) { - return; - } - - self.preds.push(Pred { - name: name.clone(), - canon: self.canon(&body_source), - file: self.file.clone(), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - }); - } - } - - fn predicate_body<'a>(&self, node: &'a Node) -> Option<&'a Node> { - if node.r#type == "RETURN" { - node.children.iter().filter_map(ast::node).next() - } else { - Some(node) - } - } + t.split_whitespace().collect::>().join(" ") +} - fn semantic_predicate_definition(&self, name: &str, body: &str) -> bool { - name.ends_with('?') - || body.contains("==") - || body.contains("!=") - || body.contains("&&") - || body.contains("||") - || body.contains(" and ") - || body.contains(" or ") - } +fn semantic_predicate_definition(name: &str, body: &str) -> bool { + name.ends_with('?') + || body.contains("==") + || body.contains("!=") + || 
body.contains("&&") + || body.contains("||") + || body.contains(" and ") + || body.contains(" or ") } struct Report { diff --git a/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs b/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs index 56e538d40..efde4c7b1 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -31,14 +31,18 @@ struct Call { } pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> BrokenProtocolReport { let mut calls = Vec::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; - let mut sm = SequenceMine::new(file.to_string_lossy().to_string(), lines); - sm.walk(&root, &Vec::new()); + for document in documents { + let mut sm = SequenceMine::new(document.file.clone(), document.lines.clone()); + sm.walk(&document.normalized_root, &Vec::new()); calls.extend(sm.calls); } - Ok(Report::new(calls).findings()) + Report::new(calls).findings() } const DECLARATIVE_MIDS: &[&str] = &[ diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs index f79a568a9..253d10847 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs @@ -1,6 +1,7 @@ use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::parallel; use crate::decomplex::syntax::adapters::language_profile; -use 
crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -35,49 +36,53 @@ const NOISE_MIDS: &[&str] = &[ ]; pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { - let profile = language_profile(language); - let mut parsed = Vec::new(); + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { let mut global_immutable_readers: BTreeMap> = BTreeMap::new(); let mut global_immutable_reader_types: BTreeMap> = BTreeMap::new(); let mut global_type_aliases: BTreeMap = BTreeMap::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; + for document in documents { + let profile = language_profile(document.language); - for (name, readers) in profile.immutable_struct_readers(&lines) { + for (name, readers) in profile.immutable_struct_readers(&document.lines) { global_immutable_readers .entry(name) .or_default() .extend(readers); } - for (name, reader_types) in profile.immutable_struct_reader_types(&lines) { + for (name, reader_types) in profile.immutable_struct_reader_types(&document.lines) { global_immutable_reader_types .entry(name) .or_default() .extend(reader_types); } - global_type_aliases.extend(profile.type_aliases(&lines)); - - parsed.push((file.to_string_lossy().to_string(), root, lines)); + global_type_aliases.extend(profile.type_aliases(&document.lines)); } - let mut all_decisions = Vec::new(); - for (file, root, lines) in parsed { - let method_param_types = profile.method_param_types(&lines); + let decision_chunks = parallel::map_ordered(documents, |document| { + let profile = language_profile(document.language); + let method_param_types = profile.method_param_types(&document.lines); let mut scanner = StateBranchDensity::new( - Some(file), - lines, + Some(document.file.clone()), + 
document.lines.clone(), Some(global_immutable_readers.clone()), Some(global_immutable_reader_types.clone()), Some(global_type_aliases.clone()), Some(method_param_types), ); - scanner.walk(&root, &Vec::new()); - all_decisions.extend(scanner.decisions); - } + scanner.walk(&document.normalized_root, &Vec::new()); + Ok(scanner.decisions) + }) + .expect("state-branch-density document scan"); + + let all_decisions = decision_chunks.into_iter().flatten().collect(); - Ok(Report::new(all_decisions).findings()) + Report::new(all_decisions).findings() } struct StateBranchDensity { diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs index 6c958932c..e5123edc3 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs @@ -1,6 +1,6 @@ use crate::decomplex::ast::{self, Child, Node, Span}; use crate::decomplex::detectors::semantic_alias; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -147,15 +147,30 @@ struct FieldMetrics { } pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> StateMeshReport { + let semantic_aliases = semantic_alias::scan_documents(documents); + scan_documents_with_semantic_aliases(documents, &semantic_aliases) +} + +pub fn scan_documents_with_semantic_aliases( + documents: &[Document], + semantic_aliases: &semantic_alias::SemanticAliasReport, +) -> StateMeshReport { let mut src_map = BTreeMap::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; - src_map.insert(file.to_string_lossy().to_string(), (root, lines)); + for document in documents { + src_map.insert( + 
document.file.clone(), + (document.normalized_root.clone(), document.lines.clone()), + ); } let mut sm = StateMesh::new(src_map); - sm.run(language)?; - Ok(sm.to_json_graph()) + sm.run(semantic_aliases); + sm.to_json_graph() } struct StateMesh { @@ -179,15 +194,14 @@ impl StateMesh { } } - fn run(&mut self, language: Language) -> Result<()> { + fn run(&mut self, semantic_aliases: &semantic_alias::SemanticAliasReport) { self.discover_fields(); if self.known_field_norms().is_empty() { - return Ok(()); + return; } self.find_reads(); - self.find_re_derivations(language)?; - Ok(()) + self.find_re_derivations(semantic_aliases); } fn discover_fields(&mut self) { @@ -386,16 +400,13 @@ impl StateMesh { } } - fn find_re_derivations(&mut self, language: Language) -> Result<()> { + fn find_re_derivations(&mut self, semantic_aliases: &semantic_alias::SemanticAliasReport) { let field_norms = self.known_field_norms(); if field_norms.is_empty() { - return Ok(()); + return; } - let files: Vec<_> = self.src_map.keys().map(PathBuf::from).collect(); - let sa = semantic_alias::scan_files(&files, language)?; - - for m in sa.reification_misses { + for m in &semantic_aliases.reification_misses { let loc = m.at.clone(); let parts: Vec<&str> = loc.split(':').collect(); if parts.len() < 3 { @@ -414,13 +425,12 @@ impl StateMesh { file, defn, line, - raw: m.raw, - predicate: m.predicate, - canon: m.canon, + raw: m.raw.clone(), + predicate: m.predicate.clone(), + canon: m.canon.clone(), }); } } - Ok(()) } fn metrics(&self) -> Vec { diff --git a/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs b/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs index 32e1fc264..506b54268 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::{self, Node, Span}; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, 
Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::BTreeMap; @@ -44,23 +44,25 @@ const CONDITIONAL_TYPES: &[&str] = &["IF", "UNLESS", "CASE", "CASE2"]; const ITERATION_TYPES: &[&str] = &["ITER", "FOR", "WHILE", "UNTIL"]; pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> StructuralTopologyReport { let mut methods = Vec::new(); - let mut parsed = Vec::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; - let mut mc = MethodCollector::new(file.to_string_lossy().to_string(), lines.clone()); - methods.extend(mc.scan(&root)); - parsed.push((file.to_string_lossy().to_string(), root, lines)); + for document in documents { + let mut mc = MethodCollector::new(document.file.clone(), document.lines.clone()); + methods.extend(mc.scan(&document.normalized_root)); } let mut edges = Vec::new(); - for (file, root, lines) in &parsed { - let mut ec = EdgeCollector::new(file.clone(), lines.clone(), &methods); - edges.extend(ec.scan(root)); + for document in documents { + let mut ec = EdgeCollector::new(document.file.clone(), document.lines.clone(), &methods); + edges.extend(ec.scan(&document.normalized_root)); } - Ok(StructuralTopologyReport { methods, edges }) + StructuralTopologyReport { methods, edges } } pub struct Graph { diff --git a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs index 1a3247e36..bfdec618f 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, Language}; use 
anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -36,11 +36,16 @@ pub fn scan_files( files: &[PathBuf], language: Language, ) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { let mut rows = Vec::new(); - for file in files { - let (root, lines) = ast::parse_with_language(file, language)?; - let mut detector = TemporalOrderingPressure::new(file.to_string_lossy().to_string(), lines); - rows.extend(detector.scan(&root)); + for document in documents { + let mut detector = + TemporalOrderingPressure::new(document.file.clone(), document.lines.clone()); + rows.extend(detector.scan(&document.normalized_root)); } rows.sort_by(|a, b| { b.score @@ -49,7 +54,7 @@ pub fn scan_files( .then_with(|| a.file.cmp(&b.file)) .then_with(|| a.owner.cmp(&b.owner)) }); - Ok(rows) + rows } struct TemporalOrderingPressure { diff --git a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs index 9958c889d..0db7f64ee 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs @@ -1,6 +1,6 @@ use crate::decomplex::ast::{self, Node, Span}; use crate::decomplex::detectors::structural_topology; -use crate::decomplex::syntax::Language; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -26,21 +26,18 @@ pub fn scan_files( files: &[PathBuf], language: Language, ) -> Result> { - let mut parsed = BTreeMap::new(); - for file in files { - parsed.insert( - file.to_string_lossy().to_string(), - ast::parse_with_language(file, language)?, - ); - } + let documents = syntax::parse_files(files, language)?; + 
Ok(scan_documents(&documents)) +} - let topology_report = structural_topology::scan_files(files, language)?; +pub fn scan_documents(documents: &[Document]) -> Vec { + let topology_report = structural_topology::scan_documents(documents); let topology = structural_topology::Graph::new(topology_report.methods, topology_report.edges); let mut bodies = Vec::new(); - for (file, (root, lines)) in &parsed { - let mut collector = MethodBodyCollector::new(file.clone(), lines.clone()); - bodies.extend(collector.scan(root)); + for document in documents { + let mut collector = MethodBodyCollector::new(document.file.clone(), document.lines.clone()); + bodies.extend(collector.scan(&document.normalized_root)); } let mut scores = BTreeMap::new(); @@ -62,7 +59,7 @@ pub fn scan_files( } let analyzer = Analyzer::new(topology, scores, 12.0, 15.0, 2); - Ok(analyzer.findings()) + analyzer.findings() } struct MethodBody { diff --git a/gems/decomplex/rust/src/decomplex/mod.rs b/gems/decomplex/rust/src/decomplex/mod.rs index 765921560..6f0af1177 100644 --- a/gems/decomplex/rust/src/decomplex/mod.rs +++ b/gems/decomplex/rust/src/decomplex/mod.rs @@ -4,4 +4,5 @@ mod architecture_test; pub mod ast; pub mod detectors; pub mod parallel; +pub mod report_facts; pub mod syntax; diff --git a/gems/decomplex/rust/src/decomplex/report_facts.rs b/gems/decomplex/rust/src/decomplex/report_facts.rs new file mode 100644 index 000000000..18be88423 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/report_facts.rs @@ -0,0 +1,634 @@ +use crate::decomplex::detectors::{ + co_update, decision_pressure, derived_state, false_simplicity, fat_union, flay_similarity, + function_lcom, implicit_control_flow, inconsistent_rename_clone, local_flow, locality_drag, + miner, operational_discontinuity, oversized_predicate, path_condition, predicate_alias, + redundant_nil_guard, semantic_alias, sequence_mine, state_branch_density, state_mesh, + temporal_ordering_pressure, weighted_inlined_cognitive_complexity, +}; +use 
crate::decomplex::parallel; +use crate::decomplex::syntax::{self, Document, Language}; +use anyhow::{bail, Context, Result}; +use serde::Serialize; +use serde_json::{json, Map, Value}; +use std::collections::BTreeMap; +use std::fs; +use std::path::{Path, PathBuf}; +use std::sync::mpsc; +use std::thread; + +pub const FORMAT: &str = "decomplex.report-facts.v1"; + +const DEFAULT_MASS: usize = 32; +const DEFAULT_FUZZY: usize = 1; +const DEFAULT_EXCLUDE_DIRS: &[&str] = &[ + ".clear-cache", + ".clear-transpile-cache", + ".global-zig-cache", + ".zig-cache", + "zig-cache", + "zig-out", + "node_modules", +]; + +#[derive(Clone, Debug)] +pub struct Options { + pub language: Option, + pub excludes: Vec, + pub mass: usize, + pub fuzzy: usize, +} + +impl Default for Options { + fn default() -> Self { + Self { + language: None, + excludes: Vec::new(), + mass: DEFAULT_MASS, + fuzzy: DEFAULT_FUZZY, + } + } +} + +#[derive(Clone, Debug)] +pub struct SourceFile { + pub path: PathBuf, + pub language: Language, +} + +struct SharedFacts { + local_summaries: Vec, + semantic_aliases: semantic_alias::SemanticAliasReport, +} + +impl SharedFacts { + fn new(documents: &[Document]) -> Self { + thread::scope(|scope| { + let local_summaries = scope.spawn(|| local_flow::scan_documents(documents)); + let semantic_aliases = scope.spawn(|| semantic_alias::scan_documents(documents)); + Self { + local_summaries: local_summaries.join().expect("local-flow facts worker"), + semantic_aliases: semantic_aliases + .join() + .expect("semantic-alias facts worker"), + } + }) + } +} + +pub fn collect(targets: &[PathBuf], options: &Options) -> Result { + let files = collect_source_files(targets, options)?; + facts_for_source_files(&files, options) +} + +pub fn collect_source_files(targets: &[PathBuf], options: &Options) -> Result> { + let mut files = Vec::new(); + for target in targets { + expand_target(target, options, &mut files) + .with_context(|| format!("failed to collect {}", target.display()))?; + } + 
files.sort_by(|left, right| left.path.cmp(&right.path)); + files.dedup_by(|left, right| left.path == right.path); + Ok(files) +} + +pub fn facts_for_source_files(files: &[SourceFile], options: &Options) -> Result { + if files.is_empty() { + bail!("facts requires at least one supported source file"); + } + + let documents = parallel::map_ordered(files, |file| { + syntax::parse_file(file.path.clone(), file.language) + })?; + let shared = SharedFacts::new(&documents); + let mut groups: BTreeMap> = BTreeMap::new(); + for document in documents { + groups.entry(document.language).or_default().push(document); + } + + let detectors = collect_detector_facts(&groups, &shared, options)?; + + Ok(json!({ + "format": FORMAT, + "files": files.iter().map(|file| file.path.to_string_lossy().to_string()).collect::>(), + "languages": language_counts(files), + "detectors": detectors, + })) +} + +fn collect_detector_facts( + groups: &BTreeMap>, + shared: &SharedFacts, + options: &Options, +) -> Result> { + if parallel::job_count() <= 1 { + return collect_detector_facts_sequential(groups, shared, options); + } + + let (tx, rx) = mpsc::channel(); + thread::scope(|scope| { + macro_rules! 
spawn_detector { + ($name:expr, $body:expr) => {{ + let tx = tx.clone(); + scope.spawn(move || { + let result: Result = (|| $body)(); + let _ = tx.send(($name.to_string(), result)); + }); + }}; + } + + spawn_detector!("miner", { + merge_object_reports( + groups, + &["missing_abstractions", "neglected_conditions"], + |documents| json_value(miner::scan_documents(documents)), + ) + }); + spawn_detector!("co_update", { + merge_object_reports( + groups, + &["co_written_pairs", "neglected_updates"], + |documents| json_value(co_update::scan_documents(documents)), + ) + }); + spawn_detector!("predicate_alias", { + merge_object_reports(groups, &["alias_clusters"], |documents| { + json_value(predicate_alias::scan_documents(documents)) + }) + }); + spawn_detector!("semantic_alias", { + json_value(shared.semantic_aliases.clone()) + }); + spawn_detector!("path_condition", { + merge_object_reports(groups, &["neglected", "scattered"], |documents| { + json_value(path_condition::scan_documents(documents)) + }) + }); + spawn_detector!("sequence_mine", { + merge_object_reports(groups, &["broken"], |documents| { + json_value(sequence_mine::scan_documents(documents)) + }) + .map(rename_broken_protocol) + }); + spawn_detector!("implicit_control_flow", { + merge_object_reports(groups, &["ordered_protocols"], |documents| { + json_value(implicit_control_flow::scan_documents(documents)) + }) + }); + spawn_detector!("derived_state", { + merge_array_reports(groups, |documents| { + json_value(derived_state::scan_documents(documents)) + }) + }); + spawn_detector!("inconsistent_rename_clone", { + merge_array_reports(groups, |documents| { + json_value(inconsistent_rename_clone::scan_documents(documents)) + }) + }); + spawn_detector!("flay_similarity", { + merge_array_reports(groups, |documents| { + json_value(flay_similarity::scan_documents( + documents, + options.mass, + options.fuzzy, + )) + }) + }); + spawn_detector!("decision_pressure", { + merge_array_reports(groups, |documents| { + 
json_value(decision_pressure::scan_documents(documents)) + }) + }); + spawn_detector!("redundant_nil_guard", { + merge_array_reports(groups, |documents| { + json_value(redundant_nil_guard::scan_documents(documents)) + }) + }); + spawn_detector!("false_simplicity", { + merge_array_reports(groups, |documents| { + json_value(false_simplicity::scan_documents(documents)) + }) + }); + spawn_detector!("oversized_predicate", { + Ok(merge_object_reports(groups, &["findings"], |documents| { + json_value(oversized_predicate::scan_documents(documents)) + })? + .get("findings") + .cloned() + .unwrap_or_else(|| Value::Array(Vec::new()))) + }); + spawn_detector!("fat_union", { + merge_object_reports(groups, &["fat_unions"], |documents| { + json_value(fat_union::scan_documents(documents)) + }) + }); + spawn_detector!("state_heatmap", { + state_heatmap_findings_for_groups(groups, &shared.semantic_aliases) + }); + spawn_detector!("state_branch_density", { + merge_array_reports(groups, |documents| { + json_value(state_branch_density::scan_documents(documents)) + }) + }); + spawn_detector!("temporal_ordering_pressure", { + merge_array_reports(groups, |documents| { + json_value(temporal_ordering_pressure::scan_documents(documents)) + }) + }); + spawn_detector!("weighted_inlined_complexity", { + merge_array_reports(groups, |documents| { + json_value(weighted_inlined_cognitive_complexity::scan_documents( + documents, + )) + }) + }); + spawn_detector!("locality_drag", { + json_value(locality_drag::scan_summaries( + shared.local_summaries.clone(), + )) + }); + spawn_detector!("function_lcom", { + json_value(function_lcom::scan_summaries( + shared.local_summaries.clone(), + )) + }); + spawn_detector!("operational_discontinuity", { + json_value(operational_discontinuity::scan_summaries( + shared.local_summaries.clone(), + )) + }); + drop(tx); + }); + + let mut detectors = Map::new(); + let mut first_error = None; + for (name, result) in rx { + match result { + Ok(value) => { + 
detectors.insert(name, value); + } + Err(error) => { + if first_error.is_none() { + first_error = Some(error.context(format!("failed to collect {name} facts"))); + } + } + } + } + if let Some(error) = first_error { + return Err(error); + } + Ok(detectors) +} + +fn collect_detector_facts_sequential( + groups: &BTreeMap>, + shared: &SharedFacts, + options: &Options, +) -> Result> { + let mut detectors = Map::new(); + detectors.insert( + "miner".to_string(), + merge_object_reports( + groups, + &["missing_abstractions", "neglected_conditions"], + |documents| json_value(miner::scan_documents(documents)), + )?, + ); + detectors.insert( + "co_update".to_string(), + merge_object_reports( + groups, + &["co_written_pairs", "neglected_updates"], + |documents| json_value(co_update::scan_documents(documents)), + )?, + ); + detectors.insert( + "predicate_alias".to_string(), + merge_object_reports(groups, &["alias_clusters"], |documents| { + json_value(predicate_alias::scan_documents(documents)) + })?, + ); + detectors.insert( + "semantic_alias".to_string(), + json_value(shared.semantic_aliases.clone())?, + ); + detectors.insert( + "path_condition".to_string(), + merge_object_reports(groups, &["neglected", "scattered"], |documents| { + json_value(path_condition::scan_documents(documents)) + })?, + ); + detectors.insert( + "sequence_mine".to_string(), + merge_object_reports(groups, &["broken"], |documents| { + json_value(sequence_mine::scan_documents(documents)) + }) + .map(rename_broken_protocol)?, + ); + detectors.insert( + "implicit_control_flow".to_string(), + merge_object_reports(groups, &["ordered_protocols"], |documents| { + json_value(implicit_control_flow::scan_documents(documents)) + })?, + ); + detectors.insert( + "derived_state".to_string(), + merge_array_reports(groups, |documents| { + json_value(derived_state::scan_documents(documents)) + })?, + ); + detectors.insert( + "inconsistent_rename_clone".to_string(), + merge_array_reports(groups, |documents| { + 
json_value(inconsistent_rename_clone::scan_documents(documents)) + })?, + ); + detectors.insert( + "flay_similarity".to_string(), + merge_array_reports(groups, |documents| { + json_value(flay_similarity::scan_documents( + documents, + options.mass, + options.fuzzy, + )) + })?, + ); + detectors.insert( + "decision_pressure".to_string(), + merge_array_reports(groups, |documents| { + json_value(decision_pressure::scan_documents(documents)) + })?, + ); + detectors.insert( + "redundant_nil_guard".to_string(), + merge_array_reports(groups, |documents| { + json_value(redundant_nil_guard::scan_documents(documents)) + })?, + ); + detectors.insert( + "false_simplicity".to_string(), + merge_array_reports(groups, |documents| { + json_value(false_simplicity::scan_documents(documents)) + })?, + ); + detectors.insert( + "oversized_predicate".to_string(), + merge_object_reports(groups, &["findings"], |documents| { + json_value(oversized_predicate::scan_documents(documents)) + })? + .get("findings") + .cloned() + .unwrap_or_else(|| Value::Array(Vec::new())), + ); + detectors.insert( + "fat_union".to_string(), + merge_object_reports(groups, &["fat_unions"], |documents| { + json_value(fat_union::scan_documents(documents)) + })?, + ); + detectors.insert( + "state_heatmap".to_string(), + state_heatmap_findings_for_groups(groups, &shared.semantic_aliases)?, + ); + detectors.insert( + "state_branch_density".to_string(), + merge_array_reports(groups, |documents| { + json_value(state_branch_density::scan_documents(documents)) + })?, + ); + detectors.insert( + "temporal_ordering_pressure".to_string(), + merge_array_reports(groups, |documents| { + json_value(temporal_ordering_pressure::scan_documents(documents)) + })?, + ); + detectors.insert( + "weighted_inlined_complexity".to_string(), + merge_array_reports(groups, |documents| { + json_value(weighted_inlined_cognitive_complexity::scan_documents( + documents, + )) + })?, + ); + detectors.insert( + "locality_drag".to_string(), + 
json_value(locality_drag::scan_summaries( + shared.local_summaries.clone(), + ))?, + ); + detectors.insert( + "function_lcom".to_string(), + json_value(function_lcom::scan_summaries( + shared.local_summaries.clone(), + ))?, + ); + detectors.insert( + "operational_discontinuity".to_string(), + json_value(operational_discontinuity::scan_summaries( + shared.local_summaries.clone(), + ))?, + ); + Ok(detectors) +} + +fn merge_object_reports( + groups: &BTreeMap>, + fields: &[&str], + scan: F, +) -> Result +where + F: Fn(&[Document]) -> Result, +{ + let mut merged = Map::new(); + for field in fields { + merged.insert((*field).to_string(), Value::Array(Vec::new())); + } + + for (language, documents) in groups { + let value = scan(documents)?; + let object = value + .as_object() + .with_context(|| format!("{} detector did not return an object", language.as_str()))?; + for field in fields { + let rows = object + .get(*field) + .and_then(Value::as_array) + .with_context(|| format!("detector result missing array field {field}"))?; + merged + .get_mut(*field) + .and_then(Value::as_array_mut) + .expect("merged array") + .extend(rows.iter().cloned()); + } + } + Ok(Value::Object(merged)) +} + +fn json_value(value: T) -> Result { + Ok(serde_json::to_value(value)?) +} + +fn merge_array_reports(groups: &BTreeMap>, scan: F) -> Result +where + F: Fn(&[Document]) -> Result, +{ + let mut rows = Vec::new(); + for (language, documents) in groups { + let value = scan(documents)?; + rows.extend( + value + .as_array() + .with_context(|| format!("{} detector did not return an array", language.as_str()))? 
+ .iter() + .cloned(), + ); + } + Ok(Value::Array(rows)) +} + +fn rename_broken_protocol(mut value: Value) -> Value { + if let Some(object) = value.as_object_mut() { + if let Some(rows) = object.remove("broken") { + object.insert("broken_protocol".to_string(), rows); + } + } + value +} + +fn state_heatmap_findings_for_groups( + groups: &BTreeMap>, + semantic_aliases: &semantic_alias::SemanticAliasReport, +) -> Result { + let mut rows = Vec::new(); + for documents in groups.values() { + let report = state_mesh::scan_documents_with_semantic_aliases(documents, semantic_aliases); + rows.extend(state_heatmap_findings(&report)); + } + Ok(Value::Array(rows)) +} + +fn state_heatmap_findings(report: &state_mesh::StateMeshReport) -> Vec { + let mut rows = Vec::new(); + for (field, row) in &report.fields { + let mut sites = Vec::new(); + sites.extend(row.writers.iter().map(site_location)); + sites.extend(row.readers.iter().map(site_location)); + sites.extend(row.re_derivations.iter().map(re_derivation_location)); + + let spans = row + .writers + .iter() + .chain(row.readers.iter()) + .map(|site| (site_location(site), json!(site.span))) + .collect::>(); + + rows.push(json!({ + "at": sites.first().cloned(), + "field": field, + "writes": row.metrics.writes, + "reads": row.metrics.reads, + "re_derivations": row.metrics.re_derivations, + "scatter": row.metrics.scatter, + "write_scatter": row.metrics.write_scatter, + "read_scatter": row.metrics.read_scatter, + "receiver_types": row.metrics.receiver_types, + "messiness": row.messiness, + "pressure": row.metrics.pressure, + "top_writers": row.writers.iter().take(4).map(site_location).collect::>(), + "top_readers": row.readers.iter().take(4).map(site_location).collect::>(), + "sites": sites.into_iter().take(12).collect::>(), + "spans": spans, + })); + } + rows +} + +fn site_location(site: &state_mesh::SiteInfo) -> String { + format!("{}:{}:{}", site.file, site.defn, site.line) +} + +fn re_derivation_location(site: 
&state_mesh::ReDerivationInfo) -> String { + format!("{}:{}:{}", site.file, site.defn, site.line) +} + +fn language_counts(files: &[SourceFile]) -> BTreeMap { + let mut counts = BTreeMap::new(); + for file in files { + *counts + .entry(file.language.as_str().to_string()) + .or_insert(0) += 1; + } + counts +} + +fn expand_target(target: &Path, options: &Options, out: &mut Vec) -> Result<()> { + if target.is_dir() { + expand_directory(target, options, out) + } else if target.is_file() { + push_source_file(target, options, out); + Ok(()) + } else { + Ok(()) + } +} + +fn expand_directory(dir: &Path, options: &Options, out: &mut Vec) -> Result<()> { + for entry in fs::read_dir(dir).with_context(|| format!("failed to read {}", dir.display()))? { + let entry = entry?; + let path = entry.path(); + if excluded_path(&path, options) { + continue; + } + if path.is_dir() { + expand_directory(&path, options, out)?; + } else if path.is_file() { + push_source_file(&path, options, out); + } + } + Ok(()) +} + +fn push_source_file(path: &Path, options: &Options, out: &mut Vec) { + if excluded_path(path, options) { + return; + } + let Some(file_name) = path.file_name().and_then(|value| value.to_str()) else { + return; + }; + if file_name.starts_with('.') || file_name == "all-tests.zig" { + return; + } + + let language = options.language.or_else(|| { + path.extension() + .and_then(|value| value.to_str()) + .and_then(|extension| Language::for_extension(&extension.to_ascii_lowercase())) + }); + let Some(language) = language else { + return; + }; + out.push(SourceFile { + path: path.to_path_buf(), + language, + }); +} + +fn excluded_path(path: &Path, options: &Options) -> bool { + let text = path.to_string_lossy().replace('\\', "/"); + if DEFAULT_EXCLUDE_DIRS.iter().any(|dir| { + text == *dir || text.ends_with(&format!("/{dir}")) || text.contains(&format!("/{dir}/")) + }) { + return true; + } + + options.excludes.iter().any(|pattern| { + let pattern = pattern.replace('\\', "/"); + if let 
Some(prefix) = pattern.strip_suffix("/**") { + let prefix = prefix.strip_prefix("**/").unwrap_or(prefix); + text == prefix + || text.ends_with(&format!("/{prefix}")) + || text.contains(&format!("/{prefix}/")) + } else { + text == pattern || text.ends_with(&format!("/{pattern}")) || text.contains(&pattern) + } + }) +} diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index b74305fb4..c813c574d 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -1,14 +1,14 @@ pub(crate) mod adapters; pub mod tree_sitter_adapter; -use crate::decomplex::ast::{RawNode, Span}; +use crate::decomplex::ast::{Node as NormalizedNode, RawNode, Span}; use crate::decomplex::parallel; use anyhow::{bail, Result}; use serde::Serialize; use std::collections::BTreeMap; use std::path::PathBuf; -#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] pub enum Language { Ruby, Python, @@ -48,6 +48,47 @@ impl Language { _ => bail!("unsupported Decomplex native language: {value}"), } } + + pub fn as_str(self) -> &'static str { + match self { + Self::Ruby => "ruby", + Self::Python => "python", + Self::JavaScript => "javascript", + Self::Java => "java", + Self::TypeScript => "typescript", + Self::Swift => "swift", + Self::Kotlin => "kotlin", + Self::Go => "go", + Self::Rust => "rust", + Self::Zig => "zig", + Self::Lua => "lua", + Self::C => "c", + Self::Cpp => "cpp", + Self::CSharp => "csharp", + Self::Php => "php", + } + } + + pub fn for_extension(extension: &str) -> Option { + match extension { + "rb" => Some(Self::Ruby), + "py" => Some(Self::Python), + "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript), + "java" => Some(Self::Java), + "ts" | "tsx" => Some(Self::TypeScript), + "swift" => Some(Self::Swift), + "kt" | "kts" => Some(Self::Kotlin), + "go" => Some(Self::Go), + "rs" => Some(Self::Rust), + "zig" => Some(Self::Zig), + "lua" => 
Some(Self::Lua), + "c" | "h" => Some(Self::C), + "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Some(Self::Cpp), + "cs" => Some(Self::CSharp), + "php" => Some(Self::Php), + _ => None, + } + } } #[derive(Clone, Debug)] @@ -57,6 +98,7 @@ pub struct Document { pub source: String, pub lines: Vec, pub root: RawNode, + pub normalized_root: NormalizedNode, pub function_defs: Vec, pub state_writes: Vec, pub decision_sites: Vec, diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 8d82bedb7..a4f79bcf1 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -2,7 +2,7 @@ use super::{ adapters::{language_profile, LanguageProfile}, ComparisonUse, DecisionSite, Document, FunctionDef, Language, PredicateAlias, StateWrite, }; -use crate::decomplex::ast::{line, node_text, normalize_text, span, RawNode}; +use crate::decomplex::ast::{line, node_text, normalize_text, normalize_tree, span, RawNode}; use anyhow::{Context, Result}; use std::collections::HashSet; use std::fs; @@ -41,6 +41,7 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { source: parsed.source.clone(), lines: parsed.source.lines().map(ToString::to_string).collect(), root: RawNode::from_tree_sitter(parsed.tree.root_node(), &parsed.source), + normalized_root: normalize_tree(parsed.tree.root_node(), &parsed.source, language), function_defs, state_writes, decision_sites, diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs index eb4304dca..7d74510eb 100644 --- a/gems/decomplex/rust/src/main.rs +++ b/gems/decomplex/rust/src/main.rs @@ -9,8 +9,11 @@ use decomplex::detectors::{ structural_topology, temporal_ordering_pressure, weighted_inlined_cognitive_complexity, }; use decomplex::parallel; +use decomplex::report_facts::{self, Options as ReportFactsOptions}; use decomplex::syntax::Language; +use 
std::io::Write; use std::path::PathBuf; +use std::process::{Command as ProcessCommand, Stdio}; fn main() -> Result<()> { let worker = std::thread::Builder::new() @@ -233,6 +236,27 @@ fn run() -> Result<()> { .with_context(|| "failed to scan fat-union facts")?; println!("{}", serde_json::to_string(&findings)?); } + Command::Facts { + options, + targets, + output, + .. + } => { + let facts = report_facts::collect(&targets, &options) + .with_context(|| "failed to collect report facts")?; + write_json(&facts, output.as_ref())?; + } + Command::Report { + options, + targets, + format, + output, + .. + } => { + let facts = report_facts::collect(&targets, &options) + .with_context(|| "failed to collect report facts")?; + render_report_with_ruby(&facts, &format, output.as_ref())?; + } } Ok(()) } @@ -365,6 +389,19 @@ enum Command { files: Vec, jobs: Option, }, + Facts { + options: ReportFactsOptions, + targets: Vec, + output: Option, + jobs: Option, + }, + Report { + options: ReportFactsOptions, + targets: Vec, + format: String, + output: Option, + jobs: Option, + }, } impl Command { @@ -394,7 +431,9 @@ impl Command { | Self::SequenceMine { jobs, .. } | Self::FunctionLcom { jobs, .. } | Self::FalseSimplicity { jobs, .. } - | Self::FatUnion { jobs, .. } => *jobs, + | Self::FatUnion { jobs, .. } + | Self::Facts { jobs, .. } + | Self::Report { jobs, .. 
} => *jobs, } } } @@ -405,6 +444,31 @@ fn parse_args(args: Vec) -> Result { bail!("usage: decomplex-rust COMMAND [--language ruby] [--jobs N] FILE..."); }; match command.as_str() { + "facts" => { + let args = parse_report_facts_args(cursor.collect(), false)?; + if args.targets.is_empty() { + bail!("facts requires at least one file or directory"); + } + Ok(Command::Facts { + options: args.options, + targets: args.targets, + output: args.output, + jobs: args.jobs, + }) + } + "report" => { + let args = parse_report_facts_args(cursor.collect(), true)?; + if args.targets.is_empty() { + bail!("report requires at least one file or directory"); + } + Ok(Command::Report { + options: args.options, + targets: args.targets, + format: args.format, + output: args.output, + jobs: args.jobs, + }) + } "state-writes" => { let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; if files.is_empty() { @@ -724,6 +788,153 @@ fn parse_args(args: Vec) -> Result { } } +struct ReportFactsArgs { + options: ReportFactsOptions, + targets: Vec, + output: Option, + jobs: Option, + format: String, +} + +fn parse_report_facts_args(args: Vec, allow_format: bool) -> Result { + let mut options = ReportFactsOptions::default(); + let mut targets = Vec::new(); + let mut output = None; + let mut jobs = None; + let mut format = "markdown".to_string(); + let mut cursor = args.into_iter(); + while let Some(arg) = cursor.next() { + if arg == "--language" { + let value = cursor + .next() + .with_context(|| "--language requires a value")?; + options.language = Some(Language::parse(&value)?); + } else if let Some(value) = arg.strip_prefix("--language=") { + options.language = Some(Language::parse(value)?); + } else if arg == "--jobs" { + jobs = Some(parse_jobs( + cursor.next().with_context(|| "--jobs requires a value")?, + )?); + } else if let Some(value) = arg.strip_prefix("--jobs=") { + jobs = Some(parse_jobs(value.to_string())?); + } else if arg == "--exclude" { + 
options.excludes.push( + cursor + .next() + .with_context(|| "--exclude requires a value")?, + ); + } else if let Some(value) = arg.strip_prefix("--exclude=") { + options.excludes.push(value.to_string()); + } else if arg == "--output" { + output = Some(PathBuf::from( + cursor.next().with_context(|| "--output requires a value")?, + )); + } else if let Some(value) = arg.strip_prefix("--output=") { + output = Some(PathBuf::from(value)); + } else if arg == "--format" { + if !allow_format { + bail!("facts does not support --format"); + } + format = cursor.next().with_context(|| "--format requires a value")?; + } else if let Some(value) = arg.strip_prefix("--format=") { + if !allow_format { + bail!("facts does not support --format"); + } + format = value.to_string(); + } else if arg == "--mass" { + options.mass = cursor + .next() + .with_context(|| "--mass requires a value")? + .parse() + .with_context(|| "--mass must be an integer")?; + } else if let Some(value) = arg.strip_prefix("--mass=") { + options.mass = value.parse().with_context(|| "--mass must be an integer")?; + } else if arg == "--fuzzy" { + options.fuzzy = cursor + .next() + .with_context(|| "--fuzzy requires a value")? 
+ .parse() + .with_context(|| "--fuzzy must be an integer")?; + } else if let Some(value) = arg.strip_prefix("--fuzzy=") { + options.fuzzy = value + .parse() + .with_context(|| "--fuzzy must be an integer")?; + } else { + targets.push(PathBuf::from(arg)); + } + } + Ok(ReportFactsArgs { + options, + targets, + output, + jobs, + format, + }) +} + +fn write_json(value: &serde_json::Value, output: Option<&PathBuf>) -> Result<()> { + let text = serde_json::to_string_pretty(value)?; + if let Some(path) = output { + std::fs::write(path, text)?; + } else { + println!("{text}"); + } + Ok(()) +} + +fn render_report_with_ruby( + facts: &serde_json::Value, + format: &str, + output: Option<&PathBuf>, +) -> Result<()> { + let mut command = ruby_renderer_command(); + command + .arg("render-report") + .arg("--from-stdin") + .arg(format!("--format={format}")) + .stdin(Stdio::piped()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()); + if let Some(path) = output { + command.arg(format!("--output={}", path.display())); + } + + let mut child = command + .spawn() + .with_context(|| "failed to start Ruby decomplex renderer")?; + { + let stdin = child + .stdin + .as_mut() + .with_context(|| "failed to open Ruby renderer stdin")?; + stdin.write_all(serde_json::to_string(facts)?.as_bytes())?; + } + let status = child + .wait() + .with_context(|| "failed to wait for Ruby decomplex renderer")?; + if !status.success() { + bail!("Ruby decomplex renderer failed with status {status}"); + } + Ok(()) +} + +fn ruby_renderer_command() -> ProcessCommand { + if let Ok(program) = std::env::var("DECOMPLEX_RUBY_RENDERER") { + if !program.trim().is_empty() { + return ProcessCommand::new(program); + } + } + + let mut command = ProcessCommand::new("ruby"); + command.arg( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("..") + .join("exe") + .join("decomplex"), + ); + command +} + fn parse_language_files_and_jobs( args: Vec, ) -> Result<(String, Vec, Option)> { diff --git 
a/gems/decomplex/test/report_test.rb b/gems/decomplex/test/report_test.rb index bae30b880..68736f921 100644 --- a/gems/decomplex/test/report_test.rb +++ b/gems/decomplex/test/report_test.rb @@ -49,6 +49,35 @@ def test_json_report_is_sarif_alias assert_equal JSON.parse(r.to_sarif), JSON.parse(r.to_json) end + def test_report_facts_round_trip_to_same_markdown + f = Tempfile.new(["rep_facts", ".rb"]) + f.write("def a(n)\n case n\n when A then 1\n when B then 2\n end\nend\n" \ + "def b(n)\n case n\n when A then 3\n when B then 4\n end\nend\n") + f.close + + facts = Decomplex::ReportFacts.from_files([f.path], engine: "ruby") + from_source = Decomplex::Report.new([f.path]).to_markdown + from_facts = Decomplex::Report.from_facts(JSON.generate(facts)).to_markdown + + assert_equal Decomplex::ReportFacts::FORMAT, facts.fetch("format") + assert_equal from_source, from_facts + ensure + f&.unlink + end + + def test_report_from_facts_does_not_reparse_source + f = Tempfile.new(["rep_facts_deleted", ".rb"]) + f.write("def a(n)\n if n && ready?\n run\n end\nend\n") + f.close + + facts = Decomplex::ReportFacts.from_files([f.path], engine: "ruby") + f.unlink + + md = Decomplex::Report.from_facts(JSON.generate(facts)).to_markdown + assert_includes md, "# Decomplex Report" + assert_includes md, "Files analyzed: 1" + end + def test_compact_sarif_omits_heavy_payloads_for_ci_uploads sarif = JSON.parse(report.to_sarif(include_snapshot: false, include_finding_payload: false, max_results: 2)) run = sarif.fetch("runs").first From e9eb509f25f2d36b17887fe8a49743530aa29e33 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 01:57:52 +0000 Subject: [PATCH 32/52] Advance Rust decomplex parity and oracle coverage --- .../decomplex/examples/oracles/co-update.json | 1 - .../examples/oracles/decision-pressure.json | 1 - .../examples/oracles/derived-state.json | 1 - .../examples/oracles/false-simplicity.json | 1 - .../decomplex/examples/oracles/fat-union.json | 1 - 
.../examples/oracles/flay-similarity.json | 1 - .../examples/oracles/function-lcom.json | 1 - .../oracles/implicit-control-flow.json | 1 - .../oracles/inconsistent-rename-clone.json | 1 - .../examples/oracles/local-flow.json | 1 - .../examples/oracles/locality-drag.json | 1 - gems/decomplex/examples/oracles/miner.json | 1 - .../oracles/operational-discontinuity.json | 1 - .../examples/oracles/oversized-predicate.json | 1 - .../examples/oracles/path-condition.json | 1 - .../examples/oracles/predicate-alias.json | 1 - .../examples/oracles/redundant-nil-guard.json | 1 - .../examples/oracles/semantic-alias.json | 1 - .../examples/oracles/sequence-mine.json | 1 - .../oracles/state-branch-density.json | 1 - .../examples/oracles/state-mesh.json | 1 - .../examples/oracles/structural-topology.json | 1 - .../oracles/temporal-ordering-pressure.json | 1 - .../oracles/weighted-inlined-complexity.json | 1 - gems/decomplex/rust/src/decomplex/ast.rs | 26 +- .../rust/src/decomplex/convergence.rs | 207 +++ gems/decomplex/rust/src/decomplex/delta.rs | 94 ++ .../decomplex/detectors/decision_pressure.rs | 401 +++--- .../src/decomplex/detectors/derived_state.rs | 146 +- .../detectors/implicit_control_flow.rs | 101 +- .../src/decomplex/detectors/local_flow.rs | 138 +- .../detectors/oversized_predicate.rs | 40 +- .../src/decomplex/detectors/path_condition.rs | 3 - .../src/decomplex/detectors/sequence_mine.rs | 162 ++- .../src/decomplex/detectors/state_mesh.rs | 97 +- .../detectors/structural_topology.rs | 603 +++----- .../detectors/temporal_ordering_pressure.rs | 106 +- .../weighted_inlined_cognitive_complexity.rs | 457 +++--- gems/decomplex/rust/src/decomplex/mod.rs | 6 + gems/decomplex/rust/src/decomplex/report.rs | 1237 +++++++++++++++++ .../rust/src/decomplex/report_facts.rs | 148 +- .../rust/src/decomplex/report_value.rs | 111 ++ .../rust/src/decomplex/root_cause.rs | 287 ++++ gems/decomplex/rust/src/decomplex/sarif.rs | 219 +++ gems/decomplex/rust/src/decomplex/syntax.rs | 31 + 
.../src/decomplex/syntax/adapters/base.rs | 343 ++++- .../rust/src/decomplex/syntax/adapters/c.rs | 4 + .../rust/src/decomplex/syntax/adapters/cpp.rs | 4 + .../src/decomplex/syntax/adapters/csharp.rs | 4 + .../rust/src/decomplex/syntax/adapters/go.rs | 8 + .../src/decomplex/syntax/adapters/java.rs | 4 + .../decomplex/syntax/adapters/javascript.rs | 4 + .../src/decomplex/syntax/adapters/kotlin.rs | 4 + .../rust/src/decomplex/syntax/adapters/lua.rs | 8 +- .../rust/src/decomplex/syntax/adapters/php.rs | 8 + .../src/decomplex/syntax/adapters/python.rs | 4 + .../src/decomplex/syntax/adapters/ruby.rs | 611 +++++++- .../src/decomplex/syntax/adapters/rust.rs | 4 + .../src/decomplex/syntax/adapters/swift.rs | 14 +- .../decomplex/syntax/adapters/typescript.rs | 4 + .../rust/src/decomplex/syntax/adapters/zig.rs | 6 +- .../decomplex/syntax/tree_sitter_adapter.rs | 268 +++- gems/decomplex/rust/src/main.rs | 183 ++- gems/decomplex/test/examples_oracle_test.rb | 19 +- 64 files changed, 4896 insertions(+), 1252 deletions(-) create mode 100644 gems/decomplex/rust/src/decomplex/convergence.rs create mode 100644 gems/decomplex/rust/src/decomplex/delta.rs create mode 100644 gems/decomplex/rust/src/decomplex/report.rs create mode 100644 gems/decomplex/rust/src/decomplex/report_value.rs create mode 100644 gems/decomplex/rust/src/decomplex/root_cause.rs create mode 100644 gems/decomplex/rust/src/decomplex/sarif.rs diff --git a/gems/decomplex/examples/oracles/co-update.json b/gems/decomplex/examples/oracles/co-update.json index 1ebd374bd..5ebc590a0 100644 --- a/gems/decomplex/examples/oracles/co-update.json +++ b/gems/decomplex/examples/oracles/co-update.json @@ -1,6 +1,5 @@ { "detector": "co-update", - "engine": "ruby", "options": { }, "expected": { diff --git a/gems/decomplex/examples/oracles/decision-pressure.json b/gems/decomplex/examples/oracles/decision-pressure.json index 34e94069d..f7550f8d8 100644 --- a/gems/decomplex/examples/oracles/decision-pressure.json +++ 
b/gems/decomplex/examples/oracles/decision-pressure.json @@ -1,6 +1,5 @@ { "detector": "decision-pressure", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/derived-state.json b/gems/decomplex/examples/oracles/derived-state.json index 3ca4bc743..1dd101977 100644 --- a/gems/decomplex/examples/oracles/derived-state.json +++ b/gems/decomplex/examples/oracles/derived-state.json @@ -1,6 +1,5 @@ { "detector": "derived-state", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/false-simplicity.json b/gems/decomplex/examples/oracles/false-simplicity.json index eede2341d..807166a32 100644 --- a/gems/decomplex/examples/oracles/false-simplicity.json +++ b/gems/decomplex/examples/oracles/false-simplicity.json @@ -1,6 +1,5 @@ { "detector": "false-simplicity", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/fat-union.json b/gems/decomplex/examples/oracles/fat-union.json index 09fa37249..855427eee 100644 --- a/gems/decomplex/examples/oracles/fat-union.json +++ b/gems/decomplex/examples/oracles/fat-union.json @@ -1,6 +1,5 @@ { "detector": "fat-union", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/flay-similarity.json b/gems/decomplex/examples/oracles/flay-similarity.json index d3540e736..0aa527fb4 100644 --- a/gems/decomplex/examples/oracles/flay-similarity.json +++ b/gems/decomplex/examples/oracles/flay-similarity.json @@ -1,6 +1,5 @@ { "detector": "flay-similarity", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/function-lcom.json b/gems/decomplex/examples/oracles/function-lcom.json index 884ce9f7d..7dfcf5103 100644 --- a/gems/decomplex/examples/oracles/function-lcom.json +++ b/gems/decomplex/examples/oracles/function-lcom.json @@ -1,6 +1,5 @@ { "detector": "function-lcom", - "engine": "ruby", "options": { }, "expected": [ diff --git 
a/gems/decomplex/examples/oracles/implicit-control-flow.json b/gems/decomplex/examples/oracles/implicit-control-flow.json index b04cdd332..ce5649f3e 100644 --- a/gems/decomplex/examples/oracles/implicit-control-flow.json +++ b/gems/decomplex/examples/oracles/implicit-control-flow.json @@ -1,6 +1,5 @@ { "detector": "implicit-control-flow", - "engine": "ruby", "options": { }, "expected": { diff --git a/gems/decomplex/examples/oracles/inconsistent-rename-clone.json b/gems/decomplex/examples/oracles/inconsistent-rename-clone.json index ee66f85d2..487a93610 100644 --- a/gems/decomplex/examples/oracles/inconsistent-rename-clone.json +++ b/gems/decomplex/examples/oracles/inconsistent-rename-clone.json @@ -1,6 +1,5 @@ { "detector": "inconsistent-rename-clone", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/local-flow.json b/gems/decomplex/examples/oracles/local-flow.json index fec5d2c17..4b12452d2 100644 --- a/gems/decomplex/examples/oracles/local-flow.json +++ b/gems/decomplex/examples/oracles/local-flow.json @@ -1,6 +1,5 @@ { "detector": "local-flow", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/locality-drag.json b/gems/decomplex/examples/oracles/locality-drag.json index 51f7d4683..918b9c1bd 100644 --- a/gems/decomplex/examples/oracles/locality-drag.json +++ b/gems/decomplex/examples/oracles/locality-drag.json @@ -1,6 +1,5 @@ { "detector": "locality-drag", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/miner.json b/gems/decomplex/examples/oracles/miner.json index 86539e517..6ccddd467 100644 --- a/gems/decomplex/examples/oracles/miner.json +++ b/gems/decomplex/examples/oracles/miner.json @@ -1,6 +1,5 @@ { "detector": "miner", - "engine": "ruby", "options": { }, "expected": { diff --git a/gems/decomplex/examples/oracles/operational-discontinuity.json b/gems/decomplex/examples/oracles/operational-discontinuity.json index 
3db9915de..1fbbe2f4a 100644 --- a/gems/decomplex/examples/oracles/operational-discontinuity.json +++ b/gems/decomplex/examples/oracles/operational-discontinuity.json @@ -1,6 +1,5 @@ { "detector": "operational-discontinuity", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/oversized-predicate.json b/gems/decomplex/examples/oracles/oversized-predicate.json index 6fe219147..a3901ad4b 100644 --- a/gems/decomplex/examples/oracles/oversized-predicate.json +++ b/gems/decomplex/examples/oracles/oversized-predicate.json @@ -1,6 +1,5 @@ { "detector": "oversized-predicate", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/path-condition.json b/gems/decomplex/examples/oracles/path-condition.json index ab60d7313..b26ed9dbc 100644 --- a/gems/decomplex/examples/oracles/path-condition.json +++ b/gems/decomplex/examples/oracles/path-condition.json @@ -1,6 +1,5 @@ { "detector": "path-condition", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/predicate-alias.json b/gems/decomplex/examples/oracles/predicate-alias.json index 2cd893067..ab4f95343 100644 --- a/gems/decomplex/examples/oracles/predicate-alias.json +++ b/gems/decomplex/examples/oracles/predicate-alias.json @@ -1,6 +1,5 @@ { "detector": "predicate-alias", - "engine": "ruby", "options": { }, "expected": { diff --git a/gems/decomplex/examples/oracles/redundant-nil-guard.json b/gems/decomplex/examples/oracles/redundant-nil-guard.json index 28dc8d6e7..9bb46a735 100644 --- a/gems/decomplex/examples/oracles/redundant-nil-guard.json +++ b/gems/decomplex/examples/oracles/redundant-nil-guard.json @@ -1,6 +1,5 @@ { "detector": "redundant-nil-guard", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/semantic-alias.json b/gems/decomplex/examples/oracles/semantic-alias.json index 308d1b8a0..678e710b5 100644 --- 
a/gems/decomplex/examples/oracles/semantic-alias.json +++ b/gems/decomplex/examples/oracles/semantic-alias.json @@ -1,6 +1,5 @@ { "detector": "semantic-alias", - "engine": "ruby", "options": { }, "expected": { diff --git a/gems/decomplex/examples/oracles/sequence-mine.json b/gems/decomplex/examples/oracles/sequence-mine.json index 58921b6de..ca294ef5b 100644 --- a/gems/decomplex/examples/oracles/sequence-mine.json +++ b/gems/decomplex/examples/oracles/sequence-mine.json @@ -1,6 +1,5 @@ { "detector": "sequence-mine", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/state-branch-density.json b/gems/decomplex/examples/oracles/state-branch-density.json index ef98b453e..395a31a46 100644 --- a/gems/decomplex/examples/oracles/state-branch-density.json +++ b/gems/decomplex/examples/oracles/state-branch-density.json @@ -1,6 +1,5 @@ { "detector": "state-branch-density", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/state-mesh.json b/gems/decomplex/examples/oracles/state-mesh.json index 0d70466c3..7d4e4d024 100644 --- a/gems/decomplex/examples/oracles/state-mesh.json +++ b/gems/decomplex/examples/oracles/state-mesh.json @@ -1,6 +1,5 @@ { "detector": "state-mesh", - "engine": "ruby", "options": { }, "expected": { diff --git a/gems/decomplex/examples/oracles/structural-topology.json b/gems/decomplex/examples/oracles/structural-topology.json index 91ab0e67b..e437e4606 100644 --- a/gems/decomplex/examples/oracles/structural-topology.json +++ b/gems/decomplex/examples/oracles/structural-topology.json @@ -1,6 +1,5 @@ { "detector": "structural-topology", - "engine": "ruby", "options": { }, "expected": { diff --git a/gems/decomplex/examples/oracles/temporal-ordering-pressure.json b/gems/decomplex/examples/oracles/temporal-ordering-pressure.json index 8542c51b5..beba2d751 100644 --- a/gems/decomplex/examples/oracles/temporal-ordering-pressure.json +++ 
b/gems/decomplex/examples/oracles/temporal-ordering-pressure.json @@ -1,6 +1,5 @@ { "detector": "temporal-ordering-pressure", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/examples/oracles/weighted-inlined-complexity.json b/gems/decomplex/examples/oracles/weighted-inlined-complexity.json index fdf8fe941..3a0c82850 100644 --- a/gems/decomplex/examples/oracles/weighted-inlined-complexity.json +++ b/gems/decomplex/examples/oracles/weighted-inlined-complexity.json @@ -1,6 +1,5 @@ { "detector": "weighted-inlined-complexity", - "engine": "ruby", "options": { }, "expected": [ diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs index 3fe25bd9b..7778f6d34 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -305,10 +305,28 @@ pub fn body_stmts(defn_node: &Node) -> Vec<&Node> { let Some(body) = scope.children.get(2).and_then(node) else { return Vec::new(); }; - if body.r#type == "BLOCK" { - body.children.iter().filter_map(node).collect() - } else { - vec![body] + statement_nodes(body) +} + +fn statement_nodes(body: &Node) -> Vec<&Node> { + match body.r#type.as_str() { + "BLOCK" | "COMPOUND_STATEMENT" | "DECLARATION_LIST" | "FUNCTION_BODY" | "HASH" + | "STATEMENTS" => body.children.iter().filter_map(node).collect(), + "RESCUE" | "ENSURE" => { + let mut out = Vec::new(); + if let Some(primary) = body.children.first().and_then(node) { + out.extend(statement_nodes(primary)); + } + out.extend( + body.children + .iter() + .skip(1) + .filter_map(node) + .filter(|child| child.r#type != "SCOPE"), + ); + out + } + _ => vec![body], } } diff --git a/gems/decomplex/rust/src/decomplex/convergence.rs b/gems/decomplex/rust/src/decomplex/convergence.rs new file mode 100644 index 000000000..e646028d3 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/convergence.rs @@ -0,0 +1,207 @@ +use crate::decomplex::report::ReportSection; +use crate::decomplex::report_value 
as rv; +use serde_json::Value; +use std::collections::{BTreeMap, HashMap}; + +pub const TIER_WEIGHT: &[(i64, i64)] = &[(1, 3), (2, 2), (3, 1)]; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Unit { + pub file: String, + pub method: String, + pub detectors: Vec, + pub n_detectors: usize, + pub score: i64, + pub findings: usize, + pub at: String, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct FileRollup { + pub file: String, + pub detectors: Vec, + pub n_detectors: usize, + pub methods: usize, + pub score: i64, +} + +#[derive(Clone, Debug)] +struct Accumulator { + dets: BTreeMap, + tiers: BTreeMap, + findings: usize, + at: Option, +} + +pub fn rollup(sections: &[ReportSection], min_detectors: usize) -> Vec { + let mut acc: HashMap<(String, String), Accumulator> = HashMap::new(); + for section in sections { + for finding in §ion.findings { + for loc in locations(finding) { + let (Some(file), Some(method), line) = parse_loc(&loc) else { + continue; + }; + if file.is_empty() || method.is_empty() { + continue; + } + let unit = acc + .entry((file.clone(), method.clone())) + .or_insert_with(|| Accumulator { + dets: BTreeMap::new(), + tiers: BTreeMap::new(), + findings: 0, + at: None, + }); + *unit.dets.entry(section.title.clone()).or_insert(0) += 1; + unit.tiers.insert(section.title.clone(), section.tier); + unit.findings += 1; + if unit.at.is_none() { + unit.at = Some(match line { + Some(line) => format!("{file}:{method}:{line}"), + None => format!("{file}:{method}"), + }); + } + } + } + } + + let mut units = acc + .into_iter() + .filter_map(|((file, method), data)| { + if data.dets.len() < min_detectors { + return None; + } + let detectors = data.dets.keys().cloned().collect::>(); + let score = data.tiers.values().map(|tier| tier_weight(*tier)).sum(); + Some(Unit { + file, + method, + n_detectors: detectors.len(), + detectors, + score, + findings: data.findings, + at: data.at.unwrap_or_default(), + }) + }) + .collect::>(); + units.sort_by(|left, right| { 
+ right + .n_detectors + .cmp(&left.n_detectors) + .then_with(|| right.score.cmp(&left.score)) + .then_with(|| right.findings.cmp(&left.findings)) + .then_with(|| left.file.cmp(&right.file)) + .then_with(|| left.method.cmp(&right.method)) + }); + units +} + +pub fn by_file(units: &[Unit]) -> Vec { + let mut grouped: BTreeMap> = BTreeMap::new(); + for unit in units { + grouped.entry(unit.file.clone()).or_default().push(unit); + } + + let mut rows = grouped + .into_iter() + .filter_map(|(file, units)| { + let mut detectors = units + .iter() + .flat_map(|unit| unit.detectors.iter().cloned()) + .collect::>(); + detectors.sort(); + detectors.dedup(); + if detectors.len() < 2 { + return None; + } + let score = units.iter().map(|unit| unit.score).sum(); + Some(FileRollup { + file, + n_detectors: detectors.len(), + detectors, + methods: units.len(), + score, + }) + }) + .collect::>(); + rows.sort_by(|left, right| { + right + .n_detectors + .cmp(&left.n_detectors) + .then_with(|| right.score.cmp(&left.score)) + .then_with(|| right.methods.cmp(&left.methods)) + .then_with(|| left.file.cmp(&right.file)) + }); + rows +} + +pub fn locations(finding: &Value) -> Vec { + let mut out = Vec::new(); + for key in ["at", "ref_at"] { + if let Some(Value::String(text)) = rv::get(finding, key) { + out.push(text.clone()); + } + } + if let Some(Value::Array(sites)) = rv::get(finding, "sites") { + out.extend( + sites + .iter() + .filter_map(|site| site.as_str().map(ToOwned::to_owned)), + ); + } + out +} + +pub fn parse_loc(loc: &str) -> (Option, Option, Option) { + let mut parts = loc.split(':').map(ToOwned::to_owned).collect::>(); + if parts.len() < 2 { + return (None, None, None); + } + let line = if parts + .last() + .is_some_and(|part| part.chars().all(|ch| ch.is_ascii_digit())) + { + parts.pop() + } else { + None + }; + let method = parts.pop(); + let file = Some(parts.join(":")); + (file, method, line) +} + +pub fn tier_weight(tier: i64) -> i64 { + TIER_WEIGHT + .iter() + 
.find_map(|(key, value)| (*key == tier).then_some(*value)) + .unwrap_or(1) +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn parse_loc_splits_from_the_right() { + assert_eq!( + parse_loc("dir:a.rb:method:42"), + ( + Some("dir:a.rb".to_string()), + Some("method".to_string()), + Some("42".to_string()) + ) + ); + } + + #[test] + fn rollup_requires_distinct_detectors() { + let sections = vec![ + ReportSection::new("A", 1, "", vec![json!({"at": "a.rb:m:1"})]), + ReportSection::new("B", 2, "", vec![json!({"at": "a.rb:m:2"})]), + ]; + let rows = rollup(§ions, 2); + assert_eq!(rows.len(), 1); + assert_eq!(rows[0].score, 5); + } +} diff --git a/gems/decomplex/rust/src/decomplex/delta.rs b/gems/decomplex/rust/src/decomplex/delta.rs new file mode 100644 index 000000000..22144b872 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/delta.rs @@ -0,0 +1,94 @@ +use crate::decomplex::report::ReportSection; +use crate::decomplex::root_cause::{self, Cluster}; +use serde_json::{json, Map, Value}; +use std::collections::BTreeMap; + +const SEP: &str = "\t"; + +pub fn snapshot(sections: &[ReportSection], clusters: &[Cluster]) -> Value { + let mut findings: BTreeMap = BTreeMap::new(); + let mut details: BTreeMap> = BTreeMap::new(); + for section in sections { + for finding in §ion.findings { + let fp = fingerprint(§ion.title, finding); + *findings.entry(fp.clone()).or_insert(0) += 1; + details + .entry(fp) + .or_default() + .push(json_safe_finding(§ion.title, finding)); + } + } + + let mut site: BTreeMap = BTreeMap::new(); + let mut site_details: BTreeMap> = BTreeMap::new(); + for section in sections { + for finding in §ion.findings { + let detail = json_safe_finding(§ion.title, finding); + for sfp in site_fingerprints(§ion.title, finding) { + *site.entry(sfp.clone()).or_insert(0) += 1; + site_details.entry(sfp).or_default().push(detail.clone()); + } + } + } + + let mut cluster_values = Map::new(); + for cluster in clusters { + 
cluster_values.insert( + format!("{}{}{}", cluster.kind, SEP, cluster.token), + json!({ + "n": cluster.n_detectors, + "s": cluster.support, + "fat": cluster.fat_union, + }), + ); + } + let total = findings.values().sum::(); + json!({ + "findings": findings, + "site_findings": site, + "details": details, + "site_details": site_details, + "clusters": cluster_values, + "total": total, + }) +} + +pub fn fingerprint(detector: &str, finding: &Value) -> String { + let mut entities = root_cause::entities(finding) + .into_iter() + .map(|entity| format!("{}:{}", entity.kind, entity.token)) + .collect::>(); + entities.sort(); + let mut units = root_cause::finding_units(finding) + .into_iter() + .map(|(file, method)| format!("{file}#{method}")) + .collect::>(); + units.sort(); + units.dedup(); + [detector.to_string(), entities.join(","), units.join(",")].join(SEP) +} + +pub fn site_fingerprints(detector: &str, finding: &Value) -> Vec { + let mut entities = root_cause::entities(finding) + .into_iter() + .map(|entity| format!("{}:{}", entity.kind, entity.token)) + .collect::>(); + entities.sort(); + let entity_text = entities.join(","); + let mut units = root_cause::finding_units(finding) + .into_iter() + .map(|(file, method)| format!("{file}#{method}")) + .collect::>(); + units.sort(); + units.dedup(); + units + .into_iter() + .map(|unit| [detector.to_string(), entity_text.clone(), unit].join(SEP)) + .collect() +} + +pub fn json_safe_finding(detector: &str, finding: &Value) -> Value { + let mut object = finding.as_object().cloned().unwrap_or_default(); + object.insert("detector".to_string(), Value::String(detector.to_string())); + Value::Object(object) +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs index b02ccec76..8b9567825 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs @@ -1,9 
+1,12 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::{self, Document, Language}; +use crate::decomplex::ast::Span; +use crate::decomplex::detectors::local_flow::{self, MethodSummary}; +use crate::decomplex::syntax::{self, CallSite, Document, Language}; use anyhow::Result; +use regex::Regex; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; use std::path::PathBuf; +use std::sync::OnceLock; const GUARD_MIDS: &[&str] = &[ "is_a?", @@ -45,256 +48,218 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result Vec { let mut guard = Vec::new(); let mut dispatch = Vec::new(); + let methods = local_flow::scan_documents(documents); + let assignment_maps = build_assignment_maps(&methods); + let methods_by_file = methods_by_file(&methods); for document in documents { - let mut detector = DecisionPressure::new(document.file.clone(), document.lines.clone()); - detector.walk(&document.normalized_root, &Vec::new(), &BTreeMap::new()); - guard.extend(detector.guard_hits); - dispatch.extend(detector.dispatch_hits); + for call in &document.call_sites { + if call.receiver.is_empty() { + continue; + } + let empty = BTreeMap::new(); + let assignment_map = assignment_maps + .get(&(call.file.clone(), call.function.clone())) + .unwrap_or(&empty); + if eliminable_guard(call) { + if let Some(contract) = contract_of(&call.receiver, assignment_map, 0) { + guard.push(hit(contract, call)); + } + } else if essential_dispatch(call) { + if let Some(contract) = contract_of(&call.receiver, assignment_map, 0) { + dispatch.push(hit(contract, call)); + } + } + } + + if let Some(methods) = methods_by_file.get(&document.file) { + guard.extend(rescue_nil_hits(document, methods, &assignment_maps)); + } } + let mut seen = BTreeSet::new(); + guard.retain(|hit| { + seen.insert(( + hit.contract.clone(), + hit.file.clone(), + hit.defn.clone(), + hit.line, + )) + }); + Report::new(guard, dispatch).ranked() } -struct DecisionPressure { - file: 
String, - lines: Vec, - guard_hits: Vec, - dispatch_hits: Vec, +fn eliminable_guard(call: &CallSite) -> bool { + GUARD_MIDS.contains(&call.message.as_str()) || call.safe_navigation } -impl DecisionPressure { - fn new(file: String, lines: Vec) -> Self { - Self { - file, - lines, - guard_hits: Vec::new(), - dispatch_hits: Vec::new(), - } +fn essential_dispatch(call: &CallSite) -> bool { + call.message.ends_with('?') +} + +fn hit(contract: String, call: &CallSite) -> Hit { + Hit { + contract, + file: call.file.clone(), + defn: call.function.clone(), + line: call.line, + span: call.span, } +} + +fn build_assignment_maps( + methods: &[MethodSummary], +) -> BTreeMap<(String, String), BTreeMap> { + methods + .iter() + .map(|method| { + ( + (method.file.clone(), method.name.clone()), + local_contract_assignments(method), + ) + }) + .collect() +} - fn walk(&mut self, node: &Node, defstack: &[String], asgmap: &BTreeMap) { - let mut next_defstack = defstack.to_vec(); - let mut next_asgmap = asgmap.clone(); +fn methods_by_file<'a>(methods: &'a [MethodSummary]) -> BTreeMap> { + let mut out: BTreeMap> = BTreeMap::new(); + for method in methods { + out.entry(method.file.clone()).or_default().push(method); + } + out +} - if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; - if let Some(Child::Symbol(name)) = node.children.get(name_index) { - next_defstack.push(name.clone()); - } - next_asgmap = self.build_asgmap(node); +fn local_contract_assignments(method: &MethodSummary) -> BTreeMap { + let mut map = BTreeMap::new(); + for statement in &method.statements { + if statement.writes.len() != 1 { + continue; } - - self.record_decision(node, &next_defstack, &next_asgmap); - self.record_rescue_nil(node, &next_defstack, &next_asgmap); - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, &next_defstack, &next_asgmap); + let name = statement.writes.iter().next().unwrap(); + if map.contains_key(name) { 
+ continue; + } + if let Some(source) = local_contract_source(name, &statement.source) { + map.insert(name.clone(), source); } } + map.into_iter() + .filter_map(|(name, source)| contract_of(&source, &BTreeMap::new(), 0).map(|c| (name, c))) + .collect() +} - fn build_asgmap(&self, defn_node: &Node) -> BTreeMap { - let mut map = BTreeMap::new(); - let mut stack = ast::body_stmts(defn_node); - stack.reverse(); - - while let Some(node) = stack.pop() { - if node.r#type == "LASGN" { - if let Some(Child::String(name)) = node.children.get(0) { - if let Some(src) = node.children.get(1).and_then(ast::node) { - if !map.contains_key(name) && self.simple_source(src) { - map.insert(name.clone(), src.clone()); - } - } - } - } - for child in node.children.iter().filter_map(ast::node).rev() { - stack.push(child); - } - } - map +fn local_contract_source(name: &str, source: &str) -> Option { + let pattern = format!( + r"(?s)\b{}\b\s*(?::=|=)\s*(.+?)\s*;?\s*$", + regex::escape(name) + ); + let assignment = Regex::new(&pattern).ok()?; + let rhs = assignment.captures(source)?.get(1)?.as_str().trim(); + static CONDITIONAL_SOURCE: OnceLock = OnceLock::new(); + let conditional = + CONDITIONAL_SOURCE.get_or_init(|| Regex::new(r"\s(?:if|unless|rescue)\s|\?|:").unwrap()); + if conditional.is_match(rhs) { + None + } else { + Some(rhs.to_string()) } +} - fn simple_source(&self, n: &Node) -> bool { - match n.r#type.as_str() { - "IVAR" => true, - "CALL" | "QCALL" => { - let recv = n.children.get(0).and_then(ast::node); - let mid = n.children.get(1).and_then(|c| match c { - Child::Symbol(s) => Some(s), - _ => None, - }); - let args = n.children.get(2); - recv.is_some() - && (args.is_none() - || matches!(args, Some(Child::Nil)) - || mid.map(|s| s.as_str()) == Some("[]")) +fn rescue_nil_hits( + document: &Document, + methods: &[&MethodSummary], + assignment_maps: &BTreeMap<(String, String), BTreeMap>, +) -> Vec { + let mut out = Vec::new(); + for method in methods { + let empty = BTreeMap::new(); + 
let assignment_map = assignment_maps + .get(&(method.file.clone(), method.name.clone())) + .unwrap_or(&empty); + for statement in &method.statements { + if !statement.source.contains("rescue nil") { + continue; } - _ => false, + let Some(call) = document.call_sites.iter().find(|candidate| { + candidate.function == method.name && inside_span(candidate.span, statement.span) + }) else { + continue; + }; + let Some(contract) = contract_of(&call_expression(call), assignment_map, 0) else { + continue; + }; + out.push(Hit { + contract, + file: method.file.clone(), + defn: method.name.clone(), + line: statement.line, + span: statement.span, + }); } } + out +} - fn hit(&self, contract: String, defstack: &[String], node: &Node) -> Hit { - Hit { - contract, - file: self.file.clone(), - defn: defstack - .last() - .cloned() - .unwrap_or_else(|| "(top-level)".to_string()), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - } +fn contract_of( + receiver: &str, + assignment_map: &BTreeMap, + depth: usize, +) -> Option { + let source = receiver.trim(); + if source.is_empty() || depth >= 8 { + return None; } - fn record_decision( - &mut self, - node: &Node, - defstack: &[String], - asgmap: &BTreeMap, - ) { - if !matches!(node.r#type.as_str(), "CALL" | "QCALL") { - return; - } + if let Some(mapped) = assignment_map.get(source) { + return Some(mapped.clone()); + } + if source.starts_with('@') { + return Some(source.to_string()); + } - let recv = node.children.get(0).and_then(ast::node); - let mid = node.children.get(1).and_then(|c| match c { - Child::Symbol(s) => Some(s), - _ => None, - }); - let _args = node.children.get(2); + static INDEX_SOURCE: OnceLock = OnceLock::new(); + let index_source = + INDEX_SOURCE.get_or_init(|| Regex::new(r"^(?:[A-Za-z_]\w*|self)\s*\[(.+)\]$").unwrap()); + if let Some(captures) = index_source.captures(source) { + return Some(format!("[{}]", captures[1].trim())); + } - let 
Some(recv) = recv else { return }; - let Some(mid) = mid else { return }; + static LOCAL_SOURCE: OnceLock = OnceLock::new(); + let local_source = LOCAL_SOURCE.get_or_init(|| Regex::new(r"^[A-Za-z_]\w*$").unwrap()); + if local_source.is_match(source) { + return Some("~local".to_string()); + } - let guard = - (node.r#type == "CALL" && GUARD_MIDS.contains(&mid.as_str())) || node.r#type == "QCALL"; - if guard { - if let Some(c) = self.contract_of(recv, asgmap, 0) { - self.guard_hits.push(self.hit(c, defstack, node)); + if source.contains('.') { + let mut member = source.split('.').last().unwrap_or("").to_string(); + if let Some(index) = member.find('(') { + if member.ends_with(')') { + member.truncate(index); } - return; } - - if node.r#type == "CALL" && mid.ends_with('?') { - if let Some(c) = self.contract_of(recv, asgmap, 0) { - self.dispatch_hits.push(self.hit(c, defstack, node)); - } + if TRANSIENT_NOARG_MIDS.contains(&member.as_str()) || member.is_empty() { + return None; } + return Some(format!(".{member}")); } - fn record_rescue_nil( - &mut self, - node: &Node, - defstack: &[String], - asgmap: &BTreeMap, - ) { - if node.r#type != "RESCUE" { - return; - } - - let body = node.children.get(0).and_then(ast::node); - let resb = node.children.get(1).and_then(ast::node); - - let Some(resb) = resb else { return }; - if resb.r#type != "RESBODY" { - return; - }; - if !matches!(resb.children.get(0), None | Some(Child::Nil)) { - return; - }; - - let handler = resb.children.get(1); - let nil_handler = matches!(handler, None | Some(Child::Nil)) - || handler - .and_then(ast::node) - .map(|n| n.r#type == "NIL") - .unwrap_or(false); - if !nil_handler { - return; - }; - - let Some(body) = body else { return }; - if !matches!(body.r#type.as_str(), "CALL" | "QCALL") { - return; - }; - - if let Some(c) = self.contract_of(body, asgmap, 0) { - self.guard_hits.push(self.hit(c, defstack, node)); - } - } + None +} - fn contract_of( - &self, - n: &Node, - asgmap: &BTreeMap, - depth: 
usize, - ) -> Option { - if depth >= 8 { - return None; - } +fn call_expression(call: &CallSite) -> String { + [call.receiver.as_str(), call.message.as_str()] + .into_iter() + .filter(|part| !part.is_empty()) + .collect::>() + .join(".") +} - match n.r#type.as_str() { - "LVAR" | "DVAR" => { - if let Some(Child::String(nm)) = n.children.first() { - if let Some(src) = asgmap.get(nm) { - return self.contract_of(src, asgmap, depth + 1); - } else { - return Some("~local".to_string()); - } - } - None - } - "IVAR" => { - if let Some(Child::String(attr)) = n.children.first() { - return Some(attr.clone()); - } - None - } - "CALL" | "QCALL" => { - let recv = n.children.get(0).and_then(ast::node); - let mid = n.children.get(1).and_then(|c| match c { - Child::Symbol(s) => Some(s), - _ => None, - })?; - let args = n.children.get(2); - - if mid == "[]" { - let key = if let Some(Child::Node(node)) = args { - node.children - .iter() - .filter(|c| !matches!(c, Child::Nil)) - .next() - } else { - None - }; - let kt = match key { - Some(Child::Node(k)) => ast::slice(k, &self.lines), - _ => "nil".to_string(), // Simplified key.inspect - }; - Some(format!("[{}]", kt)) - } else if (args.is_none() || matches!(args, Some(Child::Nil))) - && recv.is_some() - && !TRANSIENT_NOARG_MIDS.contains(&mid.as_str()) - { - Some(format!(".{}", mid)) - } else { - None - } - } - "VCALL" => { - if let Some(Child::Symbol(name)) = n.children.first() { - return Some(format!(".{}", name)); - } - None - } - _ => None, - } - } +fn inside_span(inner: Span, outer: Span) -> bool { + let starts_after_or_at = + (inner[0] > outer[0]) || (inner[0] == outer[0] && inner[1] >= outer[1]); + let ends_before_or_at = (inner[2] < outer[2]) || (inner[2] == outer[2] && inner[3] <= outer[3]); + starts_after_or_at && ends_before_or_at } struct Report { diff --git a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs index 662311b4b..274e6d30b 100644 --- 
a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs @@ -1,4 +1,5 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::ast::Span; +use crate::decomplex::detectors::local_flow::{self, MethodSummary, Statement}; use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; @@ -24,6 +25,7 @@ struct Asgn { deps: Vec, line: usize, span: Span, + statement_index: usize, } pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { @@ -32,116 +34,57 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result Vec { - let mut out = Vec::new(); - for document in documents { - let detector = DerivedState::new(document.file.clone(), document.lines.clone()); - detector.each_method(&document.normalized_root, &mut |file, defn, stmts| { - out.extend(analyze(file, defn, stmts)); - }); - } + let mut out = local_flow::scan_documents(documents) + .iter() + .flat_map(|method| analyze_method(method)) + .collect::>(); out.sort_by(|a, b| b.gap.cmp(&a.gap)); out } -struct DerivedState { - file: String, - #[allow(dead_code)] - lines: Vec, +fn analyze_method(method: &MethodSummary) -> Vec { + analyze(&method.file, &method.name, &assignments(method)) } -impl DerivedState { - fn new(file: String, lines: Vec) -> Self { - Self { file, lines } - } - - fn each_method(&self, node: &Node, blk: &mut dyn FnMut(&str, &str, &[&Node])) { - if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; - if let Some(Child::Symbol(name)) = node.children.get(name_index) { - blk(&self.file, name, &ast::body_stmts(node)); - } - } - for child in node.children.iter().filter_map(ast::node) { - self.each_method(child, blk); - } - } -} - -const BRANCH_RHS: &[&str] = &[ - "IF", "CASE", "CASE2", "CASE3", "AND", "OR", "WHILE", "UNTIL", "RESCUE", "ENSURE", -]; - -fn lasgns<'a>(stmts: &'a [&'a Node]) -> 
Vec<&'a Node> { - let mut acc = Vec::new(); - for s in stmts { - walk_lasgns(s, &mut acc); - } - acc -} - -fn walk_lasgns<'a>(n: &'a Node, acc: &mut Vec<&'a Node>) { - if n.r#type == "LASGN" { - acc.push(n); - if let Some(val) = n.children.get(1).and_then(ast::node) { - if BRANCH_RHS.contains(&val.r#type.as_str()) { - // branch-local RHS: do not flatten its inner assignments - } else { - for child in n.children.iter().filter_map(ast::node) { - walk_lasgns(child, acc); - } - } - } - } else { - for child in n.children.iter().filter_map(ast::node) { - walk_lasgns(child, acc); - } - } -} - -fn lvars(node: &Node, acc: &mut Vec) { - if matches!( - node.r#type.as_str(), - "BRACKETED_ARGUMENT_LIST" | "bracketed_argument_list" - ) { - return; - } - if node.r#type == "LVAR" { - if let Some(Child::String(name)) = node.children.first() { - acc.push(name.clone()); - } - } - for child in node.children.iter().filter_map(ast::node) { - lvars(child, acc); - } +fn assignments(method: &MethodSummary) -> Vec { + method + .statements + .iter() + .flat_map(|statement| { + statement + .writes + .iter() + .map(|name| Asgn { + name: name.clone(), + deps: dependencies_for(statement, name), + line: statement.line, + span: statement.span, + statement_index: statement.index, + }) + .collect::>() + }) + .collect() } -fn analyze(file: &str, defn: &str, stmts: &[&Node]) -> Vec { - let asgns: Vec<_> = lasgns(stmts) +fn dependencies_for(statement: &Statement, name: &str) -> Vec { + let mut deps: Vec<_> = statement + .dependencies .iter() - .filter_map(|n| { - let name = match n.children.first() { - Some(Child::String(name)) => name.clone(), - _ => return None, - }; - let mut deps = Vec::new(); - if let Some(val) = n.children.get(1).and_then(ast::node) { - lvars(val, &mut deps); + .filter_map(|(left, right)| { + if left == name { + Some(right.clone()) + } else { + None } - let mut deps: Vec<_> = deps - .into_iter() - .collect::>() - .into_iter() - .collect(); - deps.sort(); - Some(Asgn { - name, - 
deps, - line: n.first_lineno, - span: [n.first_lineno, n.first_column, n.last_lineno, n.last_column], - }) }) + .collect::>() + .into_iter() .collect(); + deps.sort(); + deps +} +fn analyze(file: &str, defn: &str, asgns: &[Asgn]) -> Vec { let mut out = Vec::new(); for (i, b) in asgns.iter().enumerate() { if b.deps.is_empty() { @@ -154,14 +97,17 @@ fn analyze(file: &str, defn: &str, stmts: &[&Node]) -> Vec { } // a reassigned strictly after b's definition? - let reasn = asgns.iter().skip(i + 1).find(|x| &x.name == a); + let reasn = asgns + .iter() + .skip(i + 1) + .find(|x| &x.name == a && x.statement_index > b.statement_index); let Some(reasn) = reasn else { continue }; // b recomputed at or after a's reassignment? let recomputed = asgns .iter() .skip(i + 1) - .any(|x| &x.name == &b.name && x.line >= reasn.line); + .any(|x| &x.name == &b.name && x.statement_index >= reasn.statement_index); if recomputed { continue; } diff --git a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs index 7be958b60..bc8b21d47 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs @@ -175,10 +175,7 @@ pub fn scan_documents(documents: &[Document]) -> ImplicitControlFlowReport { let effect_index = EffectIndex::build_documents(documents); let mut sequences = Vec::new(); for document in documents { - let mut miner = - ImplicitControlFlow::new(document.file.clone(), document.lines.clone(), &effect_index); - miner.walk(&document.normalized_root, &Vec::new()); - sequences.extend(miner.sequences); + sequences.extend(sequences_for_document(document, &effect_index)); } let report = Report::new(sequences); @@ -188,6 +185,68 @@ pub fn scan_documents(documents: &[Document]) -> ImplicitControlFlowReport { } } +fn sequences_for_document(document: &Document, effect_index: &EffectIndex) -> Vec { + document + 
.function_defs + .iter() + .filter_map(|function_def| { + let defn = protocol_method_name(&function_def.name); + let calls = document + .call_sites + .iter() + .filter(|call| { + call.owner == function_def.owner + && call.function == function_def.name + && call.receiver == "self" + }) + .map(|call| { + let mid = protocol_method_name(&call.message); + let effect = effect_index.effect_for(&function_def.owner, &mid); + Call { + mid, + file: call.file.clone(), + line: call.line, + span: call.span, + reads: effect.map(|e| e.reads.clone()).unwrap_or_default(), + writes: effect.map(|e| e.writes.clone()).unwrap_or_default(), + } + }) + .collect::>(); + + if calls + .iter() + .filter(|call| !call.reads.is_empty() || !call.writes.is_empty()) + .count() + < 2 + { + return None; + } + + Some(MethodSequence { + file: function_def.file.clone(), + owner: function_def.owner.clone(), + defn, + line: function_def.line, + calls, + }) + }) + .collect() +} + +fn protocol_method_name(name: &str) -> String { + name.split(['.', ':']) + .filter(|part| !part.is_empty()) + .last() + .unwrap_or(name) + .to_string() +} + +fn normalize_protocol_state(name: &str) -> String { + name.trim_start_matches('@') + .trim_end_matches('=') + .to_string() +} + struct ImplicitControlFlow<'a> { file: String, lines: Vec, @@ -502,10 +561,36 @@ impl EffectIndex { fn build_documents(documents: &[Document]) -> Self { let mut effects = Vec::new(); for document in documents { - effects.extend( - EffectCollector::new(document.file.clone(), document.lines.clone()) - .scan(&document.normalized_root), - ); + for function_def in &document.function_defs { + let mut reads = document + .state_reads + .iter() + .filter(|read| { + read.owner == function_def.owner && read.function == function_def.name + }) + .map(|read| normalize_protocol_state(&read.field)) + .collect::>(); + reads.sort(); + reads.dedup(); + + let mut writes = document + .state_writes + .iter() + .filter(|write| { + write.owner == function_def.owner && 
write.function == function_def.name + }) + .map(|write| normalize_protocol_state(&write.field)) + .collect::>(); + writes.sort(); + writes.dedup(); + + effects.push(MethodEffect { + owner: function_def.owner.clone(), + name: protocol_method_name(&function_def.name), + reads, + writes, + }); + } } Self::from_effects(effects) } diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index 9ffaa0637..eb175bc5a 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -1,9 +1,9 @@ use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::{self, Document, Language}; +use crate::decomplex::syntax::{self, Document, FunctionDef, Language}; use anyhow::Result; use serde::Serialize; -use std::collections::BTreeSet; -use std::path::PathBuf; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::{Path, PathBuf}; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct LocalFlowRow { @@ -51,6 +51,14 @@ const METHOD_TYPES: &[&str] = &["DEFN", "DEFS"]; const SKIP_NESTED_TYPES: &[&str] = &["CLASS", "MODULE", "DEFN", "DEFS", "LAMBDA"]; const LOCAL_READ_TYPES: &[&str] = &["LVAR", "DVAR"]; const LOCAL_WRITE_TYPES: &[&str] = &["LASGN", "DASGN"]; +const STATEMENT_CONTAINER_TYPES: &[&str] = &[ + "BLOCK", + "COMPOUND_STATEMENT", + "DECLARATION_LIST", + "FUNCTION_BODY", + "HASH", + "STATEMENTS", +]; pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let documents = syntax::parse_files(files, language)?; @@ -64,24 +72,39 @@ pub fn scan_documents(documents: &[Document]) -> Vec { document.file.clone(), document.lines.clone(), document.language, + method_metadata(document), ); out.extend(detector.scan(&document.normalized_root)); } out } +#[derive(Clone, Debug, Eq, PartialEq)] +struct MethodMetadata { + owner: String, + name: String, + params: BTreeSet, +} + struct LocalFlow { file: 
String, lines: Vec, language: Language, + methods_by_line: BTreeMap, } impl LocalFlow { - fn new(file: String, lines: Vec, language: Language) -> Self { + fn new( + file: String, + lines: Vec, + language: Language, + methods_by_line: BTreeMap, + ) -> Self { Self { file, lines, language, + methods_by_line, } } @@ -95,13 +118,13 @@ impl LocalFlow { if OWNER_TYPES.contains(&node.r#type.as_str()) { let owner = self.full_owner_name(owners, node); for method in self.owner_methods(node) { - out.push(self.method_summary(method, &owner)); + out.push(self.method_summary(method, Some(&owner))); } let mut next_owners = owners.to_vec(); next_owners.push(self.owner_segment(node)); self.collect_nested_owners(node, &next_owners, out); } else if METHOD_TYPES.contains(&node.r#type.as_str()) && owners.is_empty() { - out.push(self.method_summary(node, "(top-level)")); + out.push(self.method_summary(node, None)); } else { for child in node.children.iter().filter_map(ast::node) { self.collect_methods(child, owners, out); @@ -123,16 +146,26 @@ impl LocalFlow { } } - fn method_summary(&self, node: &Node, owner: &str) -> MethodSummary { - let statements: Vec<_> = ast::body_stmts(node) + fn method_summary(&self, node: &Node, owner_hint: Option<&str>) -> MethodSummary { + let metadata = self.methods_by_line.get(&node.first_lineno); + let owner = metadata + .map(|item| item.owner.as_str()) + .or(owner_hint) + .unwrap_or("(top-level)"); + let name = metadata + .map(|item| item.name.clone()) + .unwrap_or_else(|| self.method_name(node)); + let statement_nodes = ast::body_stmts(node); + let local_names = self.local_names(&statement_nodes, metadata); + let statements: Vec<_> = statement_nodes .iter() .enumerate() - .map(|(index, stmt)| self.statement_summary(stmt, index)) + .map(|(index, stmt)| self.statement_summary(stmt, index, &local_names)) .collect(); MethodSummary { - id: format!("{}#{}", owner, self.method_name(node)), + id: format!("{}#{}", owner, name), owner: owner.to_string(), - name: 
self.method_name(node), + name, file: self.file.clone(), line: node.first_lineno, span: [ @@ -147,7 +180,14 @@ impl LocalFlow { } } - fn statement_summary(&self, node: &Node, index: usize) -> Statement { + fn statement_summary( + &self, + node: &Node, + index: usize, + local_names: &BTreeSet, + ) -> Statement { + let reads = self.local_reads(node, local_names); + let writes = self.local_writes(node); Statement { index, line: node.first_lineno, @@ -159,11 +199,23 @@ impl LocalFlow { node.last_column, ], source: ast::slice(node, &self.lines), - reads: self.local_reads(node), - writes: self.local_writes(node), - dependencies: self.assignment_dependencies(node), - co_uses: self.co_use_edges(node), + dependencies: self.assignment_dependencies(node, local_names), + co_uses: self.co_use_edges(node, local_names), + reads, + writes, + } + } + + fn local_names( + &self, + statements: &[&Node], + metadata: Option<&MethodMetadata>, + ) -> BTreeSet { + let mut names = metadata.map(|item| item.params.clone()).unwrap_or_default(); + for statement in statements { + names.extend(self.local_writes(statement)); } + names } fn structural_boundaries(&self, statements: &[Statement]) -> Vec { @@ -221,7 +273,7 @@ impl LocalFlow { return Vec::new(); }; - let stmts = if body.r#type == "BLOCK" { + let stmts = if statement_container(body) { body.children .iter() .filter_map(ast::node) @@ -325,12 +377,14 @@ impl LocalFlow { } } - fn local_reads(&self, node: &Node) -> BTreeSet { + fn local_reads(&self, node: &Node, local_names: &BTreeSet) -> BTreeSet { let mut reads = Vec::new(); self.walk_local(node, &mut |child| { if LOCAL_READ_TYPES.contains(&child.r#type.as_str()) { if let Some(name) = local_read_name(child) { - reads.push(name); + if local_names.contains(&name) { + reads.push(name); + } } } }); @@ -349,13 +403,17 @@ impl LocalFlow { writes.into_iter().collect() } - fn assignment_dependencies(&self, node: &Node) -> Vec<(String, String)> { + fn assignment_dependencies( + &self, + node: 
&Node, + local_names: &BTreeSet, + ) -> Vec<(String, String)> { let mut deps = Vec::new(); self.walk_local(node, &mut |child| { if LOCAL_WRITE_TYPES.contains(&child.r#type.as_str()) { if let Some(Child::String(lhs)) = child.children.first() { if let Some(rhs) = child.children.get(1).and_then(ast::node) { - for read in self.local_reads(rhs) { + for read in self.local_reads(rhs, local_names) { if lhs != &read { deps.push((lhs.clone(), read)); } @@ -369,8 +427,8 @@ impl LocalFlow { deps } - fn co_use_edges(&self, node: &Node) -> Vec<(String, String)> { - let reads: Vec<_> = self.local_reads(node).into_iter().collect(); + fn co_use_edges(&self, node: &Node, local_names: &BTreeSet) -> Vec<(String, String)> { + let reads: Vec<_> = self.local_reads(node, local_names).into_iter().collect(); let mut out = Vec::new(); for i in 0..reads.len() { for j in i + 1..reads.len() { @@ -399,6 +457,40 @@ fn local_read_name(node: &Node) -> Option { } } +fn method_metadata(document: &Document) -> BTreeMap { + document + .function_defs + .iter() + .map(|function| (function.line, metadata_for_function(document, function))) + .collect() +} + +fn metadata_for_function(document: &Document, function: &FunctionDef) -> MethodMetadata { + let owner = if function.owner == file_owner(&document.file) { + "(top-level)".to_string() + } else { + function.owner.clone() + }; + MethodMetadata { + owner, + name: function.name.clone(), + params: function.params.iter().cloned().collect(), + } +} + +fn file_owner(file: &str) -> String { + Path::new(file) + .file_stem() + .and_then(|stem| stem.to_str()) + .filter(|stem| !stem.is_empty()) + .unwrap_or("(file)") + .to_string() +} + +fn statement_container(node: &Node) -> bool { + STATEMENT_CONTAINER_TYPES.contains(&node.r#type.as_str()) +} + struct RawBoundary { line: usize, kind: String, diff --git a/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs b/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs index 45f7599c8..032554bb1 
100644 --- a/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs @@ -20,7 +20,7 @@ pub struct ResultReport { } const LIMIT: usize = 3; -const PREDICATE_NODES: &[&str] = &["IF", "WHILE", "UNTIL"]; +const PREDICATE_NODES: &[&str] = &["IF", "UNLESS", "WHILE", "UNTIL"]; pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let documents = syntax::parse_files(files, language)?; @@ -84,8 +84,9 @@ impl OversizedPredicate { let cond = node.children.get(0).and_then(ast::node); let Some(cond) = cond else { return }; - let atoms = self.condition_atoms(cond); - if atoms.len() <= self.limit { + let predicate = ast::slice(cond, &self.lines); + let atoms_text = self.condition_atoms(&predicate); + if atoms_text.len() <= self.limit { return; } @@ -101,37 +102,24 @@ impl OversizedPredicate { ], ); - let atoms_text: Vec = atoms - .into_iter() - .map(|a| ast::slice(a, &self.lines)) - .collect(); - self.findings.push(OversizedPredicateRow { at, count: atoms_text.len(), - predicate: ast::slice(cond, &self.lines), + predicate, atoms: atoms_text, spans, }); } - fn condition_atoms<'a>(&self, node: &'a Node) -> Vec<&'a Node> { - match node.r#type.as_str() { - "AND" | "OR" => node - .children - .iter() - .filter_map(ast::node) - .flat_map(|child| self.condition_atoms(child)) - .collect(), - "NOT" => { - if let Some(child) = node.children.get(0).and_then(ast::node) { - self.condition_atoms(child) - } else { - vec![node] - } - } - _ => vec![node], - } + fn condition_atoms(&self, predicate: &str) -> Vec { + predicate + .split("&&") + .flat_map(|part| part.split("||")) + .flat_map(|part| part.split(" and ")) + .flat_map(|part| part.split(" or ")) + .map(|atom| atom.replace(['(', ')'], "").trim().to_string()) + .filter(|atom| !atom.is_empty()) + .collect() } fn predicate_helper(&self, name: &str) -> bool { diff --git a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs 
b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs index a8fe23d8e..acd40192e 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs @@ -109,9 +109,6 @@ impl PathCondition { self.walk(b_node, &next_defstack, &next_guards); } - if let Some(cond_node) = cond { - self.walk(cond_node, &next_defstack, guards); - } return; } "CALL" | "FCALL" | "VCALL" | "ATTRASGN" | "LASGN" | "IASGN" | "OPCALL" => { diff --git a/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs b/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs index efde4c7b1..306ca805f 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs @@ -1,5 +1,5 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::{self, Document, Language}; +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, CallSite, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -38,9 +38,28 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result BrokenProtocolReport { let mut calls = Vec::new(); for document in documents { - let mut sm = SequenceMine::new(document.file.clone(), document.lines.clone()); - sm.walk(&document.normalized_root, &Vec::new()); - calls.extend(sm.calls); + for call in &document.call_sites { + let mid = call.message.to_string(); + for nested_mid in nested_protocol_events(call, document) { + calls.push(Call { + mid: nested_mid, + file: call.file.clone(), + defn: call.function.clone(), + line: call.line, + span: call.span, + }); + } + + if protocol_event(call, &mid) { + calls.push(Call { + mid, + file: call.file.clone(), + defn: call.function.clone(), + line: call.line, + span: call.span, + }); + } + } } Report::new(calls).findings() } @@ -180,82 +199,89 @@ const ZERO_ARG_ACTION_PREFIXES: &[&str] = &[ 
"write", ]; -struct SequenceMine { - file: String, - #[allow(dead_code)] - lines: Vec, - calls: Vec, +fn protocol_event(call: &CallSite, mid: &str) -> bool { + !ignored_mid(mid) && !passive_reader_call(call, mid) } -impl SequenceMine { - fn new(file: String, lines: Vec) -> Self { - Self { - file, - lines, - calls: Vec::new(), - } +fn passive_reader_call(call: &CallSite, mid: &str) -> bool { + if zero_arg_action_name(mid) { + return false; } - fn walk(&mut self, node: &Node, defstack: &[String]) { - let mut next_defstack = defstack.to_vec(); - if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; - if let Some(Child::Symbol(name)) = node.children.get(name_index) { - next_defstack.push(name.clone()); - } - } + call.arguments.is_empty() +} - if matches!(node.r#type.as_str(), "CALL" | "FCALL" | "VCALL") { - if let Some(mid) = self.call_mid(node) { - if self.protocol_event(node, &mid) { - self.calls.push(Call { - mid, - file: self.file.clone(), - defn: next_defstack - .last() - .cloned() - .unwrap_or_else(|| "(top-level)".to_string()), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - }); - } - } - } +fn nested_protocol_events(call: &CallSite, document: &Document) -> Vec { + if !ignored_mid(&call.message) { + return Vec::new(); + } - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, &next_defstack); + let mut candidates = call.arguments.clone(); + candidates.extend( + source_text(&document.lines, call.span) + .split(|ch: char| !(ch == '_' || ch == '!' || ch == '?' 
|| ch.is_ascii_alphanumeric())) + .filter_map(protocol_word), + ); + let mut out = Vec::new(); + for candidate in candidates { + if !out.contains(&candidate) && !ignored_mid(&candidate) && zero_arg_action_name(&candidate) + { + out.push(candidate); } } + out +} - fn protocol_event(&self, node: &Node, mid: &str) -> bool { - !ignored_mid(mid) && !self.passive_reader_call(node, mid) +fn protocol_word(text: &str) -> Option { + let word = text.trim(); + if word.is_empty() { + return None; } + let mut chars = word.chars(); + let first = chars.next()?; + if !(first == '_' || first.is_ascii_lowercase()) { + return None; + } + if !chars.all(|ch| ch == '_' || ch == '!' || ch == '?' || ch.is_ascii_alphanumeric()) { + return None; + } + Some(word.to_string()) +} - fn passive_reader_call(&self, node: &Node, mid: &str) -> bool { - if zero_arg_action_name(mid) { - return false; - } - - match node.r#type.as_str() { - "CALL" => no_args(node.children.get(2)), - "VCALL" => true, - "FCALL" => no_args(node.children.get(1)), - _ => false, - } +fn source_text(lines: &[String], span: Span) -> String { + let [first_line, first_column, last_line, last_column] = span; + if first_line == 0 || last_line == 0 { + return String::new(); + } + if first_line == last_line { + return lines + .get(first_line - 1) + .and_then(|line| line.get(first_column..last_column)) + .unwrap_or("") + .to_string(); } - fn call_mid(&self, node: &Node) -> Option { - match node.r#type.as_str() { - "CALL" => ast::child_to_string(node.children.get(1)), - "FCALL" | "VCALL" => ast::child_to_string(node.children.get(0)), - _ => None, + let mut parts = Vec::new(); + parts.push( + lines + .get(first_line - 1) + .and_then(|line| line.get(first_column..)) + .unwrap_or("") + .to_string(), + ); + for line_index in first_line..last_line.saturating_sub(1) { + if let Some(line) = lines.get(line_index) { + parts.push(line.clone()); } } + parts.push( + lines + .get(last_line - 1) + .and_then(|line| line.get(..last_column)) + 
.unwrap_or("") + .to_string(), + ); + parts.join("") } struct PairSupport { @@ -387,10 +413,6 @@ fn zero_arg_action_name(mid: &str) -> bool { .any(|prefix| mid == *prefix || mid.starts_with(&format!("{prefix}_"))) } -fn no_args(child: Option<&Child>) -> bool { - child.is_none() || matches!(child, Some(Child::Nil)) -} - fn unique_mids(calls: &[Call]) -> Vec { let set: BTreeSet<_> = calls.iter().map(|call| call.mid.clone()).collect(); set.into_iter().collect() diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs index e5123edc3..643d2e9b5 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs @@ -159,6 +159,14 @@ pub fn scan_documents(documents: &[Document]) -> StateMeshReport { pub fn scan_documents_with_semantic_aliases( documents: &[Document], semantic_aliases: &semantic_alias::SemanticAliasReport, +) -> StateMeshReport { + scan_documents_with_semantic_aliases_and_min_writes(documents, semantic_aliases, 2) +} + +pub fn scan_documents_with_semantic_aliases_and_min_writes( + documents: &[Document], + semantic_aliases: &semantic_alias::SemanticAliasReport, + min_writes: usize, ) -> StateMeshReport { let mut src_map = BTreeMap::new(); for document in documents { @@ -168,7 +176,7 @@ pub fn scan_documents_with_semantic_aliases( ); } - let mut sm = StateMesh::new(src_map); + let mut sm = StateMesh::new(src_map, min_writes); sm.run(semantic_aliases); sm.to_json_graph() } @@ -183,10 +191,10 @@ struct StateMesh { } impl StateMesh { - fn new(src_map: BTreeMap)>) -> Self { + fn new(src_map: BTreeMap)>, min_writes: usize) -> Self { Self { src_map, - min_writes: 2, + min_writes, custom_fields: None, writes: Vec::new(), reads: Vec::new(), @@ -347,10 +355,39 @@ impl StateMesh { || self.is_empty_list(args) { if field_norms.contains(name) { - out.push(Read { + self.push_read( + Read { + attr: name.clone(), + norm: 
name.clone(), + recv: self.recv_slice(recv, lines), + file: file.to_string(), + defn: next_defstack + .last() + .cloned() + .unwrap_or_else(|| "(top-level)".to_string()), + line: node.first_lineno, + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + }, + out, + ); + } + } + } + } + "IVAR" => { + if let Some(Child::String(name)) = node.children.first() { + let norm = self.normalize(name); + if field_norms.contains(&norm) { + self.push_read( + Read { attr: name.clone(), - norm: name.clone(), - recv: self.recv_slice(recv, lines), + norm, + recv: "self".to_string(), file: file.to_string(), defn: next_defstack .last() @@ -363,32 +400,9 @@ impl StateMesh { node.last_lineno, node.last_column, ], - }); - } - } - } - } - "IVAR" => { - if let Some(Child::String(name)) = node.children.first() { - let norm = self.normalize(name); - if field_norms.contains(&norm) { - out.push(Read { - attr: name.clone(), - norm, - recv: "self".to_string(), - file: file.to_string(), - defn: next_defstack - .last() - .cloned() - .unwrap_or_else(|| "(top-level)".to_string()), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - }); + }, + out, + ); } } } @@ -400,6 +414,25 @@ impl StateMesh { } } + fn push_read(&self, read: Read, out: &mut Vec) { + if self.write_target_read(&read) { + return; + } + out.push(read); + } + + fn write_target_read(&self, read: &Read) -> bool { + self.writes.iter().any(|write| { + write.file == read.file + && write.defn == read.defn + && write.recv == read.recv + && (write.attr == read.attr || write.norm == read.norm) + && write.line == read.line + && write.span[0] == read.span[0] + && write.span[1] == read.span[1] + }) + } + fn find_re_derivations(&mut self, semantic_aliases: &semantic_alias::SemanticAliasReport) { let field_norms = self.known_field_norms(); if field_norms.is_empty() { diff --git 
a/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs b/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs index 506b54268..950b9e44f 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs @@ -1,9 +1,10 @@ -use crate::decomplex::ast::{self, Node, Span}; -use crate::decomplex::syntax::{self, Document, Language}; +use crate::decomplex::ast::{RawNode, Span}; +use crate::decomplex::syntax::adapters::language_profile; +use crate::decomplex::syntax::{self, CallSite, Document, FunctionDef, Language}; use anyhow::Result; use serde::Serialize; -use std::collections::BTreeMap; -use std::path::PathBuf; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::{Path, PathBuf}; #[derive(Clone, Debug, Serialize)] pub struct StructuralTopologyReport { @@ -36,13 +37,6 @@ pub struct Edge { pub confidence: String, } -const VISIBILITY_MIDS: &[&str] = &["public", "protected", "private"]; -const OWNER_TYPES: &[&str] = &["CLASS", "MODULE"]; -const METHOD_TYPES: &[&str] = &["DEFN", "DEFS"]; -const SKIP_NESTED_TYPES: &[&str] = &["CLASS", "MODULE", "DEFN", "DEFS", "LAMBDA"]; -const CONDITIONAL_TYPES: &[&str] = &["IF", "UNLESS", "CASE", "CASE2"]; -const ITERATION_TYPES: &[&str] = &["ITER", "FOR", "WHILE", "UNTIL"]; - pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let documents = syntax::parse_files(files, language)?; Ok(scan_documents(&documents)) @@ -50,16 +44,27 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result StructuralTopologyReport { let mut methods = Vec::new(); - for document in documents { - let mut mc = MethodCollector::new(document.file.clone(), document.lines.clone()); - methods.extend(mc.scan(&document.normalized_root)); + methods.extend(methods_for_document(document)); } + let method_by_id = methods + .iter() + .map(|method| (method.id.clone(), method.clone())) + .collect::>(); + let mut edges = 
Vec::new(); + let mut seen = BTreeSet::new(); for document in documents { - let mut ec = EdgeCollector::new(document.file.clone(), document.lines.clone(), &methods); - edges.extend(ec.scan(&document.normalized_root)); + for edge in edges_for_document(document, &method_by_id) { + if seen.insert(( + edge.caller.clone(), + edge.callee.clone(), + edge.r#type.clone(), + )) { + edges.push(edge); + } + } } StructuralTopologyReport { methods, edges } @@ -128,436 +133,190 @@ impl Graph { } } -struct MethodCollector { - file: String, - lines: Vec, +fn methods_for_document(document: &Document) -> Vec { + document + .function_defs + .iter() + .map(|function| method_for_function(document, function)) + .collect() } -impl MethodCollector { - fn new(file: String, lines: Vec) -> Self { - Self { file, lines } - } - - fn scan(&mut self, root: &Node) -> Vec { - let mut out = Vec::new(); - out.extend( - self.methods_from_statements(&self.top_level_statements(root), &self.top_level_owner()), - ); - self.walk(root, &Vec::new(), &mut out); - out - } - - fn walk(&self, node: &Node, owners: &[String], out: &mut Vec) { - if OWNER_TYPES.contains(&node.r#type.as_str()) { - let owner = self.full_owner_name(owners, node); - out.extend(self.owner_methods(node, &owner)); - let mut next_owners = owners.to_vec(); - next_owners.push(self.owner_segment(node)); - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, &next_owners, out); - } - } else { - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, owners, out); - } - } - } - - fn owner_methods(&self, owner_node: &Node, owner: &str) -> Vec { - let Some(body) = self.owner_body(owner_node) else { - return Vec::new(); - }; - self.methods_from_statements(&self.owner_statements(body), owner) - } - - fn methods_from_statements(&self, stmts: &[&Node], owner: &str) -> Vec { - let mut methods = Vec::new(); - let mut visibility = "public".to_string(); - for stmt in stmts { - if 
self.bare_visibility_marker(stmt) { - visibility = ast::child_to_string(stmt.children.get(0)).unwrap_or_default(); - } else if self.visibility_call(stmt) { - visibility = self.handle_visibility_call(stmt, owner, &visibility, &mut methods); - } else if METHOD_TYPES.contains(&stmt.r#type.as_str()) { - methods.push(self.method_record(stmt, owner, &visibility)); - } - } - methods - } - - fn handle_visibility_call( - &self, - stmt: &Node, - owner: &str, - current_visibility: &str, - methods: &mut Vec, - ) -> String { - let vis = ast::child_to_string(stmt.children.get(0)).unwrap_or_default(); - if let Some(args) = stmt.children.get(1).and_then(ast::node) { - for arg in args.children.iter().filter_map(ast::node) { - if METHOD_TYPES.contains(&arg.r#type.as_str()) { - methods.push(self.method_record(arg, owner, &vis)); - } else if let Some(name) = self.literal_method_name(arg) { - if let Some(m) = methods.iter_mut().rev().find(|m| m.name == name) { - m.visibility = vis.clone(); - } - } - } - } - current_visibility.to_string() - } - - fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { - let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; - let scope = owner_node.children.get(scope_index).and_then(ast::node)?; - if scope.r#type != "SCOPE" { - return None; - } - scope.children.get(2).and_then(ast::node) - } - - fn owner_statements<'a>(&self, body: &'a Node) -> Vec<&'a Node> { - if body.r#type == "BLOCK" { - body.children.iter().filter_map(ast::node).collect() - } else { - vec![body] - } +fn method_for_function(document: &Document, function: &FunctionDef) -> Method { + let owner = top_level_owner_for(document, &function.owner, function.span); + Method { + id: format!("{}#{}", owner, function.name), + owner, + name: function.name.clone(), + file: function.file.clone(), + line: function.line, + span: function.span, + visibility: function + .visibility + .clone() + .unwrap_or_else(|| "public".to_string()), } +} - fn top_level_statements<'a>(&self, 
root: &'a Node) -> Vec<&'a Node> { - root.children - .iter() - .filter_map(ast::node) - .flat_map(|c| { - if c.r#type == "BLOCK" { - c.children.iter().filter_map(ast::node).collect() - } else { - vec![c] - } - }) - .collect() - } +fn edges_for_document(document: &Document, method_by_id: &BTreeMap) -> Vec { + document + .call_sites + .iter() + .filter_map(|call| edge_for_call(document, method_by_id, call)) + .collect() +} - fn bare_visibility_marker(&self, node: &Node) -> bool { - node.r#type == "VCALL" - && VISIBILITY_MIDS.contains( - &ast::child_to_string(node.children.get(0)) - .unwrap_or_default() - .as_str(), - ) - } +fn edge_for_call( + document: &Document, + method_by_id: &BTreeMap, + call: &CallSite, +) -> Option { + if call.receiver != "self" { + return None; + } + + let owner = top_level_owner_for(document, &call.owner, call.span); + let caller = method_by_id.get(&format!("{}#{}", owner, call.function))?; + let callee_name = scoped_name(caller, &call.message); + let callee = method_by_id.get(&format!("{}#{}", owner, callee_name))?; + if caller.id == callee.id { + return None; + } + + Some(Edge { + caller: caller.id.clone(), + callee: callee.id.clone(), + caller_name: caller.name.clone(), + callee_name: callee.name.clone(), + file: call.file.clone(), + line: call.line, + span: call.span, + r#type: edge_type(call.control.as_deref()), + kind: call_kind(document, call.span), + confidence: "high".to_string(), + }) +} - fn visibility_call(&self, node: &Node) -> bool { - node.r#type == "FCALL" - && VISIBILITY_MIDS.contains( - &ast::child_to_string(node.children.get(0)) - .unwrap_or_default() - .as_str(), - ) +fn scoped_name(caller: &Method, message: &str) -> String { + if caller.name.starts_with("self.") { + format!("self.{message}") + } else { + message.to_string() } +} - fn literal_method_name(&self, node: &Node) -> Option { - match node.r#type.as_str() { - "LIT" | "STR" | "DSTR" => ast::child_to_string(node.children.get(0)), - _ => None, - } +fn 
edge_type(control: Option<&str>) -> String { + match control { + Some("conditional" | "iterates") => control.unwrap().to_string(), + _ => "always".to_string(), } +} - fn method_record(&self, node: &Node, owner: &str, visibility: &str) -> Method { - let name = self.method_name(node); - Method { - id: format!("{}#{}", owner, name), - owner: owner.to_string(), - name: name.clone(), - file: self.file.clone(), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - visibility: if node.r#type == "DEFS" { - "public".to_string() - } else { - visibility.to_string() - }, - } +fn call_kind(document: &Document, span: Span) -> String { + if source_text(&document.lines, span) + .trim_start() + .starts_with("self.") + { + "direct_self".to_string() + } else { + "bare_internal".to_string() } +} - fn method_name(&self, node: &Node) -> String { - if node.r#type == "DEFS" { - let receiver = node.children.get(0).and_then(ast::node); - let prefix = if let Some(r) = receiver { - if r.r#type == "SELF" { - "self".to_string() - } else { - ast::slice(r, &self.lines) - } - } else { - "?".to_string() - }; - format!( - "{}.{}", - prefix, - ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string()) - ) - } else { - ast::child_to_string(node.children.get(0)).unwrap_or_else(|| "?".to_string()) - } +fn source_text(lines: &[String], span: Span) -> String { + let [first_line, first_column, last_line, last_column] = span; + if first_line == 0 || last_line == 0 || first_line > lines.len() || last_line > lines.len() { + return String::new(); } - - fn full_owner_name(&self, owners: &[String], node: &Node) -> String { - let mut next = owners.to_vec(); - next.push(self.owner_segment(node)); - next.join("::") + if first_line == last_line { + return lines[first_line - 1] + .chars() + .skip(first_column) + .take(last_column.saturating_sub(first_column)) + .collect(); } - fn owner_segment(&self, node: &Node) -> String { - 
let text = ast::slice( - node.children.first().and_then(ast::node).unwrap_or(node), - &self.lines, - ); - if text.is_empty() { - "(anonymous)".to_string() - } else { - text - } + let mut parts = Vec::new(); + parts.push(lines[first_line - 1].chars().skip(first_column).collect()); + for line in lines + .iter() + .take(last_line.saturating_sub(1)) + .skip(first_line) + { + parts.push(line.clone()); } + parts.push(lines[last_line - 1].chars().take(last_column).collect()); + parts.join("") +} - fn top_level_owner(&self) -> String { - format!("(top-level:{})", self.file) +fn top_level_owner_for(document: &Document, owner: &str, span: Span) -> String { + if owner != file_owner(&document.file) || enclosed_by_matching_owner(document, owner, span) { + owner.to_string() + } else { + format!("(top-level:{})", document.file) } } -struct EdgeCollector { - file: String, - lines: Vec, - method_by_id: BTreeMap, +fn file_owner(file: &str) -> String { + Path::new(file) + .file_stem() + .and_then(|stem| stem.to_str()) + .filter(|stem| !stem.is_empty()) + .unwrap_or("(file)") + .to_string() } -impl EdgeCollector { - fn new(file: String, lines: Vec, methods: &[Method]) -> Self { - let mut map = BTreeMap::new(); - for m in methods { - map.insert(m.id.clone(), m.clone()); - } - Self { - file, - lines, - method_by_id: map, - } - } - - fn scan(&mut self, root: &Node) -> Vec { - let mut out = Vec::new(); - let top_level_methods: Vec<_> = self - .top_level_statements(root) - .into_iter() - .filter(|s| METHOD_TYPES.contains(&s.r#type.as_str())) - .collect(); - for m_node in top_level_methods { - let id = format!("(top-level:{})#{}", self.file, self.method_name(m_node)); - if let Some(m) = self.method_by_id.get(&id) { - self.collect_calls(m_node, m, &Vec::new(), &mut out); - } - } - self.walk(root, &Vec::new(), &mut out); - out - } - - fn walk(&self, node: &Node, owners: &[String], out: &mut Vec) { - if OWNER_TYPES.contains(&node.r#type.as_str()) { - let owner = self.full_owner_name(owners, 
node); - for m_node in self.owner_methods(node) { - let id = format!("{}#{}", owner, self.method_name(m_node)); - if let Some(m) = self.method_by_id.get(&id) { - self.collect_calls(m_node, m, &Vec::new(), out); - } - } - let mut next_owners = owners.to_vec(); - next_owners.push(self.owner_segment(node)); - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, &next_owners, out); - } - } else { - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, owners, out); - } - } - } - - fn collect_calls( - &self, - node: &Node, - caller: &Method, - context_stack: &[String], - out: &mut Vec, - ) { - if SKIP_NESTED_TYPES.contains(&node.r#type.as_str()) - && !METHOD_TYPES.contains(&node.r#type.as_str()) - { - return; - } - - let mut next_context = context_stack.to_vec(); - if CONDITIONAL_TYPES.contains(&node.r#type.as_str()) { - next_context.push("conditional".to_string()) - } - if ITERATION_TYPES.contains(&node.r#type.as_str()) { - next_context.push("iterates".to_string()) - } - - if let Some(edge) = self.internal_edge(node, caller, &next_context) { - if edge.caller != edge.callee { - out.push(edge) - } - } - - for child in node.children.iter().filter_map(ast::node) { - self.collect_calls(child, caller, &next_context, out); - } - } +fn enclosed_by_matching_owner(document: &Document, owner: &str, span: Span) -> bool { + let profile = language_profile(document.language); + let mut nodes = Vec::new(); + document.root.walk(&mut nodes); + nodes.into_iter().any(|node| { + raw_owner_name(profile, node).as_deref() == Some(owner) && encloses(node.span, span) + }) +} - fn internal_edge( - &self, - node: &Node, - caller: &Method, - context_stack: &[String], - ) -> Option { - let call = self.internal_call_name(node, caller)?; - let id = format!("{}#{}", caller.owner, call.name); - let callee = self.method_by_id.get(&id)?; - - Some(Edge { - caller: caller.id.clone(), - callee: callee.id.clone(), - caller_name: caller.name.clone(), - 
callee_name: callee.name.clone(), - file: self.file.clone(), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - r#type: context_stack - .last() - .cloned() - .unwrap_or_else(|| "always".to_string()), - kind: call.kind, - confidence: "high".to_string(), +fn raw_owner_name( + profile: &dyn crate::decomplex::syntax::adapters::LanguageProfile, + node: &RawNode, +) -> Option { + let owner_kind = profile + .class_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .module_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .generic_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .struct_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .impl_owner_node_kinds() + .contains(&node.kind.as_str()); + let hidden_ruby_owner = node.kind == "body_statement" + && node + .children + .first() + .map(|child| matches!(child.kind.as_str(), "class" | "module")) + .unwrap_or(false); + if !owner_kind && !hidden_ruby_owner { + return None; + } + + node.children + .iter() + .find(|child| { + child.named + && matches!( + child.kind.as_str(), + "identifier" | "constant" | "type_identifier" | "field_identifier" + ) }) - } - - fn internal_call_name(&self, node: &Node, caller: &Method) -> Option { - match node.r#type.as_str() { - "FCALL" | "VCALL" => Some(InternalCallName { - name: self.scoped_name( - caller, - &ast::child_to_string(node.children.get(0)).unwrap_or_default(), - ), - kind: "bare_internal".to_string(), - }), - "CALL" | "OPCALL" => { - let recv = node.children.get(0).and_then(ast::node)?; - if recv.r#type != "SELF" { - return None; - } - let mid = ast::child_to_string(node.children.get(1))?; - Some(InternalCallName { - name: self.scoped_name(caller, &mid), - kind: "direct_self".to_string(), - }) - } - _ => None, - } - } - - fn scoped_name(&self, caller: &Method, mid: &str) -> String { - if caller.name.starts_with("self.") { - format!("self.{}", 
mid) - } else { - mid.to_string() - } - } - - // Reuse helpers from MethodCollector - fn top_level_statements<'a>(&self, root: &'a Node) -> Vec<&'a Node> { - root.children - .iter() - .filter_map(ast::node) - .flat_map(|c| { - if c.r#type == "BLOCK" { - c.children.iter().filter_map(ast::node).collect() - } else { - vec![c] - } - }) - .collect() - } - fn method_name(&self, node: &Node) -> String { - if node.r#type == "DEFS" { - let receiver = node.children.get(0).and_then(ast::node); - let prefix = if let Some(r) = receiver { - if r.r#type == "SELF" { - "self".to_string() - } else { - ast::slice(r, &self.lines) - } - } else { - "?".to_string() - }; - format!( - "{}.{}", - prefix, - ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string()) - ) - } else { - ast::child_to_string(node.children.get(0)).unwrap_or_else(|| "?".to_string()) - } - } - fn owner_methods<'a>(&self, owner_node: &'a Node) -> Vec<&'a Node> { - let Some(body) = self.owner_body(owner_node) else { - return Vec::new(); - }; - self.owner_statements(body) - } - fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { - let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; - let scope = owner_node.children.get(scope_index).and_then(ast::node)?; - if scope.r#type != "SCOPE" { - return None; - } - scope.children.get(2).and_then(ast::node) - } - fn owner_statements<'a>(&self, body: &'a Node) -> Vec<&'a Node> { - if body.r#type == "BLOCK" { - body.children.iter().filter_map(ast::node).collect() - } else { - vec![body] - } - } - fn full_owner_name(&self, owners: &[String], node: &Node) -> String { - let mut next = owners.to_vec(); - next.push(self.owner_segment(node)); - next.join("::") - } - fn owner_segment(&self, node: &Node) -> String { - let text = ast::slice( - node.children.first().and_then(ast::node).unwrap_or(node), - &self.lines, - ); - if text.is_empty() { - "(anonymous)".to_string() - } else { - text - } - } + .map(|child| child.text.clone()) } -struct 
InternalCallName { - name: String, - kind: String, +fn encloses(outer: Span, inner: Span) -> bool { + let starts_before = outer[0] < inner[0] || (outer[0] == inner[0] && outer[1] <= inner[1]); + let ends_after = outer[2] > inner[2] || (outer[2] == inner[2] && outer[3] >= inner[3]); + starts_before && ends_after } diff --git a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs index bfdec618f..abfae59be 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::{self, Document, Language}; +use crate::decomplex::syntax::{self, Document, Language, StateRead, StateWrite}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -43,8 +43,12 @@ pub fn scan_files( pub fn scan_documents(documents: &[Document]) -> Vec { let mut rows = Vec::new(); for document in documents { - let mut detector = - TemporalOrderingPressure::new(document.file.clone(), document.lines.clone()); + let mut detector = TemporalOrderingPressure::new( + document.file.clone(), + document.lines.clone(), + document.state_reads.clone(), + document.state_writes.clone(), + ); rows.extend(detector.scan(&document.normalized_root)); } rows.sort_by(|a, b| { @@ -60,11 +64,23 @@ pub fn scan_documents(documents: &[Document]) -> Vec, + state_reads: Vec, + state_writes: Vec, } impl TemporalOrderingPressure { - fn new(file: String, lines: Vec) -> Self { - Self { file, lines } + fn new( + file: String, + lines: Vec, + state_reads: Vec, + state_writes: Vec, + ) -> Self { + Self { + file, + lines, + state_reads, + state_writes, + } } fn scan(&mut self, root: &Node) -> Vec { @@ -80,13 +96,13 @@ impl TemporalOrderingPressure { out: &mut Vec, ) { if matches!(node.r#type.as_str(), "CLASS" 
| "MODULE") { - let owner = self.owner_name(node); - let methods = self.owner_methods(node); + let owner = self.full_owner_name(owners, node); + let methods = self.owner_methods(node, &owner); if let Some(row) = self.pressure_row(&owner, &methods) { out.push(row); } let mut next_owners = owners.to_vec(); - next_owners.push(owner); + next_owners.push(self.owner_segment(node)); for child in node.children.iter().filter_map(ast::node) { self.walk_owners(child, &next_owners, out); } @@ -97,7 +113,13 @@ impl TemporalOrderingPressure { } } - fn owner_name(&self, node: &Node) -> String { + fn full_owner_name(&self, owners: &[String], node: &Node) -> String { + let mut next = owners.to_vec(); + next.push(self.owner_segment(node)); + next.join("::") + } + + fn owner_segment(&self, node: &Node) -> String { let name = ast::slice( node.children.first().and_then(ast::node).unwrap_or(node), &self.lines, @@ -109,7 +131,7 @@ impl TemporalOrderingPressure { } } - fn owner_methods(&self, owner_node: &Node) -> Vec { + fn owner_methods(&self, owner_node: &Node, owner: &str) -> Vec { let Some(body) = self.owner_body(owner_node) else { return Vec::new(); }; @@ -132,14 +154,15 @@ impl TemporalOrderingPressure { visibility = name.clone(); } } else if matches!(stmt.r#type.as_str(), "DEFN" | "DEFS") { - methods.push(self.method_state(stmt, &visibility)); + methods.push(self.method_state(stmt, &visibility, owner)); } } methods } fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { - let scope = owner_node.children.get(2).and_then(ast::node)?; + let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; + let scope = owner_node.children.get(scope_index).and_then(ast::node)?; if scope.r#type != "SCOPE" { return None; } @@ -155,11 +178,7 @@ impl TemporalOrderingPressure { false } - fn method_state(&self, defn_node: &Node, visibility: &str) -> MethodState { - let mut reads = Vec::new(); - let mut writes = Vec::new(); - self.collect_state_access(defn_node, &mut reads, &mut 
writes); - + fn method_state(&self, defn_node: &Node, visibility: &str, owner: &str) -> MethodState { let name_index = if defn_node.r#type == "DEFS" { 1 } else { 0 }; let name = defn_node .children @@ -170,18 +189,8 @@ impl TemporalOrderingPressure { }) .unwrap_or_else(|| "(anonymous)".to_string()); - let mut reads: Vec<_> = reads - .into_iter() - .collect::>() - .into_iter() - .collect(); - let mut writes: Vec<_> = writes - .into_iter() - .collect::>() - .into_iter() - .collect(); - reads.sort(); - writes.sort(); + let reads = self.state_reads_for(owner, &name); + let writes = self.state_writes_for(owner, &name); MethodState { name, @@ -198,23 +207,22 @@ impl TemporalOrderingPressure { } } - fn collect_state_access(&self, node: &Node, reads: &mut Vec, writes: &mut Vec) { - match node.r#type.as_str() { - "IASGN" => { - if let Some(Child::String(name)) = node.children.first() { - writes.push(name.clone()); - } - } - "IVAR" => { - if let Some(Child::String(name)) = node.children.first() { - reads.push(name.clone()); - } - } - _ => {} - } - for child in node.children.iter().filter_map(ast::node) { - self.collect_state_access(child, reads, writes); - } + fn state_reads_for(&self, owner: &str, function: &str) -> Vec { + sorted_unique( + self.state_reads + .iter() + .filter(|read| read.owner == owner && read.function == function) + .map(|read| read.field.clone()), + ) + } + + fn state_writes_for(&self, owner: &str, function: &str) -> Vec { + sorted_unique( + self.state_writes + .iter() + .filter(|write| write.owner == owner && write.function == function) + .map(|write| write.field.clone()), + ) } fn pressure_row( @@ -303,3 +311,9 @@ impl TemporalOrderingPressure { format!("{}!", n) } } + +fn sorted_unique(values: impl Iterator) -> Vec { + let mut out: Vec<_> = values.collect::>().into_iter().collect(); + out.sort(); + out +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs 
b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs index 0db7f64ee..7d2b61a5d 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs @@ -1,5 +1,5 @@ -use crate::decomplex::ast::{self, Node, Span}; -use crate::decomplex::detectors::structural_topology; +use crate::decomplex::ast::{self, Node, RawNode, Span}; +use crate::decomplex::detectors::{local_flow, structural_topology}; use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; @@ -33,25 +33,29 @@ pub fn scan_files( pub fn scan_documents(documents: &[Document]) -> Vec { let topology_report = structural_topology::scan_documents(documents); let topology = structural_topology::Graph::new(topology_report.methods, topology_report.edges); - - let mut bodies = Vec::new(); - for document in documents { - let mut collector = MethodBodyCollector::new(document.file.clone(), document.lines.clone()); - bodies.extend(collector.scan(&document.normalized_root)); - } + let raw_scores = raw_complexity_scores(documents); let mut scores = BTreeMap::new(); - for body in bodies { - let score = LocalScorer::new().score(&body.node); + for summary in local_flow::scan_documents(documents) { + let owner = if summary.owner == "(top-level)" { + format!("(top-level:{})", summary.file) + } else { + summary.owner.clone() + }; + let id = format!("{}#{}", owner, summary.name); + let score = raw_scores + .get(&(summary.file.clone(), summary.line, summary.name.clone())) + .cloned() + .unwrap_or_else(|| LocalScorer::new().score(&summary.node)); scores.insert( - body.id.clone(), + id.clone(), LocalScore { - id: body.id, - owner: body.owner, - name: body.name, - file: body.file, - line: body.line, - span: body.span, + id, + owner, + name: summary.name, + file: summary.file, + line: summary.line, + span: summary.span, score: score.score, 
signals: score.signals, }, @@ -62,14 +66,17 @@ pub fn scan_documents(documents: &[Document]) -> Vec BTreeMap<(String, usize, String), ScoreResult> { + let mut out = BTreeMap::new(); + for document in documents { + for function in &document.function_defs { + out.insert( + (function.file.clone(), function.line, function.name.clone()), + LocalScorer::new().score_raw(&function.body), + ); + } + } + out } struct LocalScore { @@ -94,7 +101,6 @@ struct Contribution { chain: Vec, } -const OWNER_TYPES: &[&str] = &["CLASS", "MODULE"]; const METHOD_TYPES: &[&str] = &["DEFN", "DEFS"]; const SKIP_NESTED_TYPES: &[&str] = &["CLASS", "MODULE", "DEFN", "DEFS", "LAMBDA"]; const BRANCH_TYPES: &[&str] = &["IF", "UNLESS"]; @@ -104,185 +110,9 @@ const RESCUE_TYPES: &[&str] = &["RESCUE", "RESBODY"]; const EARLY_EXIT_TYPES: &[&str] = &["RETURN", "BREAK", "NEXT", "REDO", "RETRY"]; const BOOLEAN_TYPES: &[&str] = &["AND", "OR"]; -struct MethodBodyCollector { - file: String, - lines: Vec, -} - -impl MethodBodyCollector { - fn new(file: String, lines: Vec) -> Self { - Self { file, lines } - } - - fn scan(&mut self, root: &Node) -> Vec { - let mut out = Vec::new(); - for m_node in self.top_level_methods(root) { - out.push(self.method_body(m_node, &self.top_level_owner())); - } - self.walk(root, &Vec::new(), &mut out); - out - } - - fn top_level_methods<'a>(&self, root: &'a Node) -> Vec<&'a Node> { - self.top_level_statements(root) - .into_iter() - .filter(|s| METHOD_TYPES.contains(&s.r#type.as_str())) - .collect() - } - - fn walk<'a>(&self, node: &'a Node, owners: &[String], out: &mut Vec) { - if OWNER_TYPES.contains(&node.r#type.as_str()) { - let owner = self.full_owner_name(owners, node); - for m_node in self.owner_methods(node) { - out.push(self.method_body(m_node, &owner)); - } - let mut next_owners = owners.to_vec(); - next_owners.push(self.owner_segment(node)); - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, &next_owners, out); - } - } else { - for child in 
node.children.iter().filter_map(ast::node) { - self.walk(child, owners, out); - } - } - } - - fn owner_methods<'a>(&self, owner_node: &'a Node) -> Vec<&'a Node> { - let Some(body) = self.owner_body(owner_node) else { - return Vec::new(); - }; - self.owner_statements(body) - .into_iter() - .flat_map(|stmt| { - if METHOD_TYPES.contains(&stmt.r#type.as_str()) { - vec![stmt] - } else if self.visibility_call(stmt) { - self.inline_methods(stmt) - } else { - vec![] - } - }) - .collect() - } - - fn method_body(&self, node: &Node, owner: &str) -> MethodBody { - let name = self.method_name(node); - MethodBody { - id: format!("{}#{}", owner, name), - owner: owner.to_string(), - name, - file: self.file.clone(), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - node: node.clone(), - } - } - - fn inline_methods<'a>(&self, stmt: &'a Node) -> Vec<&'a Node> { - let Some(args) = stmt.children.get(1).and_then(ast::node) else { - return Vec::new(); - }; - args.children - .iter() - .filter_map(ast::node) - .filter(|arg| METHOD_TYPES.contains(&arg.r#type.as_str())) - .collect() - } - - fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { - let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; - let scope = owner_node.children.get(scope_index).and_then(ast::node)?; - if scope.r#type != "SCOPE" { - return None; - } - scope.children.get(2).and_then(ast::node) - } - - fn owner_statements<'a>(&self, body: &'a Node) -> Vec<&'a Node> { - if body.r#type == "BLOCK" { - body.children.iter().filter_map(ast::node).collect() - } else { - vec![body] - } - } - - fn top_level_statements<'a>(&self, root: &'a Node) -> Vec<&'a Node> { - root.children - .iter() - .filter_map(ast::node) - .flat_map(|c| { - if c.r#type == "BLOCK" { - c.children.iter().filter_map(ast::node).collect() - } else { - vec![c] - } - }) - .collect() - } - - fn visibility_call(&self, node: &Node) -> bool { - node.r#type == 
"FCALL" - && matches!( - ast::child_to_string(node.children.get(0)) - .unwrap_or_default() - .as_str(), - "public" | "protected" | "private" - ) - } - - fn method_name(&self, node: &Node) -> String { - if node.r#type == "DEFS" { - let receiver = node.children.get(0).and_then(ast::node); - let prefix = if let Some(r) = receiver { - if r.r#type == "SELF" { - "self".to_string() - } else { - ast::slice(r, &self.lines) - } - } else { - "?".to_string() - }; - format!( - "{}.{}", - prefix, - ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string()) - ) - } else { - ast::child_to_string(node.children.get(0)).unwrap_or_else(|| "?".to_string()) - } - } - - fn full_owner_name(&self, owners: &[String], node: &Node) -> String { - let mut next = owners.to_vec(); - next.push(self.owner_segment(node)); - next.join("::") - } - - fn owner_segment(&self, node: &Node) -> String { - let text = ast::slice( - node.children.first().and_then(ast::node).unwrap_or(node), - &self.lines, - ); - if text.is_empty() { - "(anonymous)".to_string() - } else { - text - } - } - - fn top_level_owner(&self) -> String { - format!("(top-level:{})", self.file) - } -} - pub struct LocalScorer {} +#[derive(Clone)] pub struct ScoreResult { pub score: f64, pub signals: BTreeMap, @@ -301,6 +131,14 @@ impl LocalScorer { } } + pub fn score_raw(&self, method_node: &RawNode) -> ScoreResult { + let mut signals = BTreeMap::new(); + ScoreResult { + score: self.round(self.score_raw_node(method_node, 0, &mut signals)), + signals, + } + } + fn score_node( &self, node: &Node, @@ -478,12 +316,223 @@ impl LocalScorer { } fn branch_cost(&self, nesting: usize) -> f64 { - 1.0 + (nesting as f64) + 1.1 + (nesting as f64) } fn round(&self, value: f64) -> f64 { (value * 10.0).round() / 10.0 } + + fn score_raw_node( + &self, + node: &RawNode, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { + if raw_skip_nested(node) { + return 0.0; + } + + if raw_branch(node) { + 
*signals.entry("branches".to_string()).or_insert(0) += 1; + if nesting > 0 { + *signals.entry("nested".to_string()).or_insert(0) += 1; + } + return self.branch_cost(nesting) + + self.raw_predicate_cost(raw_condition_node(node), signals) + + self.score_raw_children(node, nesting + 1, signals); + } + + if raw_loop(node) { + *signals.entry("loops".to_string()).or_insert(0) += 1; + if nesting > 0 { + *signals.entry("nested".to_string()).or_insert(0) += 1; + } + return self.branch_cost(nesting) + self.score_raw_children(node, nesting + 1, signals); + } + + if raw_case(node) { + *signals.entry("cases".to_string()).or_insert(0) += 1; + return 0.5 + self.score_raw_children(node, nesting + 1, signals); + } + + if raw_rescue(node) { + *signals.entry("rescues".to_string()).or_insert(0) += 1; + return self.branch_cost(nesting) + self.score_raw_children(node, nesting + 1, signals); + } + + if raw_early_exit(node) { + *signals.entry("early_exits".to_string()).or_insert(0) += 1; + let exit_cost = if nesting > 0 { + 0.5 + (nesting as f64 * 0.25) + } else { + 0.0 + }; + return exit_cost + self.score_raw_children(node, nesting, signals); + } + + if raw_boolean_node(node) { + *signals.entry("boolean_ops".to_string()).or_insert(0) += 1; + return 0.25 + self.score_raw_children(node, nesting, signals); + } + + self.score_raw_children(node, nesting, signals) + } + + fn score_raw_children( + &self, + node: &RawNode, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { + node.children + .iter() + .map(|child| self.score_raw_node(child, nesting, signals)) + .sum() + } + + fn raw_predicate_cost( + &self, + node: Option<&RawNode>, + signals: &mut BTreeMap, + ) -> f64 { + let Some(node) = node else { return 0.0 }; + let bools = raw_boolean_count(node); + *signals.entry("boolean_ops".to_string()).or_insert(0) += bools; + (bools as f64) * 0.5 + } +} + +fn raw_skip_nested(node: &RawNode) -> bool { + matches!(node.kind.as_str(), "class" | "module" | "lambda") +} + +fn raw_branch(node: 
&RawNode) -> bool { + (matches!( + node.kind.as_str(), + "if" | "unless" | "if_statement" | "if_expression" | "if_modifier" | "unless_modifier" + ) && !node.named_children().is_empty()) + || raw_hidden_if(node) + || raw_modifier_if(node) +} + +fn raw_hidden_if(node: &RawNode) -> bool { + if node.kind == "expression_statement" && node.text.trim_start().starts_with("if ") { + return true; + } + matches!( + node.kind.as_str(), + "body_statement" | "block" | "statements" | "statement_list" + ) && node + .children + .first() + .map(|child| !child.named && matches!(child.kind.as_str(), "if" | "unless")) + .unwrap_or(false) +} + +fn raw_modifier_if(node: &RawNode) -> bool { + if matches!(node.kind.as_str(), "if_modifier" | "unless_modifier") { + return true; + } + if node.kind != "body_statement" { + return false; + } + let mut seen_named = false; + node.children.iter().any(|child| { + seen_named |= child.named; + seen_named && !child.named && matches!(child.kind.as_str(), "if" | "unless") + }) +} + +fn raw_loop(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "while" + | "until" + | "while_statement" + | "for" + | "for_statement" + | "for_in_statement" + | "do_block" + ) || raw_hidden_loop(node) + || (node.kind == "expression_statement" + && starts_with_any(node.text.trim_start(), &["for", "while", "loop"])) + || (node.kind == "labeled_statement" && node.text.trim_start().starts_with("for ")) +} + +fn raw_hidden_loop(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "body_statement" | "block" | "statements" | "statement_list" + ) && node + .children + .first() + .map(|child| !child.named && matches!(child.kind.as_str(), "for" | "while" | "loop")) + .unwrap_or(false) +} + +fn starts_with_any(text: &str, words: &[&str]) -> bool { + words + .iter() + .any(|word| text == *word || text.starts_with(&format!("{word} "))) +} + +fn raw_case(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "case" | "switch_statement" | "switch_expression" | 
"match_statement" | "match_expression" + ) || (node.kind == "expression_statement" && node.text.trim_start().starts_with("match ")) +} + +fn raw_rescue(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "rescue" | "rescue_modifier" | "rescue_clause" | "rescue_body" + ) +} + +fn raw_early_exit(node: &RawNode) -> bool { + (node.named || node.kind == "return") + && matches!( + node.kind.as_str(), + "return" + | "break" + | "next" + | "redo" + | "retry" + | "return_statement" + | "break_statement" + | "continue_statement" + ) +} + +fn raw_boolean_node(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "binary" + | "binary_expression" + | "boolean_operator" + | "conjunction_expression" + | "disjunction_expression" + ) && node + .children + .iter() + .any(|child| !child.named && matches!(child.text.as_str(), "&&" | "||" | "and" | "or")) +} + +fn raw_condition_node(node: &RawNode) -> Option<&RawNode> { + if raw_modifier_if(node) { + return node.named_children().last().copied(); + } + if node.kind == "body_statement" { + return node.named_children().first().copied(); + } + node.named_children().first().copied() +} + +fn raw_boolean_count(node: &RawNode) -> usize { + let own = usize::from(raw_boolean_node(node)); + own + node.children.iter().map(raw_boolean_count).sum::() } struct Analyzer { diff --git a/gems/decomplex/rust/src/decomplex/mod.rs b/gems/decomplex/rust/src/decomplex/mod.rs index 6f0af1177..48dd07632 100644 --- a/gems/decomplex/rust/src/decomplex/mod.rs +++ b/gems/decomplex/rust/src/decomplex/mod.rs @@ -2,7 +2,13 @@ mod architecture_test; pub mod ast; +pub mod convergence; +pub mod delta; pub mod detectors; pub mod parallel; +pub mod report; pub mod report_facts; +pub mod report_value; +pub mod root_cause; +pub mod sarif; pub mod syntax; diff --git a/gems/decomplex/rust/src/decomplex/report.rs b/gems/decomplex/rust/src/decomplex/report.rs new file mode 100644 index 000000000..651725d09 --- /dev/null +++ 
b/gems/decomplex/rust/src/decomplex/report.rs @@ -0,0 +1,1237 @@ +use crate::decomplex::convergence::{self, Unit}; +use crate::decomplex::report_value as rv; +use crate::decomplex::root_cause::{self, Cluster}; +use crate::decomplex::{delta, sarif}; +use anyhow::{bail, Result}; +use serde_json::{json, Value}; + +#[derive(Clone, Debug)] +pub struct ReportSection { + pub title: String, + pub tier: i64, + pub desc: String, + pub findings: Vec, + convergence_excluded: bool, +} + +impl ReportSection { + pub fn new(title: &str, tier: i64, desc: &str, findings: Vec) -> Self { + Self { + title: title.to_string(), + tier, + desc: desc.to_string(), + findings, + convergence_excluded: false, + } + } + + fn excluded_from_convergence(mut self) -> Self { + self.convergence_excluded = true; + self + } +} + +#[derive(Clone, Debug)] +pub struct Report { + files: Vec, + sections: Vec, + convergence: Vec, + root: Vec, +} + +impl Report { + pub fn from_facts(facts: &Value) -> Result { + let files = rv::field_array_strings(facts, "files"); + let Some(detectors) = rv::get(facts, "detectors") else { + bail!("report facts missing detectors"); + }; + let sections = build_sections(detectors); + validate_spans(§ions)?; + let convergence_sections = sections + .iter() + .filter(|section| !section.convergence_excluded) + .cloned() + .collect::>(); + let convergence = convergence::rollup(&convergence_sections, 2); + let root = root_cause::cluster(&convergence_sections, 2); + Ok(Self { + files, + sections, + convergence, + root, + }) + } + + pub fn to_markdown(&self) -> String { + let mut out = String::from("# Decomplex Report\n\n"); + out.push_str("> Decision-level duplication and neglected-condition analysis.\n"); + out.push_str("> Every entry is a ranked **candidate** (Engler's discipline),\n"); + out.push_str("> never a verdict -- *POSSIBLE* findings, triaged by a human.\n"); + out.push_str("> Sections are ordered by SIGNAL TIER (1 = lowest false\n"); + out.push_str("> positive), not by 
volume. Items within a section are\n"); + out.push_str("> frequency-ranked. Triage tier 1, top-of-list, first.\n\n"); + + out.push_str("## Table of Contents\n"); + out.push_str("- [Project Prioritization](#project-prioritization)\n"); + out.push_str(&format!( + "- [Cross-Detector Convergence ({})](#cross-detector-convergence-{})\n", + self.convergence.len(), + self.convergence.len() + )); + out.push_str(&format!( + "- [Root-Cause Clusters ({})](#root-cause-clusters-{})\n", + self.root.len(), + self.root.len() + )); + for section in &self.sections { + out.push_str(&format!( + "- [{} ({})](#{}-{})\n", + section.title, + section.findings.len(), + slug(§ion.title), + section.findings.len() + )); + } + out.push_str("- [Run Summary](#run-summary)\n\n"); + + self.render_project_prioritization(&mut out); + self.render_convergence(&mut out); + self.render_root_cause(&mut out); + + for section in &self.sections { + out.push_str(&format!( + "## {} ({})\n", + section.title, + section.findings.len() + )); + out.push_str(&format!("_{}_\n\n", section.desc)); + if section.findings.is_empty() { + out.push_str("None.\n\n"); + continue; + } + self.render_section(&mut out, section); + out.push('\n'); + } + + out.push_str("## Run Summary\n"); + out.push_str(&format!("- Files analyzed: {}\n", self.files.len())); + out.push_str(&format!( + "- Detectors: {} (all shipped, self-tested)\n", + self.sections.len() + )); + out.push_str(&format!( + "- Convergence: {} unit(s) flagged by >=2 independent detectors\n", + self.convergence.len() + )); + out.push_str(&format!( + "- Root-cause clusters: {} (one fix collapses each)\n", + self.root.len() + )); + let total: usize = self + .sections + .iter() + .map(|section| section.findings.len()) + .sum(); + out.push_str(&format!("- Total candidates: {total}\n")); + out.push_str("- Method: stdlib AST only, intra-procedural, zero deps, no CFG / no points-to; Type-2/3 similarity uses Tree-sitter structural fingerprints (see docs/agents/design.md)\n"); + 
out + } + + pub fn to_sarif(&self) -> String { + serde_json::to_string_pretty(&self.to_sarif_value(true, true, None)).unwrap() + } + + pub fn to_sarif_value( + &self, + include_snapshot: bool, + include_finding_payload: bool, + max_results: Option, + ) -> Value { + let snapshot = delta::snapshot(&self.sections, &self.root); + let mut results = self.sarif_results(include_finding_payload); + if let Some(max_results) = max_results { + results = ranked_sarif_results(results) + .into_iter() + .take(max_results) + .collect(); + } + let mut properties = json!({ + "format": "decomplex.report.sarif.v1", + "files": self.files, + }); + if include_snapshot { + if let Some(object) = properties.as_object_mut() { + object.insert("decomplex.snapshot".to_string(), snapshot); + } + } + sarif::document( + "Decomplex", + self.sarif_rules(), + results, + Some("https://github.com/cuzzo/clear"), + properties, + ) + } + + fn render_project_prioritization(&self, out: &mut String) { + out.push_str("## Project Prioritization\n"); + out.push_str( + "_Ordered by signal tier (1 = highest signal / lowest FP), then by volume._\n\n", + ); + let mut ranked = self + .sections + .iter() + .enumerate() + .filter(|(_, section)| !section.findings.is_empty()) + .collect::>(); + ranked.sort_by(|(left_index, left), (right_index, right)| { + left.tier + .cmp(&right.tier) + .then_with(|| right.findings.len().cmp(&left.findings.len())) + .then_with(|| left_index.cmp(right_index)) + }); + for (_, section) in ranked { + out.push_str(&format!( + "- **[tier {}]** [{} ({})](#{}-{}): {}\n", + section.tier, + section.title, + section.findings.len(), + slug(§ion.title), + section.findings.len(), + section.desc + )); + } + if self + .sections + .iter() + .all(|section| section.findings.is_empty()) + { + out.push_str("\nNothing flagged.\n"); + } + out.push('\n'); + } + + fn render_convergence(&self, out: &mut String) { + out.push_str(&format!( + "## Cross-Detector Convergence ({})\n", + self.convergence.len() + )); + 
out.push_str("_(file, method) units flagged by >=2 INDEPENDENT detectors -- the strongest triage signal: agreement outranks any single detector's volume. Tier-weighted (1=3, 2=2, 3=1). **Start here.**_\n\n"); + if self.convergence.is_empty() { + out.push_str("None (no unit flagged by >=2 detectors).\n\n"); + return; + } + for hit in self.convergence.iter().take(25) { + out.push_str(&format!( + "- {} -- **{} detectors** [score {}, {} findings]: {}\n", + nav(&hit.at), + hit.n_detectors, + hit.score, + hit.findings, + hit.detectors.join(", ") + )); + } + if self.convergence.len() > 25 { + out.push_str(&format!("- ...(+{} more)\n", self.convergence.len() - 25)); + } + let by_file = convergence::by_file(&self.convergence); + if !by_file.is_empty() { + out.push_str("\n### By file\n"); + for hit in by_file.iter().take(15) { + out.push_str(&format!( + "- `{}` -- {} detectors across {} method(s): {}\n", + hit.file, + hit.n_detectors, + hit.methods, + hit.detectors.join(", ") + )); + } + } + out.push('\n'); + } + + fn render_root_cause(&self, out: &mut String) { + out.push_str(&format!("## Root-Cause Clusters ({})\n", self.root.len())); + out.push_str("_Findings across >=2 INDEPENDENT detectors that name the SAME entity -- 'N findings are really one invariant'. Convergence says where to look; this says **what one fix collapses the cluster**. 
Ranked candidate, not a verdict._\n\n"); + if self.root.is_empty() { + out.push_str("None (no entity named by >=2 detectors).\n\n"); + return; + } + for hit in self.root.iter().take(20) { + let tag = if hit.fat_union { + format!("[{} | FAT-UNION]", hit.kind) + } else { + format!("[{}]", hit.kind) + }; + out.push_str(&format!( + "- **{}** `{}` -- **{} detectors** [score {}] across {} unit(s), {} findings: {}\n - FIX: {}\n - {}\n", + tag, + hit.token, + hit.n_detectors, + hit.score, + hit.scatter, + hit.support, + hit.detectors.join(", "), + hit.fix, + hit.sites.iter().take(4).map(|site| nav(site)).collect::>().join(" ; ") + )); + } + if self.root.len() > 20 { + out.push_str(&format!("- ...(+{} more)\n", self.root.len() - 20)); + } + out.push('\n'); + } + + fn render_section(&self, out: &mut String, section: &ReportSection) { + for finding in section.findings.iter().take(25) { + out.push_str(&render_finding(§ion.title, finding)); + } + if section.findings.len() > 25 { + out.push_str(&format!("- ...(+{} more)\n", section.findings.len() - 25)); + } + } + + fn sarif_rules(&self) -> Vec { + self.sections + .iter() + .map(|section| { + sarif::rule( + &sarif_rule_id(§ion.title), + Some(§ion.title), + Some(§ion.desc), + None, + if section.tier <= 1 { "warning" } else { "note" }, + None, + json!({ "tier": section.tier }), + ) + }) + .collect() + } + + fn sarif_results(&self, include_finding_payload: bool) -> Vec { + let mut out = Vec::new(); + for section in &self.sections { + for finding in §ion.findings { + for location in sarif_locations_for_finding(finding) { + let mut properties = json!({ + "detector": section.title, + "tier": section.tier, + "method": location.method, + }); + if include_finding_payload { + if let Some(object) = properties.as_object_mut() { + object.insert( + "decomplex_finding".to_string(), + delta::json_safe_finding(§ion.title, finding), + ); + } + } + out.push(sarif::result( + &sarif_rule_id(§ion.title), + &sarif_message(§ion.title, finding, 
&location), + location.path.as_deref(), + Some(location.line), + location.start_column, + location.end_line, + location.end_column, + if section.tier <= 1 { "warning" } else { "note" }, + properties, + json!({ "decomplexFinding": delta::fingerprint(§ion.title, finding) }), + )); + } + } + } + out + } +} + +#[derive(Clone, Debug)] +struct SarifLocation { + path: Option, + method: Option, + line: i64, + start_column: Option, + end_line: Option, + end_column: Option, +} + +fn build_sections(detectors: &Value) -> Vec { + let miner = rv::get(detectors, "miner").unwrap_or(&Value::Null); + let co_update = rv::get(detectors, "co_update").unwrap_or(&Value::Null); + let semantic_alias = rv::get(detectors, "semantic_alias").unwrap_or(&Value::Null); + let path_condition = rv::get(detectors, "path_condition").unwrap_or(&Value::Null); + let sequence_mine = rv::get(detectors, "sequence_mine").unwrap_or(&Value::Null); + let fat_union = rv::get(detectors, "fat_union").unwrap_or(&Value::Null); + let operational = direct_array(detectors, "operational_discontinuity"); + let (operational_high, operational_rest): (Vec<_>, Vec<_>) = operational + .into_iter() + .partition(|finding| rv::field(finding, "confidence") == "high"); + + vec![ + section("Decision Pressure", 1, "ELIMINABLE guard-pressure per loose contract (nil/is_a?/respond_to?/safe-nav/rescue-nil) -> tighten the contract once / nil-kill: DELETE. 
essential dispatch + pure c-uses are split out, NEVER summed (Rapps-Weyuker p-use; McCabe)", direct_array(detectors, "decision_pressure")), + section("Redundant Nil Guards", 1, "nil checks / safe-nav dominated by an earlier non-nil proof -- delete repeated control flow or tighten the type", direct_array(detectors, "redundant_nil_guard")), + section("State Heatmap", 1, "state fields ranked by write/read/re-derivation scatter -- tangled mutable state should get one owner", direct_array(detectors, "state_heatmap")).excluded_from_convergence(), + section("State-Based Branch Density", 1, "branch decisions over mutable/object state -- state + control-flow pressure", direct_array(detectors, "state_branch_density")), + section("Temporal Ordering Pressure", 1, "public mutable lifecycle surfaces that create implicit state-machine ordering", direct_array(detectors, "temporal_ordering_pressure")), + section("Missing Abstractions", 1, "guard tuple recomputed across >=2 decision units", nested_array(miner, "missing_abstractions")), + section("Reification Misses", 1, "an existing predicate reinvented inline -- invariant #16", nested_array(semantic_alias, "reification_misses")), + section("Semantic Predicate Aliases", 1, "one decision, multiple names (receiver/polarity folded)", nested_array(semantic_alias, "alias_clusters")), + section("Exact Predicate Aliases", 1, "identical one-line predicate body under >=2 names", nested_array(rv::get(detectors, "predicate_alias").unwrap_or(&Value::Null), "alias_clusters")), + section("Inconsistent Rename Clones", 2, "pasted block with inconsistent identifier mapping -- *POSSIBLE* missed rename bug", direct_array(detectors, "inconsistent_rename_clone")), + section("Structural Similarity (Type-2/3)", 2, "Tree-sitter structural clone pressure: Type-2 renamed clones and Type-3 fuzzy clones -- refactor pressure, not a verdict", direct_array(detectors, "flay_similarity")), + section("Neglected Updates", 2, "co-written state, one write missing -- 
*POSSIBLE* redundant-state desync", nested_array(co_update, "neglected_updates")), + section("Derived-State Staleness", 2, "b = f(a); a later reassigned, b not recomputed -- *POSSIBLE* bug", direct_array(detectors, "derived_state")), + section("Neglected Conditions", 2, "dispatch/conjunction minus one element -- *POSSIBLE* bug", nested_array(miner, "neglected_conditions")), + section("Neglected Path Conditions", 3, "nested-if/&& guard set minus one atom -- *POSSIBLE* bug (noisy)", nested_array(path_condition, "neglected")), + section("Oversized Predicates", 3, "predicate with >3 condition atoms -- use an existing helper or extract a named predicate", direct_array(detectors, "oversized_predicate")), + section("Broken Protocols", 3, "co-called pair, one site does A without B -- *POSSIBLE* bug (noisy)", nested_array(sequence_mine, "broken_protocol")), + section("Implicit Control Flow", 2, "state-dependent internal call order exists -- hidden lifecycle/control-flow pressure", nested_array(rv::get(detectors, "implicit_control_flow").unwrap_or(&Value::Null), "ordered_protocols")), + section("Weighted Inlined Cognitive Complexity", 2, "same-owner helper chain hides cognitive load behind a low-looking orchestration method", direct_array(detectors, "weighted_inlined_complexity")), + section("Locality Drag", 2, "local initialized far before first use while unrelated work runs -- move setup closer or extract a private phase", direct_array(detectors, "locality_drag")), + section("Operational Discontinuity (High Confidence)", 2, "strong blank/comment phase boundary where local variable lifetimes reset -- likely implicit sub-function boundary", operational_high), + section("Function LCOM", 3, "independent local data-flow components inside one method -- *POSSIBLE* mixed concerns", direct_array(detectors, "function_lcom")), + section("Operational Discontinuity", 3, "blank/comment phase boundary where local variable lifetimes reset -- *POSSIBLE* implicit sub-function boundary", 
operational_rest), + section("False Simplicity", 3, "looks simple, behaves non-locally: hidden dispatch/mutation/IO/context/reflection/reopen -- *POSSIBLE* (noisy)", direct_array(detectors, "false_simplicity")), + section("Fat Unions", 3, "case dispatch over class consts whose arms read mostly variant-invariant members -- product-vs-sum decomposition candidate (extraction -> nil-kill) -- *POSSIBLE*", nested_array(fat_union, "fat_unions")), + ] +} + +fn section(title: &str, tier: i64, desc: &str, findings: Vec) -> ReportSection { + ReportSection::new(title, tier, desc, findings) +} + +fn direct_array(value: &Value, key: &str) -> Vec { + rv::array(value, key).to_vec() +} + +fn nested_array(value: &Value, key: &str) -> Vec { + rv::array(value, key).to_vec() +} + +fn validate_spans(sections: &[ReportSection]) -> Result<()> { + for section in sections + .iter() + .filter(|section| !section.convergence_excluded) + { + for finding in §ion.findings { + let Some(spans) = rv::get(finding, "spans").and_then(Value::as_object) else { + continue; + }; + for (loc, span) in spans { + if span.is_null() { + continue; + } + let values = span.as_array(); + let ok = values.is_some_and(|values| { + values.len() == 4 + && values[0].as_i64().is_some() + && values[2].as_i64().is_some() + && values[0].as_i64() <= values[2].as_i64() + }); + if !ok { + bail!( + "decomplex: {} emitted malformed span {} for {}", + section.title, + span, + loc + ); + } + } + } + } + Ok(()) +} + +pub fn slug(title: &str) -> String { + title + .to_lowercase() + .chars() + .map(|ch| { + if ch.is_ascii_alphanumeric() || ch == ' ' { + ch + } else { + '\0' + } + }) + .filter(|ch| *ch != '\0') + .collect::() + .replace(' ', "-") +} + +pub fn nav(loc: &str) -> String { + let parts = loc.split(':').collect::>(); + if parts.len() < 3 { + return loc.to_string(); + } + let line = parts[parts.len() - 1]; + let method = parts[parts.len() - 2]; + let file = parts[..parts.len() - 2].join(":"); + format!("`{file}:{line}` 
({method})") +} + +fn render_finding(title: &str, h: &Value) -> String { + match title { + "Decision Pressure" => format!( + "- `{}` -- ELIMINABLE guard-pressure **{}** across {} method(s) -> tighten contract / nil-kill: DELETE{}\n - {}\n", + rv::field(h, "contract"), + rv::field(h, "decisions"), + rv::field(h, "methods"), + if rv::positive(h, "essential") { + format!(" (+{} essential dispatch on this contract -- legitimate; leave unless Fat-Union/Missing-Abstractions says re-derived)", rv::field(h, "essential")) + } else { + String::new() + }, + rv::array(h, "sites").iter().take(4).map(|site| nav(&rv::string(Some(site)))).collect::>().join(" ; ") + ), + "Redundant Nil Guards" => format!( + "- {} -- redundant nil guard on `{}`: `{}`\n - proof: {}\n", + nav(&rv::field(h, "at")), + rv::field(h, "local"), + rv::field(h, "guard"), + rv::field(h, "proof") + ), + "Missing Abstractions" => format!( + "- **[{}]** support={} scatter={} rank={}\n - tuple: `{}`\n - {}\n", + rv::field(h, "kind"), + rv::field(h, "support"), + rv::field(h, "scatter"), + rv::field(h, "rank"), + rv::join_field(h, "members", " | "), + rv::array(h, "sites").iter().take(6).map(|site| nav(&rv::string(Some(site)))).collect::>().join(" ; ") + ), + "State Heatmap" => render_state_heatmap_item(h), + "State-Based Branch Density" => format!( + "- {} -- **{}** state-based branch decision(s), refs=`{}` score={}\n - example predicate: `{}`\n", + nav(&rv::field(h, "at")), + rv::field(h, "decisions"), + rv::array(h, "state_refs").iter().take(8).map(|v| rv::string(Some(v))).collect::>().join(" | "), + rv::field(h, "score"), + rv::field(h, "predicate") + ), + "Temporal Ordering Pressure" => format!( + "- `{}` ({}) -- implicit lifecycle score **{}** (public={}, state methods={}, writers={}, fields={}, shared={}, flows={}, states={})\n - shared fields: `{}`\n - surface: {}\n", + rv::field(h, "owner"), + nav(&rv::field(h, "at")), + rv::field(h, "score"), + rv::field(h, "public_methods"), + rv::field(h, 
"state_methods"), + rv::field(h, "writers"), + rv::array_len(h, "state_fields"), + rv::array_len(h, "shared_fields"), + rv::field(h, "orderings"), + rv::field(h, "state_space"), + rv::array(h, "shared_fields").iter().take(8).map(|v| rv::string(Some(v))).collect::>().join(" | "), + rv::array(h, "sites").iter().take(6).map(|site| nav(&rv::string(Some(site)))).collect::>().join(" ; ") + ), + "Neglected Conditions" | "Neglected Path Conditions" => { + let pattern = rv::get(h, "pattern").or_else(|| rv::get(h, "guards")); + format!( + "- *POSSIBLE* (support={}) {} -- MISSING `{}` from `{}`\n", + rv::field(h, "support"), + nav(&rv::field(h, "at")), + rv::field(h, "missing"), + rv::array_strings(pattern).join(" | ") + ) + } + "Oversized Predicates" => format!( + "- *POSSIBLE* {} -- {} condition atoms in `{}`\n - atoms: `{}`\n", + nav(&rv::field(h, "at")), + rv::field(h, "count"), + rv::field(h, "predicate"), + rv::array(h, "atoms").iter().take(8).map(|v| rv::string(Some(v))).collect::>().join(" | ") + ), + "Neglected Updates" => format!( + "- *POSSIBLE* (support={}) {} writes `.{}` but NOT `.{}` (recv `{}`)\n", + rv::field(h, "support"), + nav(&rv::field(h, "at")), + rv::field(h, "has"), + rv::field(h, "missing"), + rv::field(h, "recv") + ), + "Semantic Predicate Aliases" | "Exact Predicate Aliases" => format!( + "- `{}` == `{}`\n - {}\n", + rv::join_field(h, "names", " = "), + if rv::get(h, "canon").is_some() { rv::field(h, "canon") } else { rv::field(h, "body") }, + rv::array(h, "sites").iter().map(|site| nav(&rv::string(Some(site)))).collect::>().join(" ; ") + ), + "Reification Misses" => format!( + "- predicate `{}` reinvented inline at {} (`{}`)\n", + rv::field(h, "predicate"), + nav(&rv::field(h, "at")), + rv::field(h, "raw") + ), + "Broken Protocols" => format!( + "- *POSSIBLE* conf={} support={} {} does `{}` without `{}`\n", + rv::field(h, "confidence"), + rv::field(h, "support"), + nav(&rv::field(h, "at")), + rv::field(h, "has"), + rv::field(h, "missing") + ), + 
"Implicit Control Flow" => render_implicit_control_flow_item(h), + "Weighted Inlined Cognitive Complexity" => render_weighted_inlined_complexity_item(h), + "Locality Drag" => render_locality_drag_item(h), + "Function LCOM" => render_function_lcom_item(h), + "Operational Discontinuity" | "Operational Discontinuity (High Confidence)" => { + render_operational_discontinuity_item(h) + } + "False Simplicity" => format!( + "- *POSSIBLE* [{}] scatter={} support={} `{}` -- {}{}\n", + rv::field(h, "kind"), + rv::field(h, "scatter"), + rv::field(h, "support"), + rv::field(h, "detail"), + nav(&rv::field(h, "at")), + if rv::array_len(h, "sites") > 1 { + format!(" (+{} more)", rv::array_len(h, "sites") - 1) + } else { + String::new() + } + ), + "Fat Unions" => format!( + "- *POSSIBLE*{} union `{}` -- **{} common** vs {} variant member(s), scatter={} -- {}\n - common: `{}` -> hoist to a struct, keep a SMALL union for `{}` (-> nil-kill)\n", + if rv::field_bool(h, "degenerate") { " [DEGENERATE: no variance]" } else { "" }, + rv::join_field(h, "variant_set", " | "), + rv::array_len(h, "common"), + rv::array_len(h, "variant"), + rv::field(h, "scatter"), + nav(&rv::field(h, "at")), + rv::array(h, "common").iter().take(8).map(|v| rv::string(Some(v))).collect::>().join(", "), + rv::array(h, "variant").iter().take(6).map(|v| rv::string(Some(v))).collect::>().join(", ") + ), + "Derived-State Staleness" => format!( + "- *POSSIBLE* {}: `{}` derived from `{}` (line {}); `{}` reassigned line {}, `{}` not recomputed\n", + nav(&rv::field(h, "at")), + rv::field(h, "derived"), + rv::field(h, "source"), + rv::field(h, "derived_at"), + rv::field(h, "source"), + rv::field(h, "source_reassigned_at"), + rv::field(h, "derived") + ), + "Inconsistent Rename Clones" => format!( + "- *POSSIBLE* {} clone of {}: ref var `{}` spelled {} here\n", + nav(&rv::field(h, "at")), + nav(&rv::field(h, "ref_at")), + rv::field(h, "ref_name"), + rv::ruby_inspect_array(rv::get(h, "divergent")) + ), + "Structural 
Similarity (Type-2/3)" => format!( + "- *POSSIBLE* [{}] mass={} node=`{}` {}{}\n", + rv::field(h, "clone_type"), + rv::field(h, "mass"), + rv::field(h, "node"), + rv::array(h, "sites").iter().take(4).map(|site| nav(&rv::string(Some(site)))).collect::>().join(" ; "), + if rv::array_len(h, "sites") > 4 { + format!(" (+{} more)", rv::array_len(h, "sites") - 4) + } else { + String::new() + } + ), + _ => String::new(), + } +} + +fn render_state_heatmap_item(item: &Value) -> String { + let mut out = format!( + "- `{}` -- messiness **{}** (writes={}, reads={}, re-derived={}, scatter={}, receiver patterns={})\n", + rv::field(item, "field"), + rv::field(item, "messiness"), + rv::field(item, "writes"), + rv::field(item, "reads"), + rv::field(item, "re_derivations"), + rv::field(item, "scatter"), + rv::field(item, "receiver_types") + ); + let writers = rv::array(item, "top_writers") + .iter() + .map(|site| nav(&rv::string(Some(site)))) + .collect::>(); + let readers = rv::array(item, "top_readers") + .iter() + .map(|site| nav(&rv::string(Some(site)))) + .collect::>(); + if !writers.is_empty() { + out.push_str(&format!(" - writers: {}\n", writers.join(" ; "))); + } + if !readers.is_empty() { + out.push_str(&format!(" - readers: {}\n", readers.join(" ; "))); + } + out +} + +fn render_implicit_control_flow_item(item: &Value) -> String { + if rv::kind_is(item, "kind", "order_drift") { + return format!( + "- *POSSIBLE* [order_drift] conf={} support={} {} observed `{}` against protocol `{}` ({} state=`{}`)\n", + rv::field(item, "confidence"), + rv::field(item, "support"), + nav(&rv::field(item, "at")), + rv::join_field(item, "observed", " -> "), + rv::join_field(item, "protocol", " -> "), + rv::join_field(item, "dependency", "|"), + rv::join_field(item, "states", " | ") + ); + } + let sites = rv::array(item, "sites") + .iter() + .take(4) + .map(|site| nav(&rv::string(Some(site)))) + .collect::>() + .join(" ; "); + let more = if rv::array_len(item, "sites") > 4 { + format!(" (+{} 
more)", rv::array_len(item, "sites") - 4) + } else { + String::new() + }; + format!( + "- *POSSIBLE* [protocol_pressure] support={} `{}` ({} state=`{}`) -- {}\n - sites: {}{}\n", + rv::field(item, "support"), + rv::join_field(item, "protocol", " -> "), + rv::join_field(item, "dependency", "|"), + rv::join_field(item, "states", " | "), + nav(&rv::field(item, "at")), + sites, + more + ) +} + +fn render_weighted_inlined_complexity_item(item: &Value) -> String { + format!( + "- *POSSIBLE* {} -- inlined={} (local={}, hidden={}, depth={})\n - chain: `{}`\n - single-caller helpers: `{}`\n - reason: {}\n", + nav(&rv::field(item, "at")), + rv::field(item, "inlined"), + rv::field(item, "local"), + rv::field(item, "hidden"), + rv::field(item, "depth"), + rv::join_field(item, "call_chain", " -> "), + rv::array(item, "single_caller_callees").iter().take(8).map(|v| rv::string(Some(v))).collect::>().join(" | "), + rv::field(item, "reason") + ) +} + +fn render_locality_drag_item(item: &Value) -> String { + let mut out = format!( + "- *POSSIBLE* {} -- `{}` dormant until line {} score={} (gap={} lines, unrelated={}, boundaries={}, local={})\n - reason: {}\n", + nav(&rv::field(item, "at")), + rv::field(item, "variable"), + rv::field(item, "used_at"), + rv::field(item, "score"), + rv::field(item, "gap_lines"), + rv::field(item, "unrelated_statements"), + rv::field(item, "boundary_crossings"), + rv::field(item, "local_complexity"), + rv::field(item, "reason") + ); + if rv::positive(item, "setup_statements") { + out.push_str(&format!( + " - ignored setup initializers: {}\n", + rv::field(item, "setup_statements") + )); + } + if rv::array_len(item, "definition_deps") > 0 { + out.push_str(&format!( + " - definition deps: `{}`\n", + rv::array(item, "definition_deps") + .iter() + .take(6) + .map(|v| rv::string(Some(v))) + .collect::>() + .join(" | ") + )); + } + if rv::array_len(item, "use_reads") > 0 { + out.push_str(&format!( + " - first-use reads: `{}`\n", + rv::array(item, "use_reads") + 
.iter() + .take(8) + .map(|v| rv::string(Some(v))) + .collect::>() + .join(" | ") + )); + } + for boundary in rv::array(item, "boundaries").iter().take(2) { + out.push_str(&format!( + " - crosses line {} {}\n", + rv::field(boundary, "line"), + rv::field(boundary, "marker") + )); + } + for example in rv::array(item, "examples").iter().take(2) { + out.push_str(&format!( + " - unrelated line {}: `{}`\n", + rv::field(example, "line"), + rv::field(example, "source") + )); + } + out +} + +fn render_function_lcom_item(item: &Value) -> String { + let mode = if rv::kind_is(item, "mode", "late_join") { + "late_join" + } else { + "disjoint" + }; + let mut out = format!( + "- *POSSIBLE* [{}] {} -- score={} components={}, locals={}, statements={}\n", + mode, + nav(&rv::field(item, "at")), + rv::field(item, "score"), + rv::field(item, "components"), + rv::field(item, "locals"), + rv::field(item, "statements") + ); + for (index, vars) in rv::array(item, "component_vars").iter().take(4).enumerate() { + let lines = rv::array(item, "component_lines").get(index); + let var_text = rv::array_from(Some(vars)) + .iter() + .take(8) + .map(|value| rv::string(Some(value))) + .collect::>() + .join(" | "); + out.push_str(&format!(" - component {}: `{}`", index + 1, var_text)); + if let Some(lines) = lines { + let line_values = rv::array_from(Some(lines)); + if let (Some(first), Some(last)) = (line_values.first(), line_values.last()) { + out.push_str(&format!( + " (lines {}-{})", + rv::string(Some(first)), + rv::string(Some(last)) + )); + } + } + out.push('\n'); + } + out +} + +fn render_operational_discontinuity_item(item: &Value) -> String { + let reasons = rv::join_field(item, "confidence_reasons", ", "); + let confidence = if rv::get(item, "confidence").is_some() { + rv::field(item, "confidence") + } else { + "review".to_string() + }; + let mut out = format!( + "- *POSSIBLE* {} -- score={} reset_boundaries={}, dead={}, new={}, confidence={}", + nav(&rv::field(item, "at")), + 
rv::field(item, "score"), + rv::field(item, "resets"), + rv::field(item, "dead_total"), + rv::field(item, "new_total"), + confidence + ); + if !reasons.is_empty() { + out.push_str(&format!(" ({reasons})")); + } + out.push('\n'); + for reset in rv::array(item, "reset_points").iter().take(3) { + let marker = if rv::field(reset, "text").is_empty() { + rv::field(reset, "kind") + } else { + rv::field(reset, "text") + }; + out.push_str(&format!( + " - line {} {}: dead `{}` -> new `{}`", + rv::field(reset, "line"), + marker, + rv::array(reset, "dead") + .iter() + .take(6) + .map(|v| rv::string(Some(v))) + .collect::>() + .join(" | "), + rv::array(reset, "new") + .iter() + .take(6) + .map(|v| rv::string(Some(v))) + .collect::>() + .join(" | ") + )); + if rv::array_len(reset, "continuing") > 0 { + out.push_str(&format!( + " (continuing `{}`)", + rv::join_field(reset, "continuing", " | ") + )); + } + out.push('\n'); + } + out +} + +fn sarif_rule_id(title: &str) -> String { + format!("decomplex.{}", sarif::slug(title)) +} + +fn ranked_sarif_results(mut results: Vec) -> Vec { + results.sort_by(|left, right| { + let left_location = first_physical_location(left); + let right_location = first_physical_location(right); + tier_property(left) + .cmp(&tier_property(right)) + .then_with(|| rv::field(left, "ruleId").cmp(&rv::field(right, "ruleId"))) + .then_with(|| { + rv::get(left, "message") + .and_then(|message| rv::get(message, "text")) + .map(|value| rv::string(Some(value))) + .unwrap_or_default() + .cmp( + &rv::get(right, "message") + .and_then(|message| rv::get(message, "text")) + .map(|value| rv::string(Some(value))) + .unwrap_or_default(), + ) + }) + .then_with(|| { + left_location + .as_ref() + .and_then(|location| location.get("artifactLocation")) + .and_then(|artifact| artifact.get("uri")) + .map(|value| rv::string(Some(value))) + .unwrap_or_default() + .cmp( + &right_location + .as_ref() + .and_then(|location| location.get("artifactLocation")) + .and_then(|artifact| 
artifact.get("uri")) + .map(|value| rv::string(Some(value))) + .unwrap_or_default(), + ) + }) + .then_with(|| start_line(left_location).cmp(&start_line(right_location))) + }); + results +} + +fn tier_property(result: &Value) -> i64 { + rv::get(result, "properties") + .map(|properties| rv::field_i64(properties, "tier")) + .unwrap_or(0) +} + +fn first_physical_location(result: &Value) -> Option<&Value> { + rv::array(result, "locations") + .first() + .and_then(|location| rv::get(location, "physicalLocation")) +} + +fn start_line(location: Option<&Value>) -> i64 { + location + .and_then(|location| rv::get(location, "region")) + .map(|region| rv::field_i64(region, "startLine")) + .unwrap_or(0) +} + +fn sarif_message(title: &str, finding: &Value, location: &SarifLocation) -> String { + let detail = sarif_message_detail(title, finding); + if !detail.is_empty() { + return format!("{title}: {detail}"); + } + let subject = location + .method + .clone() + .filter(|value| !value.is_empty()) + .or_else(|| { + first_non_empty_field( + finding, + &[ + "method", "name", "field", "contract", "owner", "token", "kind", + ], + ) + }); + [Some(title.to_string()), subject] + .into_iter() + .flatten() + .collect::>() + .join(": ") +} + +fn first_non_empty_field(finding: &Value, keys: &[&str]) -> Option { + keys.iter() + .map(|key| rv::field(finding, key)) + .find(|value| !value.is_empty()) +} + +fn sarif_message_detail(title: &str, finding: &Value) -> String { + match title { + "Decision Pressure" => format!( + "`{}` creates {} eliminable guard decision(s) across {} method(s)", + rv::field(finding, "contract"), + rv::field(finding, "decisions"), + rv::field(finding, "methods") + ), + "Redundant Nil Guards" => format!( + "`{}` is nil-guarded by `{}` after proof `{}`", + rv::field(finding, "local"), + rv::field(finding, "guard"), + rv::field(finding, "proof") + ), + "State Heatmap" => format!( + "state `{}` has pressure={}, messiness={} (writes={}, reads={}, re-derived={}, scatter={}); 
writers {}; readers {}", + rv::field(finding, "field"), + rv::field(finding, "pressure"), + rv::field(finding, "messiness"), + rv::field(finding, "writes"), + rv::field(finding, "reads"), + rv::field(finding, "re_derivations"), + rv::field(finding, "scatter"), + rv::array(finding, "top_writers").iter().take(3).map(|v| rv::string(Some(v))).collect::>().join(" | "), + rv::array(finding, "top_readers").iter().take(3).map(|v| rv::string(Some(v))).collect::>().join(" | ") + ), + "Missing Abstractions" => format!( + "guard tuple `{}` repeats in {} site(s) with scatter={}", + rv::join_field(finding, "members", " | "), + rv::field(finding, "support"), + rv::field(finding, "scatter") + ), + "State-Based Branch Density" => format!( + "{} state-based branch decision(s) over `{}`; example predicate `{}`", + rv::field(finding, "decisions"), + rv::array(finding, "state_refs").iter().take(8).map(|v| rv::string(Some(v))).collect::>().join(" | "), + rv::field(finding, "predicate") + ), + "Temporal Ordering Pressure" => format!( + "`{}` exposes mutable lifecycle pressure score={} (public={}, state_methods={}, writers={})", + rv::field(finding, "owner"), + rv::field(finding, "score"), + rv::field(finding, "public_methods"), + rv::field(finding, "state_methods"), + rv::field(finding, "writers") + ), + "Neglected Conditions" | "Neglected Path Conditions" => { + let pattern = rv::get(finding, "pattern").or_else(|| rv::get(finding, "guards")); + format!( + "missing condition `{}` from `{}` (support={})", + rv::field(finding, "missing"), + rv::array_strings(pattern).join(" | "), + rv::field(finding, "support") + ) + } + "Oversized Predicates" => format!( + "{} condition atoms in predicate `{}`", + rv::field(finding, "count"), + rv::field(finding, "predicate") + ), + "Neglected Updates" => format!( + "writes `.{}` but not co-written `.{}` on receiver `{}` (support={})", + rv::field(finding, "has"), + rv::field(finding, "missing"), + rv::field(finding, "recv"), + rv::field(finding, 
"support") + ), + "Semantic Predicate Aliases" | "Exact Predicate Aliases" => format!( + "predicate aliases `{}` for `{}`", + rv::join_field(finding, "names", " = "), + if rv::get(finding, "canon").is_some() { + rv::field(finding, "canon") + } else { + rv::field(finding, "body") + } + ), + "Reification Misses" => format!( + "predicate `{}` is reinvented inline as `{}`", + rv::field(finding, "predicate"), + rv::field(finding, "raw") + ), + "Broken Protocols" => format!( + "does `{}` without co-called `{}` (support={}, confidence={})", + rv::field(finding, "has"), + rv::field(finding, "missing"), + rv::field(finding, "support"), + rv::field(finding, "confidence") + ), + "Implicit Control Flow" => sarif_implicit_control_flow_detail(finding), + "Weighted Inlined Cognitive Complexity" => format!( + "inlined={} (local={}, hidden={}, depth={}); chain `{}`", + rv::field(finding, "inlined"), + rv::field(finding, "local"), + rv::field(finding, "hidden"), + rv::field(finding, "depth"), + rv::join_field(finding, "call_chain", " -> ") + ), + "Locality Drag" => format!( + "`{}` is initialized at line {} but first used at line {} after {} unrelated statement(s)", + rv::field(finding, "variable"), + rv::field(finding, "defined_at"), + rv::field(finding, "used_at"), + rv::field(finding, "unrelated_statements") + ), + "Function LCOM" => { + let mode = if rv::kind_is(finding, "mode", "late_join") { + "late_join" + } else { + "disjoint" + }; + format!( + "{} local data-flow: score={}, components={}, locals={}, statements={}", + mode, + rv::field(finding, "score"), + rv::field(finding, "components"), + rv::field(finding, "locals"), + rv::field(finding, "statements") + ) + } + "Operational Discontinuity" | "Operational Discontinuity (High Confidence)" => format!( + "score={}, reset_boundaries={}, dead={}, new={}, confidence={}", + rv::field(finding, "score"), + rv::field(finding, "resets"), + rv::field(finding, "dead_total"), + rv::field(finding, "new_total"), + if rv::get(finding, 
"confidence").is_some() { + rv::field(finding, "confidence") + } else { + "review".to_string() + } + ), + "False Simplicity" => format!( + "[{}] `{}` support={}, scatter={}", + rv::field(finding, "kind"), + rv::field(finding, "detail"), + rv::field(finding, "support"), + rv::field(finding, "scatter") + ), + "Fat Unions" => format!( + "union `{}` has {} common and {} variant member(s), scatter={}", + rv::join_field(finding, "variant_set", " | "), + rv::array_len(finding, "common"), + rv::array_len(finding, "variant"), + rv::field(finding, "scatter") + ), + "Derived-State Staleness" => format!( + "`{}` derived from `{}` at line {}; `{}` reassigned at line {} but `{}` is not recomputed", + rv::field(finding, "derived"), + rv::field(finding, "source"), + rv::field(finding, "derived_at"), + rv::field(finding, "source"), + rv::field(finding, "source_reassigned_at"), + rv::field(finding, "derived") + ), + "Inconsistent Rename Clones" => format!( + "clone of {}: reference variable `{}` diverges as {}", + rv::field(finding, "ref_at"), + rv::field(finding, "ref_name"), + rv::ruby_inspect_array(rv::get(finding, "divergent")) + ), + "Structural Similarity (Type-2/3)" => format!( + "[{}] mass={} node=`{}` across {} site(s)", + rv::field(finding, "clone_type"), + rv::field(finding, "mass"), + rv::field(finding, "node"), + rv::array_len(finding, "sites") + ), + _ => String::new(), + } +} + +fn sarif_implicit_control_flow_detail(finding: &Value) -> String { + let protocol = rv::join_field(finding, "protocol", " -> "); + let dependency = rv::join_field(finding, "dependency", "|"); + let states = rv::join_field(finding, "states", " | "); + if rv::kind_is(finding, "kind", "order_drift") { + return format!( + "[order_drift] observed `{}` against protocol `{}` ({} state=`{}`)", + rv::join_field(finding, "observed", " -> "), + protocol, + dependency, + states + ); + } + format!( + "[protocol_pressure] protocol `{}` ({} state=`{}`), support={}", + protocol, + dependency, + states, + 
rv::field(finding, "support") + ) +} + +fn sarif_locations_for_finding(finding: &Value) -> Vec { + if let Some(spans) = rv::get(finding, "spans").and_then(Value::as_object) { + if !spans.is_empty() { + return spans + .iter() + .filter_map(|(loc, span)| { + let mut parsed = parse_sarif_loc(loc); + parsed.path.as_ref()?; + let span = rv::array_from(Some(span)); + parsed.line = span + .first() + .and_then(Value::as_i64) + .filter(|line| *line > 0) + .unwrap_or(parsed.line); + parsed.start_column = span + .get(1) + .and_then(Value::as_i64) + .map(zero_based_column_to_sarif); + parsed.end_line = span.get(2).and_then(Value::as_i64).filter(|line| *line > 0); + parsed.end_column = span + .get(3) + .and_then(Value::as_i64) + .map(zero_based_column_to_sarif); + Some(parsed) + }) + .collect(); + } + } + + let mut locs = Vec::new(); + if let Some(value) = rv::get(finding, "at") { + locs.push(rv::string(Some(value))); + } + locs.extend(rv::field_array_strings(finding, "sites")); + if let Some(value) = rv::get(finding, "ref_at") { + locs.push(rv::string(Some(value))); + } + let mut seen = std::collections::HashSet::new(); + locs.retain(|loc| !loc.is_empty() && seen.insert(loc.clone())); + locs.into_iter() + .map(|loc| parse_sarif_loc(&loc)) + .filter(|loc| loc.path.is_some()) + .collect() +} + +fn parse_sarif_loc(loc: &str) -> SarifLocation { + let mut parts = loc.split(':').map(ToOwned::to_owned).collect::>(); + let line = if parts + .last() + .is_some_and(|part| part.chars().all(|ch| ch.is_ascii_digit())) + { + parts.pop().and_then(|part| part.parse::().ok()) + } else { + None + }; + let method = if parts.len() >= 2 { parts.pop() } else { None }; + let path = parts.join(":"); + SarifLocation { + path: (!path.is_empty()).then_some(path), + method, + line: line.filter(|line| *line > 0).unwrap_or(1), + start_column: None, + end_line: None, + end_column: None, + } +} + +fn zero_based_column_to_sarif(value: i64) -> i64 { + value + 1 +} + +#[cfg(test)] +mod tests { + use super::*; + 
+ #[test] + fn nav_splits_location_from_the_right() { + assert_eq!(nav("a:b.rb:m:10"), "`a:b.rb:10` (m)"); + } + + #[test] + fn slug_matches_ruby_report_anchor_shape() { + assert_eq!( + slug("Structural Similarity (Type-2/3)"), + "structural-similarity-type23" + ); + } +} diff --git a/gems/decomplex/rust/src/decomplex/report_facts.rs b/gems/decomplex/rust/src/decomplex/report_facts.rs index 18be88423..88003a1d0 100644 --- a/gems/decomplex/rust/src/decomplex/report_facts.rs +++ b/gems/decomplex/rust/src/decomplex/report_facts.rs @@ -10,9 +10,10 @@ use crate::decomplex::syntax::{self, Document, Language}; use anyhow::{bail, Context, Result}; use serde::Serialize; use serde_json::{json, Map, Value}; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::fs; use std::path::{Path, PathBuf}; +use std::process::Command; use std::sync::mpsc; use std::thread; @@ -30,12 +31,18 @@ const DEFAULT_EXCLUDE_DIRS: &[&str] = &[ "node_modules", ]; +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum VcsFilter { + Git, +} + #[derive(Clone, Debug)] pub struct Options { pub language: Option, pub excludes: Vec, pub mass: usize, pub fuzzy: usize, + pub vcs: Option, } impl Default for Options { @@ -45,6 +52,7 @@ impl Default for Options { excludes: Vec::new(), mass: DEFAULT_MASS, fuzzy: DEFAULT_FUZZY, + vcs: None, } } } @@ -88,6 +96,9 @@ pub fn collect_source_files(targets: &[PathBuf], options: &Options) -> Result Result { let mut rows = Vec::new(); for documents in groups.values() { - let report = state_mesh::scan_documents_with_semantic_aliases(documents, semantic_aliases); + let report = state_mesh::scan_documents_with_semantic_aliases_and_min_writes( + documents, + semantic_aliases, + 1, + ); rows.extend(state_heatmap_findings(&report)); } Ok(Value::Array(rows)) @@ -560,6 +575,94 @@ fn language_counts(files: &[SourceFile]) -> BTreeMap { counts } +fn retain_git_tracked_files(files: &mut Vec) -> Result<()> { + let tracked = 
git_tracked_paths_for_files(files)?; + files.retain(|file| tracked.contains(&normalize_path(&file.path))); + Ok(()) +} + +fn git_tracked_paths_for_files(files: &[SourceFile]) -> Result> { + let mut tracked = HashSet::new(); + for root in git_roots_for_files(files)? { + for path in git_ls_files(&root)? { + tracked.insert(path); + } + } + Ok(tracked) +} + +fn git_roots_for_files(files: &[SourceFile]) -> Result> { + let current_root = git_root_for_dir(&std::env::current_dir()?).ok(); + if let Some(root) = current_root { + let root = normalize_path(&root); + if files + .iter() + .all(|file| normalize_path(&file.path).starts_with(&root)) + { + return Ok(BTreeSet::from([root])); + } + } + + let mut roots = BTreeSet::new(); + for file in files { + let dir = file.path.parent().unwrap_or_else(|| Path::new(".")); + let root = git_root_for_dir(dir).with_context(|| { + format!( + "--vcs=git requires {} to be inside a Git work tree", + file.path.display() + ) + })?; + roots.insert(normalize_path(&root)); + } + Ok(roots) +} + +fn git_root_for_dir(dir: &Path) -> Result { + let output = Command::new("git") + .arg("-C") + .arg(dir) + .args(["rev-parse", "--show-toplevel"]) + .output() + .with_context(|| format!("failed to run git rev-parse in {}", dir.display()))?; + if !output.status.success() { + bail!("git rev-parse failed in {}", dir.display()); + } + let stdout = String::from_utf8(output.stdout) + .with_context(|| format!("git rev-parse output was not UTF-8 in {}", dir.display()))?; + Ok(PathBuf::from(stdout.trim())) +} + +fn git_ls_files(root: &Path) -> Result> { + let output = Command::new("git") + .arg("-C") + .arg(root) + .args(["ls-files", "-z"]) + .output() + .with_context(|| format!("failed to run git ls-files in {}", root.display()))?; + if !output.status.success() { + bail!("git ls-files failed in {}", root.display()); + } + let stdout = String::from_utf8(output.stdout) + .with_context(|| format!("git ls-files output was not UTF-8 in {}", root.display()))?; + 
Ok(stdout + .split('\0') + .filter(|path| !path.is_empty()) + .map(|path| normalize_path(&root.join(path))) + .collect()) +} + +fn normalize_path(path: &Path) -> PathBuf { + fs::canonicalize(path).unwrap_or_else(|_| { + if path.is_absolute() { + path.to_path_buf() + } else { + std::env::current_dir() + .unwrap_or_else(|_| PathBuf::from(".")) + .join(path) + } + }) +} + fn expand_target(target: &Path, options: &Options, out: &mut Vec) -> Result<()> { if target.is_dir() { expand_directory(target, options, out) @@ -632,3 +735,44 @@ fn excluded_path(path: &Path, options: &Options) -> bool { } }) } + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn git_vcs_filter_keeps_only_tracked_source_files() { + let dir = TempDir::new().expect("tempdir"); + run_git(dir.path(), &["init"]); + + let tracked = dir.path().join("tracked.rb"); + let untracked = dir.path().join("untracked.rb"); + fs::write(&tracked, "def tracked\nend\n").expect("write tracked"); + fs::write(&untracked, "def untracked\nend\n").expect("write untracked"); + run_git(dir.path(), &["add", "tracked.rb"]); + + let options = Options { + vcs: Some(VcsFilter::Git), + ..Options::default() + }; + let files = + collect_source_files(&[dir.path().to_path_buf()], &options).expect("source files"); + let names = files + .iter() + .map(|file| file.path.file_name().unwrap().to_string_lossy().to_string()) + .collect::>(); + + assert_eq!(names, vec!["tracked.rb"]); + } + + fn run_git(dir: &Path, args: &[&str]) { + let status = Command::new("git") + .arg("-C") + .arg(dir) + .args(args) + .status() + .expect("git command"); + assert!(status.success(), "git {:?} failed", args); + } +} diff --git a/gems/decomplex/rust/src/decomplex/report_value.rs b/gems/decomplex/rust/src/decomplex/report_value.rs new file mode 100644 index 000000000..048799558 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/report_value.rs @@ -0,0 +1,111 @@ +use serde_json::Value; + +pub fn get<'a>(value: &'a Value, key: 
&str) -> Option<&'a Value> { + value.as_object()?.get(key) +} + +pub fn string(value: Option<&Value>) -> String { + match value { + Some(Value::String(text)) => text.clone(), + Some(Value::Number(number)) => number.to_string(), + Some(Value::Bool(true)) => "true".to_string(), + Some(Value::Bool(false)) => "false".to_string(), + Some(Value::Null) | None => String::new(), + Some(other) => other.to_string(), + } +} + +pub fn field(value: &Value, key: &str) -> String { + string(get(value, key)) +} + +pub fn field_i64(value: &Value, key: &str) -> i64 { + match get(value, key) { + Some(Value::Number(number)) => number + .as_i64() + .or_else(|| number.as_u64().map(|n| n as i64)) + .unwrap_or(0), + Some(Value::String(text)) => text.parse().unwrap_or(0), + _ => 0, + } +} + +pub fn field_usize(value: &Value, key: &str) -> usize { + field_i64(value, key).max(0) as usize +} + +pub fn field_bool(value: &Value, key: &str) -> bool { + match get(value, key) { + Some(Value::Bool(value)) => *value, + Some(Value::String(text)) => text == "true", + _ => false, + } +} + +pub fn array<'a>(value: &'a Value, key: &str) -> &'a [Value] { + get(value, key) + .and_then(Value::as_array) + .map(Vec::as_slice) + .unwrap_or(&[]) +} + +pub fn array_from(value: Option<&Value>) -> &[Value] { + value + .and_then(Value::as_array) + .map(Vec::as_slice) + .unwrap_or(&[]) +} + +pub fn array_strings(value: Option<&Value>) -> Vec { + array_from(value) + .iter() + .map(|item| string(Some(item))) + .collect() +} + +pub fn field_array_strings(value: &Value, key: &str) -> Vec { + array_strings(get(value, key)) +} + +pub fn join(values: &[String], separator: &str) -> String { + values.join(separator) +} + +pub fn join_field(value: &Value, key: &str, separator: &str) -> String { + field_array_strings(value, key).join(separator) +} + +pub fn array_len(value: &Value, key: &str) -> usize { + array(value, key).len() +} + +pub fn positive(value: &Value, key: &str) -> bool { + field_i64(value, key) > 0 +} + +pub fn 
kind_is(value: &Value, key: &str, expected: &str) -> bool { + field(value, key) == expected +} + +pub fn ruby_inspect_array(value: Option<&Value>) -> String { + let parts = array_from(value) + .iter() + .map(ruby_inspect_value) + .collect::>(); + format!("[{}]", parts.join(", ")) +} + +fn ruby_inspect_value(value: &Value) -> String { + match value { + Value::String(text) => format!("{text:?}"), + Value::Number(number) => number.to_string(), + Value::Bool(true) => "true".to_string(), + Value::Bool(false) => "false".to_string(), + Value::Null => "nil".to_string(), + Value::Array(items) => { + let parts = items.iter().map(ruby_inspect_value).collect::>(); + format!("[{}]", parts.join(", ")) + } + Value::Object(_) => value.to_string(), + } +} diff --git a/gems/decomplex/rust/src/decomplex/root_cause.rs b/gems/decomplex/rust/src/decomplex/root_cause.rs new file mode 100644 index 000000000..66fe2de7a --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/root_cause.rs @@ -0,0 +1,287 @@ +use crate::decomplex::convergence; +use crate::decomplex::report::ReportSection; +use crate::decomplex::report_value as rv; +use regex::Regex; +use serde_json::Value; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::sync::OnceLock; + +const TUPLE_FIELDS: &[&str] = &["members", "guards", "pattern"]; +const NAME_ARRAY_FIELDS: &[&str] = &["pair", "names"]; +const NAME_STR_FIELDS: &[&str] = &[ + "field", + "derived", + "source", + "contract", + "canon", + "predicate", + "detail", + "ref_name", + "has", + "missing", +]; +const STOPWORDS: &[&str] = &[ + "nil", "true", "false", "self", "end", "do", "if", "then", "else", "self_", "it", "new", + "to_s", "call", "each", "map", +]; +const FAT_UNION_FIX: &str = "fat union -- decompose product-vs-sum: hoist the common fields to a struct, keep a SMALL union for the variant part (extraction is value-object work -> nil-kill owns it)"; + +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub struct Entity { + pub kind: String, + pub token: 
String, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Cluster { + pub kind: String, + pub token: String, + pub detectors: Vec, + pub n_detectors: usize, + pub support: usize, + pub scatter: usize, + pub score: i64, + pub fat_union: bool, + pub fix: String, + pub sites: Vec, +} + +#[derive(Clone, Debug)] +struct Accumulator { + dets: BTreeMap, + findings: Vec, + tiers: BTreeMap, +} + +pub fn cluster(sections: &[ReportSection], min_detectors: usize) -> Vec { + let mut acc: HashMap = HashMap::new(); + for section in sections { + for finding in §ion.findings { + for entity in entities(finding) { + let row = acc.entry(entity).or_insert_with(|| Accumulator { + dets: BTreeMap::new(), + findings: Vec::new(), + tiers: BTreeMap::new(), + }); + row.dets.insert(section.title.clone(), true); + row.tiers.insert(section.title.clone(), section.tier); + row.findings.push(finding.clone()); + } + } + } + + let mut clusters = acc + .into_iter() + .filter_map(|(entity, row)| { + if row.dets.len() < min_detectors { + return None; + } + let detectors = row.dets.keys().cloned().collect::>(); + let mut units = row + .findings + .iter() + .flat_map(finding_units) + .collect::>(); + units.sort(); + units.dedup(); + let score = row + .tiers + .values() + .map(|tier| convergence::tier_weight(*tier)) + .sum(); + let fat_union = fat_union(&entity.kind, &entity.token, &row.findings); + let mut sites = row + .findings + .iter() + .flat_map(convergence::locations) + .collect::>(); + let mut seen_sites = HashSet::new(); + sites.retain(|site| seen_sites.insert(site.clone())); + sites.truncate(8); + Some(Cluster { + kind: entity.kind.clone(), + token: entity.token.clone(), + n_detectors: detectors.len(), + support: row.findings.len(), + scatter: units.len(), + score, + fat_union, + fix: if fat_union { + FAT_UNION_FIX.to_string() + } else { + fix_shape(&detectors, &entity.kind) + }, + detectors, + sites, + }) + }) + .collect::>(); + clusters.sort_by(|left, right| { + right + .n_detectors + 
.cmp(&left.n_detectors) + .then_with(|| right.score.cmp(&left.score)) + .then_with(|| right.scatter.cmp(&left.scatter)) + .then_with(|| left.kind.cmp(&right.kind)) + .then_with(|| left.token.cmp(&right.token)) + }); + clusters +} + +pub fn entities(finding: &Value) -> Vec { + let mut out = Vec::new(); + for key in TUPLE_FIELDS { + let values = rv::array(finding, key); + if values.len() < 2 { + continue; + } + let mut members = values + .iter() + .map(|value| rv::string(Some(value))) + .collect::>(); + members.sort(); + out.push(Entity { + kind: "tuple".to_string(), + token: truncate_chars(&members.join(" | "), 160), + }); + } + for key in NAME_ARRAY_FIELDS { + for value in rv::array(finding, key) { + for token in tokens(&rv::string(Some(value))) { + out.push(Entity { + kind: "name".to_string(), + token, + }); + } + } + } + for key in NAME_STR_FIELDS { + if let Some(value) = rv::get(finding, key) { + for token in tokens(&rv::string(Some(value))) { + out.push(Entity { + kind: "name".to_string(), + token, + }); + } + } + } + let mut seen = HashSet::new(); + out.retain(|entity| seen.insert((entity.kind.clone(), entity.token.clone()))); + out +} + +pub fn tokens(value: &str) -> Vec { + static TOKEN_RE: OnceLock = OnceLock::new(); + let re = TOKEN_RE.get_or_init(|| Regex::new(r"[A-Za-z_][A-Za-z0-9_]*[?!=]?").unwrap()); + let mut out = re + .find_iter(value) + .filter_map(|mat| { + let token = mat.as_str().trim_end_matches(['?', '!', '=']).to_string(); + if token.len() < 2 || STOPWORDS.contains(&token.as_str()) { + None + } else { + Some(token) + } + }) + .collect::>(); + out.sort(); + out.dedup(); + out +} + +pub fn finding_units(finding: &Value) -> Vec<(String, String)> { + convergence::locations(finding) + .into_iter() + .filter_map(|loc| { + let (file, method, _) = convergence::parse_loc(&loc); + match (file, method) { + (Some(file), Some(method)) => Some((file, method)), + _ => None, + } + }) + .collect() +} + +fn fat_union(kind: &str, token: &str, findings: 
&[Value]) -> bool { + static CONST_RE: OnceLock = OnceLock::new(); + let re = CONST_RE.get_or_init(|| Regex::new(r"\A(::)?[A-Z]\w*(::[A-Z]\w*)*\z").unwrap()); + if kind != "tuple" { + return false; + } + if !findings + .iter() + .any(|finding| rv::kind_is(finding, "kind", "case_dispatch")) + { + return false; + } + let members = token.split(" | ").collect::>(); + members.len() >= 2 && members.iter().all(|member| re.is_match(member)) +} + +fn fix_shape(detectors: &[String], kind: &str) -> String { + let detectors = detectors.iter().map(String::as_str).collect::>(); + let shapes: &[(&[&str], &str, &str)] = &[ + ( + &["Neglected Updates", "Derived-State Staleness"], + "name", + "single-source this state (one stamp, or recompute on write) -- the invariant-#16 desync shape", + ), + ( + &["Broken Protocols"], + "any", + "pair the protocol (RAII / ensure); the unpaired site is the deviant", + ), + ( + &[ + "Missing Abstractions", + "Reification Misses", + "Semantic Predicate Aliases", + "Exact Predicate Aliases", + ], + "any", + "reify ONE named predicate/decision and call it everywhere", + ), + ( + &["Missing Abstractions", "Neglected Conditions", "Neglected Path Conditions"], + "tuple", + "extract the decision; if it dispatches a closed set, consider product-vs-sum (fat-union -> nil-kill)", + ), + ( + &["Decision Pressure"], + "any", + "tighten the contract once; the scattered defensive guards collapse (cross-proc -> nil-kill)", + ), + ]; + for (titles, want_kind, label) in shapes { + if *want_kind != "any" && *want_kind != kind { + continue; + } + if titles.iter().any(|title| detectors.contains(title)) { + return (*label).to_string(); + } + } + "converging structural debt -- resolve once at the named entity".to_string() +} + +fn truncate_chars(value: &str, max: usize) -> String { + value.chars().take(max).collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn equivalent_state_tokens_collapse_to_same_name() { + 
assert_eq!(tokens("@storage="), vec!["storage"]); + assert_eq!(tokens(".storage"), vec!["storage"]); + } + + #[test] + fn tuple_fields_share_the_same_token() { + let left = entities(&json!({"members": ["b", "a"]})); + let right = entities(&json!({"guards": ["a", "b"]})); + assert_eq!(left, right); + } +} diff --git a/gems/decomplex/rust/src/decomplex/sarif.rs b/gems/decomplex/rust/src/decomplex/sarif.rs new file mode 100644 index 000000000..32eaaacde --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/sarif.rs @@ -0,0 +1,219 @@ +use serde_json::{json, Map, Value}; +use std::collections::BTreeSet; + +const SCHEMA: &str = "https://json.schemastore.org/sarif-2.1.0.json"; + +pub fn document( + tool_name: &str, + rules: Vec, + results: Vec, + information_uri: Option<&str>, + properties: Value, +) -> Value { + let normalized_rules = unique_rules(rules); + let mut rule_index = Map::new(); + for (index, rule) in normalized_rules.iter().enumerate() { + if let Some(id) = rule.get("id").and_then(Value::as_str) { + rule_index.insert(id.to_string(), json!(index)); + } + } + let normalized_results = results + .into_iter() + .map(|result| { + let mut result = compact_value(json_safe_value(result)); + if let Some(rule_id) = result.get("ruleId").and_then(Value::as_str) { + if let Some(index) = rule_index.get(rule_id) { + if let Some(object) = result.as_object_mut() { + object.insert("ruleIndex".to_string(), index.clone()); + } + } + } + result + }) + .collect::>(); + + let mut driver = Map::new(); + driver.insert("name".to_string(), Value::String(tool_name.to_string())); + if let Some(uri) = information_uri { + driver.insert("informationUri".to_string(), Value::String(uri.to_string())); + } + driver.insert("rules".to_string(), Value::Array(normalized_rules)); + let driver = compact_object(driver); + + let run = compact_value(json!({ + "tool": { "driver": driver }, + "results": normalized_results, + "properties": json_safe_value(properties), + })); + + compact_value(json!({ + 
"version": "2.1.0", + "$schema": SCHEMA, + "runs": [run], + })) +} + +pub fn rule( + id: &str, + name: Option<&str>, + short_description: Option<&str>, + full_description: Option<&str>, + default_level: &str, + help_uri: Option<&str>, + properties: Value, +) -> Value { + compact_value(json!({ + "id": id, + "name": name.unwrap_or(id), + "shortDescription": { "text": short_description.or(name).unwrap_or(id) }, + "fullDescription": full_description.map(|text| json!({ "text": text })), + "defaultConfiguration": { "level": default_level }, + "helpUri": help_uri, + "properties": json_safe_value(properties), + })) +} + +pub fn result( + rule_id: &str, + message: &str, + path: Option<&str>, + line: Option, + start_column: Option, + end_line: Option, + end_column: Option, + level: &str, + properties: Value, + partial_fingerprints: Value, +) -> Value { + compact_value(json!({ + "ruleId": rule_id, + "level": level, + "message": { "text": message }, + "locations": sarif_locations(path, line, start_column, end_line, end_column), + "partialFingerprints": json_safe_value(partial_fingerprints), + "properties": json_safe_value(properties), + })) +} + +fn sarif_locations( + path: Option<&str>, + line: Option, + start_column: Option, + end_line: Option, + end_column: Option, +) -> Value { + let Some(path) = path.filter(|path| !path.is_empty()) else { + return Value::Array(Vec::new()); + }; + Value::Array(vec![compact_value(json!({ + "physicalLocation": compact_value(json!({ + "artifactLocation": { "uri": normalize_path(path) }, + "region": compact_value(json!({ + "startLine": positive_int(line, Some(1)), + "startColumn": positive_int(start_column, None), + "endLine": positive_int(end_line, None), + "endColumn": positive_int(end_column, None), + })) + })) + }))]) +} + +pub fn normalize_path(path: &str) -> String { + path.replace('\\', "/").trim_start_matches("./").to_string() +} + +pub fn slug(value: &str) -> String { + let mut out = String::new(); + let mut last_dash = false; + for 
ch in value.to_lowercase().chars() { + if ch.is_ascii_alphanumeric() { + out.push(ch); + last_dash = false; + } else if !last_dash { + out.push('-'); + last_dash = true; + } + } + out.trim_matches('-').to_string() +} + +fn positive_int(value: Option, fallback: Option) -> Option { + let number = value.or(fallback)?; + (number > 0).then_some(number).or(fallback) +} + +pub fn json_safe_value(value: Value) -> Value { + match value { + Value::Array(items) => Value::Array(items.into_iter().map(json_safe_value).collect()), + Value::Object(object) => { + let mut out = Map::new(); + for (key, value) in object { + out.insert(key, json_safe_value(value)); + } + Value::Object(out) + } + other => other, + } +} + +fn compact_value(value: Value) -> Value { + match value { + Value::Object(object) => compact_object(object), + other => other, + } +} + +fn compact_object(object: Map) -> Value { + let mut out = Map::new(); + for (key, value) in object { + if value.is_null() { + continue; + } + let value = match value { + Value::Object(object) => compact_object(object), + Value::Array(items) => Value::Array(items.into_iter().map(compact_value).collect()), + other => other, + }; + let empty = match &value { + Value::Array(items) => items.is_empty(), + Value::Object(object) => object.is_empty(), + Value::String(text) => text.is_empty(), + _ => false, + }; + if !empty { + out.insert(key, value); + } + } + Value::Object(out) +} + +fn unique_rules(rules: Vec) -> Vec { + let mut seen = BTreeSet::new(); + let mut out = Vec::new(); + for rule in rules { + let rule = json_safe_value(rule); + let id = rule + .get("id") + .and_then(Value::as_str) + .unwrap_or_default() + .to_string(); + if id.is_empty() || !seen.insert(id) { + continue; + } + out.push(compact_value(rule)); + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn slug_matches_ruby_sarif_slug() { + assert_eq!( + slug("Structural Similarity (Type-2/3)"), + "structural-similarity-type-2-3" + ); + } +} diff --git 
a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index c813c574d..9cf8d94d4 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -100,6 +100,8 @@ pub struct Document { pub root: RawNode, pub normalized_root: NormalizedNode, pub function_defs: Vec, + pub call_sites: Vec, + pub state_reads: Vec, pub state_writes: Vec, pub decision_sites: Vec, pub predicate_aliases: Vec, @@ -114,6 +116,24 @@ pub struct FunctionDef { pub line: usize, pub span: Span, pub body: RawNode, + pub visibility: Option, + pub params: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct CallSite { + pub receiver: String, + pub message: String, + pub file: String, + pub function: String, + pub owner: String, + pub line: usize, + pub span: Span, + pub conditional: bool, + pub arguments: Vec, + pub control: Option, + pub safe_navigation: bool, + pub block: bool, } #[derive(Clone, Debug, Eq, PartialEq, Serialize)] @@ -127,6 +147,17 @@ pub struct StateWrite { pub owner: String, } +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct StateRead { + pub field: String, + pub receiver: String, + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, + pub owner: String, +} + #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct PredicateAlias { pub name: String, diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs index 414d4630d..79451bd3f 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs @@ -1,9 +1,9 @@ use super::super::tree_sitter_adapter::{ first_named_child, first_named_child_except, first_named_child_with_kind, first_named_text, named_children, normalize_type_owner, previous_sibling_text, strip_assignment_suffix, - AssignmentTarget, Target, + AssignmentTarget, CallTarget, Target, }; 
-use super::super::{CloneCandidate, Document, Language}; +use super::super::{CallSite, CloneCandidate, Document, FunctionDef, Language}; use crate::decomplex::ast::{node_text, normalize_text, span, RawNode}; use std::collections::{BTreeMap, BTreeSet, HashSet}; use tree_sitter::{Language as TreeSitterLanguage, Node}; @@ -121,6 +121,9 @@ const CLONE_CALL_KINDS: &[&str] = &[ "method_invocation", "invocation_expression", ]; +const NOISE_MESSAGES: &[&str] = &[ + "!", "!=", "==", "===", "<", "<=", ">", ">=", "[]", "[]=", "to_s", "inspect", "class", +]; pub(crate) trait LanguageProfile { fn language(&self) -> Language; @@ -158,6 +161,14 @@ pub(crate) trait LanguageProfile { EMPTY_NODE_KINDS } + fn parameter_identifier_node_kinds(&self) -> &[&str] { + self.identifier_node_kinds() + } + + fn inline_parameter_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + fn function_body_node_kinds(&self) -> &[&str] { EMPTY_NODE_KINDS } @@ -258,6 +269,19 @@ pub(crate) trait LanguageProfile { EMPTY_NODE_KINDS } + fn argument_list_node_kinds(&self) -> &[&str] { + &[ + "argument_list", + "arguments", + "call_suffix", + "value_arguments", + ] + } + + fn block_argument_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + fn navigation_suffix_node_kinds(&self) -> &[&str] { EMPTY_NODE_KINDS } @@ -282,6 +306,32 @@ pub(crate) trait LanguageProfile { self.default_function_name(node, source) } + fn function_visibility(&self, _node: Node<'_>, _source: &str) -> Option { + None + } + + fn function_params(&self, node: Node<'_>, source: &str) -> Vec { + let param_nodes = if let Some(params) = self.function_parameter_list(node) { + named_children(params) + } else { + named_children(node) + .into_iter() + .filter(|child| self.inline_parameter_node_kinds().contains(&child.kind())) + .collect() + }; + let mut out = Vec::new(); + for param in param_nodes { + if let Some(name) = self.parameter_name(param, source) { + if !out.contains(&name) { + out.push(name); + } + } + } + out + } + + fn 
after_collect_facts(&self, _functions: &mut Vec, _calls: &[CallSite]) {} + fn default_function_name(&self, node: Node<'_>, source: &str) -> Option { if !self.function_node_kinds().contains(&node.kind()) { return None; @@ -318,6 +368,16 @@ pub(crate) trait LanguageProfile { false } + fn control_context(&self, node: Node<'_>, source: &str) -> Option { + if generic_loop_context(node, source) { + Some("iterates".to_string()) + } else if generic_branch_context(node, source) { + Some("conditional".to_string()) + } else { + None + } + } + fn normalize_source_text(&self, text: &str) -> String { normalize_text(text) } @@ -404,15 +464,171 @@ pub(crate) trait LanguageProfile { } } + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + if self.call_node_kinds().contains(&node.kind()) { + self.default_call_target(node, source) + } else { + None + } + } + + fn default_call_target<'tree>( + &self, + node: Node<'tree>, + source: &str, + ) -> Option> { + let callee = if self.field_like_node_kinds().contains(&node.kind()) { + node + } else { + node.child_by_field_name("function") + .or_else(|| node.child_by_field_name("callee")) + .or_else(|| first_named_child(node))? 
+ }; + if callee.kind() == "builtin_function" || node_text(callee, source).starts_with('@') { + return None; + } + + let (receiver, message) = self.target_from_callee(callee, source)?; + Some(CallTarget::new( + receiver, + message, + self.call_argument_texts(node, source), + )) + } + + fn target_from_callee(&self, callee: Node<'_>, source: &str) -> Option<(String, String)> { + if self.field_like_node_kinds().contains(&callee.kind()) { + let object = callee + .child_by_field_name("object") + .or_else(|| callee.child_by_field_name("receiver")) + .or_else(|| callee.child_by_field_name("operand")) + .or_else(|| callee.child_by_field_name("value")) + .or_else(|| callee.child_by_field_name("expression")) + .or_else(|| first_named_child_except(callee, "navigation_suffix"))?; + let field = callee + .child_by_field_name("field") + .or_else(|| callee.child_by_field_name("property")) + .or_else(|| callee.child_by_field_name("name")) + .or_else(|| callee.child_by_field_name("suffix")) + .or_else(|| first_named_child_with_kind(callee, "navigation_suffix")) + .or_else(|| named_children(callee).into_iter().last())?; + let field_text = self.member_field_text(field, source)?; + return Some(( + normalize_text(node_text(object, source)) + .trim_start_matches('*') + .to_string(), + field_text, + )); + } + + if self.identifier_node_kinds().contains(&callee.kind()) { + return Some(("self".to_string(), node_text(callee, source).to_string())); + } + + let text = normalize_text(node_text(callee, source)); + if text.is_empty() { + return None; + } + let parts = text.split('.').collect::>(); + if parts.len() > 1 { + Some(( + parts[..parts.len() - 1].join("."), + parts[parts.len() - 1].to_string(), + )) + } else { + Some(("self".to_string(), text)) + } + } + + fn call_argument_texts(&self, node: Node<'_>, source: &str) -> Vec { + self.call_argument_nodes(node) + .into_iter() + .map(|argument| normalize_text(node_text(argument, source))) + .collect() + } + + fn 
call_argument_nodes<'tree>(&self, node: Node<'tree>) -> Vec> { + if let Some(args) = node.child_by_field_name("arguments").or_else(|| { + named_children(node) + .into_iter() + .find(|child| self.argument_list_node_kinds().contains(&child.kind())) + }) { + return named_children(args); + } + if !self.call_node_kinds().contains(&node.kind()) { + return Vec::new(); + } + + let callee = node + .child_by_field_name("function") + .or_else(|| node.child_by_field_name("callee")) + .or_else(|| first_named_child(node)); + named_children(node) + .into_iter() + .filter(|child| Some(*child) != callee) + .collect() + } + + fn call_has_block(&self, node: Node<'_>) -> bool { + named_children(node) + .into_iter() + .any(|child| self.block_argument_node_kinds().contains(&child.kind())) + } + + fn noise_call(&self, target: &CallTarget<'_>) -> bool { + let message = target.message.as_str(); + let receiver = target.receiver.as_str(); + message.is_empty() + || NOISE_MESSAGES.contains(&message) + || message.starts_with('@') + || matches!(receiver, "std" | "builtin" | "build_options") + || receiver.starts_with("std.") + || receiver.starts_with("builtin.") + || receiver.starts_with("build_options.") + } + fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { self.default_state_target(lhs, source) } + fn state_read_target(&self, node: Node<'_>, source: &str) -> Option { + self.default_state_read_target(node, source) + } + + fn default_state_read_target(&self, node: Node<'_>, source: &str) -> Option { + if self.accessor_call_node_kinds().contains(&node.kind()) { + let receiver = node.child_by_field_name("receiver")?; + let method = node.child_by_field_name("method")?; + let field = node_text(method, source); + if node.child_by_field_name("arguments").is_some() || NOISE_MESSAGES.contains(&field) { + return None; + } + return Some(Target { + receiver: normalize_text(node_text(receiver, source)), + field: field.to_string(), + }); + } + + let target = self.default_state_target(node, 
source)?; + if NOISE_MESSAGES.contains(&target.field.as_str()) { + None + } else { + Some(target) + } + } + fn default_state_target(&self, lhs: Node<'_>, source: &str) -> Option { if previous_sibling_text(lhs, source).as_deref() == Some(":") { return None; } + if self.expression_list_node_kinds().contains(&lhs.kind()) { + let children = named_children(lhs); + if children.len() == 1 { + return self.default_state_target(children[0], source); + } + } + if self.accessor_call_node_kinds().contains(&lhs.kind()) { let receiver = lhs.child_by_field_name("receiver")?; let method = lhs.child_by_field_name("method")?; @@ -573,6 +789,68 @@ pub(crate) trait LanguageProfile { None } + fn function_parameter_list<'tree>(&self, node: Node<'tree>) -> Option> { + let declarator = node.child_by_field_name("declarator"); + declarator + .and_then(|declarator| declarator.child_by_field_name("parameters")) + .or_else(|| node.child_by_field_name("parameters")) + .or_else(|| { + named_children(node) + .into_iter() + .find(|child| self.parameter_list_node_kinds().contains(&child.kind())) + }) + .or_else(|| { + declarator.and_then(|declarator| { + named_children(declarator) + .into_iter() + .find(|child| self.parameter_list_node_kinds().contains(&child.kind())) + }) + }) + } + + fn parameter_name(&self, param: Node<'_>, source: &str) -> Option { + let name = if self + .parameter_identifier_node_kinds() + .contains(&param.kind()) + { + Some(param) + } else { + param + .child_by_field_name("name") + .or_else(|| { + named_children(param) + .into_iter() + .filter(|child| { + self.parameter_identifier_node_kinds() + .contains(&child.kind()) + }) + .last() + }) + .or_else(|| self.descendant_parameter_name(param)) + }?; + let text = self.normalize_parameter_name(node_text(name, source)); + (!text.is_empty() && text != "_").then_some(text) + } + + fn descendant_parameter_name<'tree>(&self, node: Node<'tree>) -> Option> { + let mut found = None; + let mut stack = named_children(node); + while let
Some(current) = stack.pop() { + if self + .parameter_identifier_node_kinds() + .contains(&current.kind()) + { + found = Some(current); + } + stack.extend(named_children(current)); + } + found + } + + fn normalize_parameter_name(&self, text: &str) -> String { + text.to_string() + } + fn impl_owner_name(&self, node: Node<'_>, source: &str) -> Option { let r#type = node.child_by_field_name("type").or_else(|| { named_children(node).into_iter().find(|child| { @@ -1027,6 +1305,67 @@ fn clone_method_span_for<'a>( .find(|function| function.span[0] <= line_no && line_no <= function.span[2]) } +fn generic_loop_context(node: Node<'_>, source: &str) -> bool { + matches!( + node.kind(), + "while" + | "until" + | "for" + | "do_block" + | "while_statement" + | "until_statement" + | "for_statement" + | "for_in_statement" + | "enhanced_for_statement" + | "foreach_statement" + | "for_range_loop" + | "for_expression" + | "loop_expression" + ) || matches!(node.kind(), "expression_statement" | "labeled_statement") + && normalize_text(node_text(node, source)) + .trim_start() + .starts_with("for ") +} + +fn generic_branch_context(node: Node<'_>, source: &str) -> bool { + if matches!( + node.kind(), + "if" | "unless" + | "if_modifier" + | "unless_modifier" + | "case" + | "if_statement" + | "if_expression" + | "case_statement" + | "switch_statement" + | "switch_expression" + | "match_statement" + | "match_expression" + | "when_expression" + | "expression_switch_statement" + ) { + return true; + } + + let first_token_is_branch = matches!( + node.kind(), + "body_statement" | "block" | "statements" | "statement_list" + ) && { + let mut cursor = node.walk(); + let result = node + .children(&mut cursor) + .next() + .map(|child| matches!(child.kind(), "if" | "unless" | "case")) + .unwrap_or(false); + result + }; + first_token_is_branch + || node.kind() == "expression_statement" + && normalize_text(node_text(node, source)) + .trim_start() + .starts_with("if ") +} + fn clone_node_key(node: &RawNode) ->
String { format!( "{}\0{}\0{}\0{}\0{}\0{}", diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs index 26f0836de..dae57a428 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs @@ -30,6 +30,10 @@ impl LanguageProfile for CProfile { &["parameter_list"] } + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier", "field_identifier"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["compound_statement"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs index 8bc0ca830..0729fcfd6 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs @@ -29,6 +29,10 @@ impl LanguageProfile for CppProfile { &["parameter_list"] } + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier", "field_identifier"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["compound_statement"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs index 55fc8c99a..8e0944203 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs @@ -25,6 +25,10 @@ impl LanguageProfile for CSharpProfile { &["parameter_list"] } + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["block", "declaration_list"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs index a51f5f04d..66eba28db 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs @@ -25,6 +25,10 @@ impl LanguageProfile for 
GoProfile { &["parameter_list"] } + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier", "field_identifier"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["block", "statement_list"] } @@ -45,6 +49,10 @@ impl LanguageProfile for GoProfile { &["assignment_statement", "short_var_declaration"] } + fn expression_list_node_kinds(&self) -> &[&str] { + &["expression_list"] + } + fn assignment_operator_tokens(&self) -> &[&str] { &["=", ":=", "+=", "-=", "*=", "/=", "%="] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs index acf69741e..ca65e549a 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs @@ -25,6 +25,10 @@ impl LanguageProfile for JavaProfile { &["formal_parameters"] } + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["block"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs index 2a36abe45..3feae9881 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs @@ -25,6 +25,10 @@ impl LanguageProfile for JavaScriptProfile { &["formal_parameters"] } + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["statement_block"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs index e28347285..b2280a8fa 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs @@ -25,6 +25,10 @@ impl LanguageProfile for KotlinProfile { &["function_value_parameters"] } + fn 
parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier", "simple_identifier"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["function_body", "statements"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs index 8dedf0062..11dc1ded3 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs @@ -22,6 +22,10 @@ impl LanguageProfile for LuaProfile { &["parameters"] } + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["block"] } @@ -59,11 +63,11 @@ impl LanguageProfile for LuaProfile { } fn expression_list_node_kinds(&self) -> &[&str] { - &["expression_list"] + &["expression_list", "variable_list"] } fn field_like_node_kinds(&self) -> &[&str] { - &["dot_index_expression", "variable_list"] + &["dot_index_expression"] } fn generated_prelude(&self, node: Node<'_>, source: &str) -> bool { diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs index 89a50a142..b1a467968 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs @@ -27,6 +27,10 @@ impl LanguageProfile for PhpProfile { &["formal_parameters"] } + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["name", "variable_name", "simple_parameter"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["compound_statement", "declaration_list"] } @@ -108,6 +112,10 @@ impl LanguageProfile for PhpProfile { normalize_text(&php_normalize_source(text)) } + fn normalize_parameter_name(&self, text: &str) -> String { + php_identifier_text_value(text) + } + fn function_name(&self, node: Node<'_>, source: &str) -> Option { if self.function_node_kinds().contains(&node.kind()) { return node diff --git 
a/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs index e94e630b5..e4c88a2d3 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs @@ -25,6 +25,10 @@ impl LanguageProfile for PythonProfile { &["parameters"] } + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["block"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs index c00a944a9..a45ea5f48 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs @@ -1,10 +1,10 @@ use super::super::tree_sitter_adapter::{ - first_child_kind, first_named_text, named_children, next_sibling_raw_text, AssignmentTarget, - Target, + first_child_kind, first_named_text, named_children, next_sibling_raw_text, + previous_sibling_raw_text, AssignmentTarget, CallTarget, Target, }; -use super::super::Language; +use super::super::{CallSite, FunctionDef, Language}; use super::base::LanguageProfile; -use crate::decomplex::ast::{node_text, normalize_text}; +use crate::decomplex::ast::{node_text, normalize_text, span}; use regex::Regex; use std::collections::{BTreeMap, BTreeSet}; use tree_sitter::{Language as TreeSitterLanguage, Node}; @@ -40,6 +40,10 @@ impl LanguageProfile for RubyProfile { &["method_parameters"] } + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["body_statement", "do_block"] } @@ -100,6 +104,51 @@ impl LanguageProfile for RubyProfile { &["call"] } + fn argument_list_node_kinds(&self) -> &[&str] { + &["argument_list"] + } + + fn block_argument_node_kinds(&self) -> &[&str] { + &["do_block", "block"] + } + + fn call_target<'tree>(&self, node: Node<'tree>, 
source: &str) -> Option> { + if ruby_embedded_text_node(node) { + return None; + } + if node.kind() == "call" && ruby_command_argument_call(node, source) { + return None; + } + let mut target = match node.kind() { + "call" => { + ruby_proc_call_target(node, source).or_else(|| ruby_call_target(node, source)) + } + "body_statement" => ruby_bare_body_call_target(node, source), + "identifier" => ruby_bare_call_target(node, source), + _ => None, + }?; + if ruby_brace_block_parameter_receiver(node, &target.receiver, source) { + return None; + } + if target.arguments.is_empty() { + if let Some(span) = + ruby_narrow_no_arg_call_span(node, source, &target.receiver, &target.message) + { + target.span = Some(span); + } + } + let effective_span = target + .span + .unwrap_or_else(|| target.source_node.map(span).unwrap_or_else(|| span(node))); + if target.receiver == "self" + && target.message.ends_with('?') + && effective_span[0] != effective_span[2] + { + return None; + } + ruby_valid_call_target(&target).then_some(target) + } + fn function_name(&self, node: Node<'_>, source: &str) -> Option { match node.kind() { "singleton_method" => { @@ -130,6 +179,28 @@ impl LanguageProfile for RubyProfile { } } + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + if node.kind() == "singleton_method" { + return Some("public".to_string()); + } + if node.kind() == "argument_list" && first_child_kind(node) == Some("def") { + let target = node + .parent() + .and_then(|parent| (parent.kind() == "call").then_some(parent)) + .and_then(|parent| ruby_call_target(parent, source))?; + if target.receiver == "self" + && matches!(target.message.as_str(), "private" | "protected" | "public") + { + return Some(target.message); + } + } + None + } + + fn after_collect_facts(&self, functions: &mut Vec, calls: &[CallSite]) { + apply_ruby_visibility(functions, calls); + } + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { if node.kind() == "body_statement" 
&& matches!(first_child_kind(node), Some("class" | "module")) @@ -166,6 +237,15 @@ impl LanguageProfile for RubyProfile { ruby_state_variable_target(lhs, source).or_else(|| self.default_state_target(lhs, source)) } + fn state_read_target(&self, node: Node<'_>, source: &str) -> Option { + if ruby_direct_flat_map_block_statement(node, source) { + return None; + } + ruby_state_variable_target(node, source) + .or_else(|| ruby_bare_state_reader_target(node, source)) + .or_else(|| self.default_state_read_target(node, source)) + } + fn assignment_target<'tree>(&self, node: Node<'tree>) -> Option> { self.default_assignment_target(node) .or_else(|| match node.kind() { @@ -240,6 +320,472 @@ fn inline_def_name(node: Node<'_>, source: &str) -> Option { hidden_ruby_method_name(node, source) } +fn ruby_call_target<'tree>(node: Node<'tree>, source: &str) -> Option> { + let receiver = node.child_by_field_name("receiver"); + let method = node.child_by_field_name("method"); + let arguments = ruby_argument_texts(node, source); + let message = method + .map(|method| node_text(method, source).to_string()) + .or_else(|| first_named_text(node, source, &["identifier", "constant"])) + .or_else(|| { + let text = normalize_text(node_text(node, source)); + (receiver.is_none() && ruby_simple_call_text(&text)).then_some(text) + })?; + + let mut target = CallTarget::new( + receiver + .map(|receiver| normalize_text(node_text(receiver, source))) + .unwrap_or_else(|| "self".to_string()), + message, + arguments, + ); + if target.arguments.is_empty() { + if let (Some(receiver), Some(method)) = (receiver, method) { + let receiver_span = span(receiver); + let method_span = span(method); + target.span = Some([ + receiver_span[0], + receiver_span[1], + method_span[2], + method_span[3], + ]); + } + } + target.safe_navigation = ruby_safe_navigation_call(node, source); + Some(target) +} + +fn apply_ruby_visibility(functions: &mut [FunctionDef], calls: &[CallSite]) { + let mut owners = functions + .iter() + 
.map(|function| function.owner.clone()) + .collect::>(); + owners.sort(); + owners.dedup(); + + for owner in owners { + let function_indices = functions + .iter() + .enumerate() + .filter_map(|(index, function)| (function.owner == owner).then_some(index)) + .collect::>(); + let call_indices = calls + .iter() + .enumerate() + .filter_map(|(index, call)| { + (call.owner == owner && ruby_visibility_call(call)).then_some(index) + }) + .collect::>(); + + let mut visibility = "public".to_string(); + let mut events = Vec::new(); + events.extend( + function_indices + .iter() + .map(|index| (functions[*index].line, 1_u8, *index)), + ); + events.extend( + call_indices + .iter() + .map(|index| (calls[*index].line, 0_u8, *index)), + ); + events.sort(); + + for (_, kind, index) in events { + if kind == 1 { + if functions[index].visibility.is_none() { + functions[index].visibility = Some(if functions[index].name.contains('.') { + "public".to_string() + } else { + visibility.clone() + }); + } + } else { + let call = &calls[index]; + if call.arguments.is_empty() { + visibility = call.message.clone(); + } else { + for argument in &call.arguments { + let name = ruby_visibility_arg_name(argument); + for function_index in function_indices.iter().rev() { + if functions[*function_index].name == name { + functions[*function_index].visibility = Some(call.message.clone()); + break; + } + } + } + } + } + } + } +} + +fn ruby_visibility_call(call: &CallSite) -> bool { + call.function == "(top-level)" + && call.receiver == "self" + && matches!(call.message.as_str(), "public" | "protected" | "private") +} + +fn ruby_visibility_arg_name(argument: &str) -> String { + argument + .trim() + .trim_start_matches(':') + .trim_start_matches('"') + .trim_end_matches('"') + .trim_start_matches('\'') + .trim_end_matches('\'') + .to_string() +} + +fn ruby_bare_call_target<'tree>(node: Node<'tree>, source: &str) -> Option> { + if !ruby_bare_call_identifier(node, source) { + return None; + } + let parent = 
node.parent(); + let source_node = if parent + .map(|parent| parent.kind() == "call") + .unwrap_or(false) + || node + .next_sibling() + .map(|sibling| sibling.kind() == "argument_list") + .unwrap_or(false) + { + parent.unwrap_or(node) + } else { + node + }; + let mut target = CallTarget::new( + "self".to_string(), + node_text(node, source).to_string(), + ruby_argument_texts(source_node, source), + ); + target.source_node = Some(source_node); + target.safe_navigation = ruby_safe_navigation_call(source_node, source); + Some(target) +} + +fn ruby_bare_body_call_target<'tree>(node: Node<'tree>, source: &str) -> Option> { + let stripped = node_text(node, source).trim_start(); + if matches!(first_child_kind(node), Some("def" | "class" | "module")) + || stripped.starts_with("def ") + || stripped.starts_with("class ") + || stripped.starts_with("module ") + { + return None; + } + if let Some(explicit) = ruby_explicit_receiver_body_call_target(node, source) { + return Some(explicit); + } + + let message = node_text(node, source).trim().to_string(); + if !ruby_simple_call_text(&message) + || matches!(message.as_str(), "true" | "false" | "nil" | "self") + { + return None; + } + Some(CallTarget::new("self".to_string(), message, Vec::new())) +} + +fn ruby_explicit_receiver_body_call_target<'tree>( + node: Node<'tree>, + source: &str, +) -> Option> { + let children = named_children(node); + let receiver = *children.first()?; + let message = *children.get(1)?; + if !matches!(receiver.kind(), "self" | "constant" | "identifier") { + return None; + } + if !matches!(message.kind(), "identifier" | "constant") { + return None; + } + let mut target = CallTarget::new( + normalize_text(node_text(receiver, source)), + node_text(message, source).to_string(), + Vec::new(), + ); + let receiver_span = span(receiver); + let message_span = span(message); + target.span = Some([ + receiver_span[0], + receiver_span[1], + message_span[2], + message_span[3], + ]); + Some(target) +} + +fn 
ruby_proc_call_target<'tree>(node: Node<'tree>, source: &str) -> Option> { + if node.kind() != "call" { + return None; + } + let mut cursor = node.walk(); + if !node + .children(&mut cursor) + .any(|child| !child.is_named() && node_text(child, source) == ".") + { + return None; + } + if node.child_by_field_name("method").is_some() { + return None; + } + + let receiver = node + .child_by_field_name("receiver") + .or_else(|| named_children(node).into_iter().next())?; + let args = node.child_by_field_name("arguments").or_else(|| { + named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list") + })?; + let mut target = CallTarget::new( + normalize_text(node_text(receiver, source)), + "call".to_string(), + ruby_argument_texts(node, source), + ); + target.source_node = Some(node); + target.safe_navigation = ruby_safe_navigation_call(node, source); + target.block = named_children(args) + .into_iter() + .any(|child| matches!(child.kind(), "do_block" | "block")); + Some(target) +} + +fn ruby_argument_texts(node: Node<'_>, source: &str) -> Vec { + let args = node.child_by_field_name("arguments").or_else(|| { + named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list") + }); + let Some(args) = args else { + return Vec::new(); + }; + let values = named_children(args) + .into_iter() + .map(|child| normalize_text(node_text(child, source))) + .collect::>(); + if !values.is_empty() { + return values; + } + + let mut text = node_text(args, source).trim().to_string(); + if text.starts_with('(') && text.ends_with(')') && text.len() >= 2 { + text = text[1..text.len() - 1].to_string(); + } + text.split(',') + .map(normalize_text) + .filter(|arg| !arg.is_empty()) + .collect() +} + +fn ruby_safe_navigation_call(node: Node<'_>, source: &str) -> bool { + let mut cursor = node.walk(); + let found = node + .children(&mut cursor) + .any(|child| !child.is_named() && node_text(child, source) == "&."); + found +} + +fn ruby_simple_call_text(text: 
&str) -> bool { + Regex::new(r"^[a-z_]\w*[!?=]?$") + .unwrap() + .is_match(text.trim()) +} + +fn ruby_bare_call_identifier(node: Node<'_>, source: &str) -> bool { + if ruby_embedded_text_node(node) { + return false; + } + let Some(parent) = node.parent() else { + return false; + }; + if ruby_declaration_name(node, parent, source) { + return false; + } + if matches!( + parent.kind(), + "method_parameters" | "block_parameters" | "argument_list" | "assignment" + ) { + return false; + } + if parent.kind() == "call" { + if ruby_command_argument_call(parent, source) { + return false; + } + if parent.child_by_field_name("receiver").is_some() { + return false; + } + let first = named_children(parent).into_iter().next(); + return first == Some(node) + && node + .next_sibling() + .map(|sibling| sibling.kind() == "argument_list") + .unwrap_or(false); + } + if next_sibling_raw_text(node).as_deref() == Some("=") + || previous_sibling_raw_text(node).as_deref() == Some("=") + || next_sibling_raw_text(node).as_deref() == Some(".") + || previous_sibling_raw_text(node).as_deref() == Some(".") + { + return false; + } + + matches!( + parent.kind(), + "body_statement" | "then" | "else" | "elsif" | "ensure" | "rescue" + ) || node + .next_sibling() + .map(|sibling| sibling.kind() == "argument_list") + .unwrap_or(false) +} + +fn ruby_declaration_name(node: Node<'_>, parent: Node<'_>, source: &str) -> bool { + if matches!( + parent.kind(), + "method" | "singleton_method" | "class" | "module" + ) { + return true; + } + if parent.kind() == "body_statement" { + let stripped = node_text(parent, source).trim_start(); + if matches!(first_child_kind(parent), Some("def" | "class" | "module")) + || stripped.starts_with("def ") + || stripped.starts_with("class ") + || stripped.starts_with("module ") + { + return true; + } + } + matches!(node.kind(), "identifier" | "constant") && parent.kind() == "method_parameters" +} + +fn ruby_command_argument_call(node: Node<'_>, source: &str) -> bool { + let 
Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "argument_list" { + return false; + } + !node_text(parent, source).trim_start().starts_with('(') +} + +fn ruby_embedded_text_node(node: Node<'_>) -> bool { + let mut current = Some(node); + while let Some(node) = current { + if matches!( + node.kind(), + "string" + | "string_content" + | "heredoc_body" + | "simple_symbol" + | "symbol" + | "delimited_symbol" + ) { + return true; + } + current = node.parent(); + } + false +} + +fn ruby_brace_block_parameter_receiver(node: Node<'_>, receiver: &str, source: &str) -> bool { + if receiver.contains('.') || receiver.contains('[') || receiver == "self" { + return false; + } + let mut current = node.parent(); + while let Some(parent) = current { + if parent.kind() == "block" { + return ruby_block_parameters(parent, source) + .into_iter() + .any(|param| param == receiver); + } + if matches!( + parent.kind(), + "method" | "singleton_method" | "body_statement" + ) { + return false; + } + current = parent.parent(); + } + false +} + +fn ruby_block_parameters(block: Node<'_>, source: &str) -> Vec { + named_children(block) + .into_iter() + .find(|child| child.kind() == "block_parameters") + .map(|params| { + named_children(params) + .into_iter() + .filter(|child| child.kind() == "identifier") + .map(|child| node_text(child, source).to_string()) + .collect() + }) + .unwrap_or_default() +} + +fn ruby_narrow_no_arg_call_span( + node: Node<'_>, + source: &str, + receiver: &str, + message: &str, +) -> Option<[usize; 4]> { + if message.is_empty() || message == "[]" || message == "[]=" { + return None; + } + let needle = if receiver == "self" { + message.to_string() + } else { + format!("{receiver}.{message}") + }; + let node_span = span(node); + if let Some(line_text) = source.lines().nth(node_span[0].saturating_sub(1)) { + if let Some(start) = line_text.find(&needle) { + let end = start + needle.chars().count(); + return Some([node_span[0], start, 
node_span[0], end]); + } + } + let text = node_text(node, source); + let offset = text.find(&needle)?; + if text[..offset].contains('\n') || needle.contains('\n') { + return None; + } + let mut start = node_span[1] + text[..offset].chars().count(); + let end = start + needle.chars().count(); + if start == node_span[1] + && (previous_sibling_raw_text(node).as_deref() == Some("!") + || node + .start_byte() + .checked_sub(1) + .and_then(|index| source.as_bytes().get(index)) + .copied() + == Some(b'!')) + { + start += 1; + } + Some([node_span[0], start, node_span[0], end]) +} + +fn ruby_valid_call_target(target: &CallTarget<'_>) -> bool { + if invalid_call_text(&target.receiver) + || invalid_call_text(&target.message) + || target.receiver.split_whitespace().count() > 1 + { + return false; + } + if matches!(target.message.as_str(), "[]" | "[]=") { + return true; + } + Regex::new(r"^[A-Za-z_]\w*[!?=]?$") + .unwrap() + .is_match(target.message.as_str()) +} + +fn invalid_call_text(text: &str) -> bool { + text.chars() + .any(|ch| matches!(ch, '"' | '\'' | '\n' | '\r')) +} + fn ruby_state_variable_target(node: Node<'_>, source: &str) -> Option { matches!(node.kind(), "instance_variable" | "global_variable").then(|| Target { receiver: "self".to_string(), @@ -247,6 +793,63 @@ fn ruby_state_variable_target(node: Node<'_>, source: &str) -> Option { }) } +fn ruby_bare_state_reader_target(node: Node<'_>, source: &str) -> Option { + if node.kind() != "identifier" || !ruby_simple_call_text(node_text(node, source)) { + return None; + } + let parent = node.parent()?; + if ruby_declaration_name(node, parent, source) { + return None; + } + if matches!( + parent.kind(), + "call" + | "method_parameters" + | "block_parameters" + | "argument_list" + | "assignment" + | "operator_assignment" + | "pair" + | "hash_key_symbol" + ) { + return None; + } + if next_sibling_raw_text(node).as_deref() == Some("=") + || previous_sibling_raw_text(node).as_deref() == Some("=") + || 
next_sibling_raw_text(node).as_deref() == Some(".") + || previous_sibling_raw_text(node).as_deref() == Some(".") + || next_sibling_raw_text(node).as_deref() == Some(":") + || previous_sibling_raw_text(node).as_deref() == Some(":") + { + return None; + } + + Some(Target { + receiver: "self".to_string(), + field: node_text(node, source).to_string(), + }) +} + +fn ruby_direct_flat_map_block_statement(node: Node<'_>, source: &str) -> bool { + if node.kind() != "call" { + return false; + } + let Some(method) = node.child_by_field_name("method") else { + return false; + }; + if node_text(method, source) != "flat_map" { + return false; + } + let Some(parent) = node.parent() else { + return false; + }; + parent.kind() == "body_statement" + && named_children(parent).first().copied() == Some(node) + && named_children(node) + .iter() + .any(|child| child.kind() == "do_block" || child.kind() == "block") +} + fn ruby_case_pattern_texts(patterns: &[Node<'_>], source: &str) -> Vec { if patterns.is_empty() { return Vec::new(); diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs index 608a7c94f..c78ea7c8b 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs @@ -29,6 +29,10 @@ impl LanguageProfile for RustProfile { &["parameters"] } + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier", "self_parameter"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["block", "declaration_list"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs index af73a5320..da998e1e8 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs @@ -25,6 +25,14 @@ impl LanguageProfile for SwiftProfile { &["function_value_parameters"] } + fn 
parameter_identifier_node_kinds(&self) -> &[&str] { + &["simple_identifier"] + } + + fn inline_parameter_node_kinds(&self) -> &[&str] { + &["parameter"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["function_body", "statements"] } @@ -41,6 +49,10 @@ impl LanguageProfile for SwiftProfile { &["assignment"] } + fn expression_list_node_kinds(&self) -> &[&str] { + &["directly_assignable_expression"] + } + fn assignment_operator_tokens(&self) -> &[&str] { &["=", "+=", "-=", "*=", "/=", "%="] } @@ -100,6 +112,6 @@ impl LanguageProfile for SwiftProfile { } fn field_like_node_kinds(&self) -> &[&str] { - &["navigation_expression", "directly_assignable_expression"] + &["navigation_expression"] } } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs index 183a2755f..17110bd0a 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs @@ -25,6 +25,10 @@ impl LanguageProfile for TypeScriptProfile { &["formal_parameters"] } + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["statement_block"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs index 66c9723ea..5c54ea927 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs @@ -21,6 +21,10 @@ impl LanguageProfile for ZigProfile { &["parameters"] } + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + fn function_body_node_kinds(&self) -> &[&str] { &["block", "block_expression"] } @@ -34,7 +38,7 @@ impl LanguageProfile for ZigProfile { } fn assignment_node_kinds(&self) -> &[&str] { - &["assignment_expression"] + &["assignment_expression", "variable_declaration"] } fn 
assignment_operator_tokens(&self) -> &[&str] { diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index a4f79bcf1..19f49b09a 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -1,6 +1,7 @@ use super::{ adapters::{language_profile, LanguageProfile}, - ComparisonUse, DecisionSite, Document, FunctionDef, Language, PredicateAlias, StateWrite, + CallSite, ComparisonUse, DecisionSite, Document, FunctionDef, Language, PredicateAlias, + StateRead, StateWrite, }; use crate::decomplex::ast::{line, node_text, normalize_text, normalize_tree, span, RawNode}; use anyhow::{Context, Result}; @@ -12,11 +13,15 @@ use tree_sitter::{Node, Parser}; pub fn parse_file(file: PathBuf, language: Language) -> Result { let parsed = ParsedDocument::parse(file, language)?; let mut function_defs = Vec::new(); + let mut call_sites = Vec::new(); + let mut state_reads = Vec::new(); let mut state_writes = Vec::new(); let mut decision_sites = Vec::new(); let mut predicate_aliases = Vec::new(); let mut comparison_uses = Vec::new(); let mut seen_writes = HashSet::new(); + let mut seen_reads = HashSet::new(); + let mut seen_calls = HashSet::new(); let mut seen_decisions = HashSet::new(); let context = ContextState::new(file_owner(&parsed.file)); @@ -27,13 +32,18 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { language, &context, &mut function_defs, + &mut call_sites, + &mut state_reads, &mut state_writes, &mut decision_sites, &mut predicate_aliases, &mut comparison_uses, &mut seen_writes, + &mut seen_reads, + &mut seen_calls, &mut seen_decisions, ); + language_profile(language).after_collect_facts(&mut function_defs, &call_sites); Ok(Document { file: parsed.file.to_string_lossy().to_string(), @@ -43,6 +53,8 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { root: 
RawNode::from_tree_sitter(parsed.tree.root_node(), &parsed.source), normalized_root: normalize_tree(parsed.tree.root_node(), &parsed.source, language), function_defs, + call_sites, + state_reads, state_writes, decision_sites, predicate_aliases, @@ -76,7 +88,9 @@ struct ContextState { file_owner: String, owner: Option, function: Option, + function_line: Option, pub receiver: Option, + controls: Vec, } impl ContextState { @@ -85,7 +99,9 @@ impl ContextState { file_owner, owner: None, function: None, + function_line: None, receiver: None, + controls: Vec::new(), } } @@ -100,6 +116,19 @@ impl ContextState { .clone() .unwrap_or_else(|| "(top-level)".to_string()) } + + fn current_control(&self) -> String { + self.controls + .last() + .cloned() + .unwrap_or_else(|| "always".to_string()) + } + + fn conditional_context(&self) -> bool { + self.controls + .iter() + .any(|control| matches!(control.as_str(), "conditional" | "iterates")) + } } fn collect_facts( @@ -109,20 +138,47 @@ fn collect_facts( language: Language, context: &ContextState, function_defs: &mut Vec, + call_sites: &mut Vec, + state_reads: &mut Vec, state_writes: &mut Vec, decision_sites: &mut Vec, predicate_aliases: &mut Vec, comparison_uses: &mut Vec, seen_writes: &mut HashSet, + seen_reads: &mut HashSet, + seen_calls: &mut HashSet, seen_decisions: &mut HashSet, ) { - let next_context = push_function_context( + let next_context = push_control_context( node, - push_owner_context(node, source, context, language), + push_function_context( + node, + push_owner_context(node, source, context, language), + source, + language, + ), source, language, ); record_function_def(node, source, file, language, &next_context, function_defs); + record_call_site( + node, + source, + file, + language, + &next_context, + call_sites, + seen_calls, + ); + record_state_read( + node, + source, + file, + language, + &next_context, + state_reads, + seen_reads, + ); record_state_write( node, source, @@ -153,11 +209,15 @@ fn collect_facts( 
language, &next_context, function_defs, + call_sites, + state_reads, state_writes, decision_sites, predicate_aliases, comparison_uses, seen_writes, + seen_reads, + seen_calls, seen_decisions, ); } @@ -181,6 +241,8 @@ fn record_function_def( line: line(node), span: span(node), body: RawNode::from_tree_sitter(node, source), + visibility: language_profile(language).function_visibility(node, source), + params: language_profile(language).function_params(node, source), }; let key = ( function.file.clone(), @@ -432,11 +494,134 @@ fn push_function_context( }; let owner = context.current_owner(); context.function = Some(function); + context.function_line = Some(line(node)); context.owner = Some(owner); context.receiver = profile.function_receiver_name(node, source); context } +fn push_control_context( + node: Node<'_>, + mut context: ContextState, + source: &str, + language: Language, +) -> ContextState { + if let Some(control) = language_profile(language).control_context(node, source) { + context.controls.push(control); + } + context +} + +fn record_call_site( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, + seen: &mut HashSet, +) { + let profile = language_profile(language); + let Some(mut target) = profile.call_target(node, source) else { + return; + }; + normalize_call_receiver(&mut target, context); + if profile.noise_call(&target) { + return; + } + + let source_node = target.source_node.unwrap_or(node); + if target.receiver == "self" + && target.message == context.current_function() + && context.function_line == Some(line(source_node)) + { + return; + } + let file_name = file.to_string_lossy().to_string(); + let owner = context.current_owner(); + let function = context.current_function(); + let mut call_span = target.span.unwrap_or_else(|| span(source_node)); + if target.message.ends_with('?') && call_span[0] == call_span[2] { + if let Some(line_text) = 
source.lines().nth(call_span[0].saturating_sub(1)) { + if line_text.as_bytes().get(call_span[1]).copied() == Some(b'!') { + call_span[1] += 1; + } + } + } + let key = format!( + "{}\0{}\0{}\0{:?}\0{}\0{}", + file_name, owner, function, call_span, target.receiver, target.message + ); + if !seen.insert(key) { + return; + } + + out.push(CallSite { + receiver: target.receiver, + message: target.message, + file: file_name, + function, + owner, + line: line(source_node), + span: call_span, + conditional: context.conditional_context(), + arguments: target.arguments, + control: Some(context.current_control()), + safe_navigation: target.safe_navigation, + block: target.block || profile.call_has_block(source_node), + }); +} + +fn record_state_read( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, + seen: &mut HashSet, +) { + let profile = language_profile(language); + if profile.assignment_lhs_node(node) { + return; + } + + let Some(target) = profile.state_read_target(node, source) else { + return; + }; + let target = normalize_target_receiver(target, context); + if namespace_receiver(&target.receiver) { + return; + } + + let file_name = file.to_string_lossy().to_string(); + let owner = context.current_owner(); + let function = context.current_function(); + let line = line(node); + let key = format!( + "{}\0{}\0{}\0{:?}\0{}\0{}", + file_name, + owner, + function, + span(node), + target.receiver, + target.field + ); + if !seen.insert(key) { + return; + } + + out.push(StateRead { + field: target.field, + receiver: target.receiver, + file: file_name, + function, + line, + span: span(node), + owner, + }); +} + fn record_state_write( node: Node<'_>, source: &str, @@ -498,6 +683,31 @@ pub(crate) struct Target { pub(crate) field: String, } +#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct CallTarget<'tree> { + pub(crate) receiver: String, + pub(crate) message: String, + pub(crate) arguments: Vec, + pub(crate) 
source_node: Option>, + pub(crate) span: Option<[usize; 4]>, + pub(crate) safe_navigation: bool, + pub(crate) block: bool, +} + +impl<'tree> CallTarget<'tree> { + pub(crate) fn new(receiver: String, message: String, arguments: Vec) -> Self { + Self { + receiver, + message, + arguments, + source_node: None, + span: None, + safe_navigation: false, + block: false, + } + } +} + pub(crate) fn normalize_type_owner(text: &str) -> String { let value = text.trim(); let value = value.trim_start_matches(['&', '*']); @@ -518,6 +728,22 @@ fn file_owner(file: &Path) -> String { .to_string() } +fn namespace_receiver(text: &str) -> bool { + let receiver = text.trim(); + if receiver.starts_with('@') { + return true; + } + if matches!(receiver, "std" | "builtin" | "build_options") + || receiver.starts_with("std.") + || receiver.starts_with("builtin.") + || receiver.starts_with("build_options.") + { + return true; + } + + matches!(receiver.chars().next(), Some(first) if first.is_ascii_uppercase()) +} + pub(crate) fn first_named_text(node: Node<'_>, source: &str, kinds: &[&str]) -> Option { named_children(node) .into_iter() @@ -872,6 +1098,7 @@ mod c_tests { } fn normalize_target_receiver(mut target: Target, context: &ContextState) -> Target { + target.receiver = canonical_self_receiver(&target.receiver); if let Some(current_receiver) = &context.receiver { if &target.receiver == current_receiver { target.receiver = "self".to_string(); @@ -890,3 +1117,38 @@ fn normalize_target_receiver(mut target: Target, context: &ContextState) -> Targ } target } + +fn normalize_call_receiver(target: &mut CallTarget<'_>, context: &ContextState) { + target.receiver = canonical_self_receiver(&target.receiver); + if let Some(current_receiver) = &context.receiver { + if &target.receiver == current_receiver { + target.receiver = "self".to_string(); + } else if target + .receiver + .starts_with(&format!("{}.", current_receiver)) + { + target.receiver = format!( + "self.{}", + target + .receiver + 
.strip_prefix(&format!("{}.", current_receiver)) + .unwrap() + ); + } + } +} + +fn canonical_self_receiver(receiver: &str) -> String { + match receiver { + "self" | "this" | "$this" => "self".to_string(), + _ if receiver.starts_with("this.") => format!( + "self.{}", + receiver.strip_prefix("this.").unwrap_or_default() + ), + _ if receiver.starts_with("$this.") => format!( + "self.{}", + receiver.strip_prefix("$this.").unwrap_or_default() + ), + _ => receiver.to_string(), + } +} diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs index 7d74510eb..ce882fc3d 100644 --- a/gems/decomplex/rust/src/main.rs +++ b/gems/decomplex/rust/src/main.rs @@ -9,11 +9,11 @@ use decomplex::detectors::{ structural_topology, temporal_ordering_pressure, weighted_inlined_cognitive_complexity, }; use decomplex::parallel; -use decomplex::report_facts::{self, Options as ReportFactsOptions}; +use decomplex::report::Report; +use decomplex::report_facts::{self, Options as ReportFactsOptions, VcsFilter}; use decomplex::syntax::Language; -use std::io::Write; +use std::io::Read; use std::path::PathBuf; -use std::process::{Command as ProcessCommand, Stdio}; fn main() -> Result<()> { let worker = std::thread::Builder::new() @@ -255,7 +255,16 @@ fn run() -> Result<()> { } => { let facts = report_facts::collect(&targets, &options) .with_context(|| "failed to collect report facts")?; - render_report_with_ruby(&facts, &format, output.as_ref())?; + render_report(&facts, &format, output.as_ref())?; + } + Command::RenderReport { + input, + from_stdin, + format, + output, + } => { + let facts = read_facts(input.as_ref(), from_stdin)?; + render_report(&facts, &format, output.as_ref())?; } } Ok(()) @@ -402,6 +411,12 @@ enum Command { output: Option, jobs: Option, }, + RenderReport { + input: Option, + from_stdin: bool, + format: String, + output: Option, + }, } impl Command { @@ -434,6 +449,7 @@ impl Command { | Self::FatUnion { jobs, .. } | Self::Facts { jobs, .. 
} | Self::Report { jobs, .. } => *jobs, + Self::RenderReport { .. } => None, } } } @@ -469,6 +485,15 @@ fn parse_args(args: Vec) -> Result { jobs: args.jobs, }) } + "render-report" => { + let args = parse_render_report_args(cursor.collect())?; + Ok(Command::RenderReport { + input: args.input, + from_stdin: args.from_stdin, + format: args.format, + output: args.output, + }) + } "state-writes" => { let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; if files.is_empty() { @@ -796,6 +821,53 @@ struct ReportFactsArgs { format: String, } +struct RenderReportArgs { + input: Option, + from_stdin: bool, + output: Option, + format: String, +} + +fn parse_render_report_args(args: Vec) -> Result { + let mut input = None; + let mut from_stdin = false; + let mut output = None; + let mut format = "markdown".to_string(); + let mut cursor = args.into_iter(); + while let Some(arg) = cursor.next() { + if arg == "--from-stdin" { + from_stdin = true; + } else if arg == "--input" { + input = Some(PathBuf::from( + cursor.next().with_context(|| "--input requires a value")?, + )); + } else if let Some(value) = arg.strip_prefix("--input=") { + input = Some(PathBuf::from(value)); + } else if arg == "--output" { + output = Some(PathBuf::from( + cursor.next().with_context(|| "--output requires a value")?, + )); + } else if let Some(value) = arg.strip_prefix("--output=") { + output = Some(PathBuf::from(value)); + } else if arg == "--format" { + format = cursor.next().with_context(|| "--format requires a value")?; + } else if let Some(value) = arg.strip_prefix("--format=") { + format = value.to_string(); + } else { + bail!("unknown render-report argument: {arg}"); + } + } + if input.is_none() && !from_stdin { + bail!("render-report requires facts JSON on stdin or --input=FILE"); + } + Ok(RenderReportArgs { + input, + from_stdin, + output, + format, + }) +} + fn parse_report_facts_args(args: Vec, allow_format: bool) -> Result { let mut options = 
ReportFactsOptions::default(); let mut targets = Vec::new(); @@ -859,6 +931,12 @@ fn parse_report_facts_args(args: Vec, allow_format: bool) -> Result) -> Result<()> Ok(()) } -fn render_report_with_ruby( - facts: &serde_json::Value, - format: &str, - output: Option<&PathBuf>, -) -> Result<()> { - let mut command = ruby_renderer_command(); - command - .arg("render-report") - .arg("--from-stdin") - .arg(format!("--format={format}")) - .stdin(Stdio::piped()) - .stdout(Stdio::inherit()) - .stderr(Stdio::inherit()); - if let Some(path) = output { - command.arg(format!("--output={}", path.display())); - } - - let mut child = command - .spawn() - .with_context(|| "failed to start Ruby decomplex renderer")?; - { - let stdin = child - .stdin - .as_mut() - .with_context(|| "failed to open Ruby renderer stdin")?; - stdin.write_all(serde_json::to_string(facts)?.as_bytes())?; - } - let status = child - .wait() - .with_context(|| "failed to wait for Ruby decomplex renderer")?; - if !status.success() { - bail!("Ruby decomplex renderer failed with status {status}"); +fn read_facts(input: Option<&PathBuf>, from_stdin: bool) -> Result { + let payload = if let Some(path) = input { + std::fs::read_to_string(path) + .with_context(|| format!("failed to read {}", path.display()))? 
+ } else if from_stdin { + let mut payload = String::new(); + std::io::stdin() + .read_to_string(&mut payload) + .with_context(|| "failed to read facts JSON from stdin")?; + payload + } else { + bail!("render-report requires facts JSON on stdin or --input=FILE"); + }; + if payload.trim().is_empty() { + bail!("render-report requires facts JSON on stdin or --input=FILE"); } - Ok(()) + serde_json::from_str(&payload).with_context(|| "failed to parse report facts JSON") } -fn ruby_renderer_command() -> ProcessCommand { - if let Ok(program) = std::env::var("DECOMPLEX_RUBY_RENDERER") { - if !program.trim().is_empty() { - return ProcessCommand::new(program); - } +fn render_report(facts: &serde_json::Value, format: &str, output: Option<&PathBuf>) -> Result<()> { + let report = Report::from_facts(facts)?; + let text = match format { + "markdown" | "md" => report.to_markdown(), + "sarif" | "json" => report.to_sarif(), + _ => bail!("unsupported report format: {format}"), + }; + if let Some(path) = output { + std::fs::write(path, text)?; + } else { + println!("{text}"); } - - let mut command = ProcessCommand::new("ruby"); - command.arg( - PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("..") - .join("exe") - .join("decomplex"), - ); - command + Ok(()) } fn parse_language_files_and_jobs( @@ -962,6 +1021,13 @@ fn parse_language_files_and_jobs( Ok((language, files, jobs)) } +fn parse_vcs_filter(value: String) -> Result { + match value.as_str() { + "git" => Ok(VcsFilter::Git), + _ => bail!("unsupported --vcs value: {value}"), + } +} + fn parse_jobs(value: String) -> Result { let jobs = value .parse::() @@ -997,4 +1063,19 @@ mod tests { ]) .is_err()); } + + #[test] + fn parses_git_vcs_filter_for_facts() { + let command = parse_args(vec![ + "facts".to_string(), + "--vcs=git".to_string(), + "src".to_string(), + ]) + .expect("command"); + + match command { + Command::Facts { options, .. 
} => assert_eq!(options.vcs, Some(VcsFilter::Git)), + _ => panic!("expected facts command"), + } + } } diff --git a/gems/decomplex/test/examples_oracle_test.rb b/gems/decomplex/test/examples_oracle_test.rb index c8353eca2..46d9bae62 100644 --- a/gems/decomplex/test/examples_oracle_test.rb +++ b/gems/decomplex/test/examples_oracle_test.rb @@ -7,6 +7,7 @@ class ExamplesOracleTest < Minitest::Test EXAMPLES_ROOT = File.expand_path("../examples", __dir__) ORACLE_DIR = File.join(EXAMPLES_ROOT, "oracles") + ENGINES = Decomplex::DetectorRunner::ENGINES.freeze SOURCE_EXTENSIONS = Decomplex::Syntax.supported_exts.freeze LOCATION_KEYS = %w[ at boundaries boundary_crossings component_lines defn examples file @@ -23,6 +24,12 @@ def test_shared_oracle_files_exist refute_empty ORACLE_PATHS end + def test_shared_oracles_are_engine_agnostic + pinned = ORACLE_PATHS.select { |path| JSON.parse(File.read(path)).key?("engine") } + + assert_empty pinned, "shared example oracles must not pin detector engines:\n#{pinned.join("\n")}" + end + def test_each_detector_has_one_fixture_per_language languages = FIXTURE_PATHS.map { |path| File.basename(File.dirname(path)) }.uniq.sort detectors = ORACLE_PATHS.map { |path| File.basename(path, ".json") }.sort @@ -36,19 +43,19 @@ def test_each_detector_has_one_fixture_per_language end end - FIXTURE_PATHS.each_with_index do |fixture_path, index| + FIXTURE_PATHS.product(ENGINES).each_with_index do |(fixture_path, engine), index| language = File.basename(File.dirname(fixture_path)) detector = File.basename(fixture_path, File.extname(fixture_path)) - method_name = "test_#{index}_#{language}_#{detector.tr("-", "_")}_matches_shared_oracle" + method_name = "test_#{index}_#{engine}_#{language}_#{detector.tr("-", "_")}_matches_shared_oracle" define_method(method_name) do - assert_fixture_matches_shared_oracle(fixture_path) + assert_fixture_matches_shared_oracle(fixture_path, engine) end end private - def assert_fixture_matches_shared_oracle(fixture_path) + def 
assert_fixture_matches_shared_oracle(fixture_path, engine) detector = File.basename(fixture_path, File.extname(fixture_path)) oracle_path = File.join(ORACLE_DIR, "#{detector}.json") @@ -63,12 +70,12 @@ def assert_fixture_matches_shared_oracle(fixture_path) Decomplex::DetectorRunner.canonical_json( oracle.fetch("detector"), [fixture_path], - engine: oracle.fetch("engine", "ruby"), + engine: engine, **options ) ) - assert_equal expected, project_detector_output(detector, actual) + assert_equal expected, project_detector_output(detector, actual), "#{engine} #{fixture_path}" end def symbolize_options(options) From 52a643c8b18fcdf2639bbc88773af150b89bee23 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 02:40:06 +0000 Subject: [PATCH 33/52] Fix Rust decomplex detector parity --- .../decomplex/detectors/false_simplicity.rs | 117 ++++- .../rust/src/decomplex/detectors/fat_union.rs | 119 ++++- .../detectors/inconsistent_rename_clone.rs | 177 ++++--- .../src/decomplex/detectors/local_flow.rs | 283 ++++++++++- .../src/decomplex/detectors/path_condition.rs | 66 +++ .../detectors/redundant_nil_guard.rs | 216 +++++++-- .../detectors/state_branch_density.rs | 70 +++ .../src/decomplex/detectors/state_mesh.rs | 46 +- .../detectors/temporal_ordering_pressure.rs | 130 +++++- gems/decomplex/rust/src/decomplex/syntax.rs | 14 + .../src/decomplex/syntax/adapters/base.rs | 20 +- .../rust/src/decomplex/syntax/adapters/c.rs | 4 + .../rust/src/decomplex/syntax/adapters/cpp.rs | 4 + .../src/decomplex/syntax/adapters/csharp.rs | 4 + .../rust/src/decomplex/syntax/adapters/go.rs | 84 +++- .../src/decomplex/syntax/adapters/java.rs | 31 ++ .../src/decomplex/syntax/adapters/kotlin.rs | 6 + .../rust/src/decomplex/syntax/adapters/lua.rs | 45 +- .../rust/src/decomplex/syntax/adapters/php.rs | 18 +- .../src/decomplex/syntax/adapters/python.rs | 4 + .../src/decomplex/syntax/adapters/ruby.rs | 4 + .../src/decomplex/syntax/adapters/swift.rs | 27 +- 
.../rust/src/decomplex/syntax/adapters/zig.rs | 19 +- .../decomplex/syntax/tree_sitter_adapter.rs | 439 +++++++++++++++++- 24 files changed, 1780 insertions(+), 167 deletions(-) diff --git a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs index aab87205c..c245906ff 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs @@ -2,7 +2,7 @@ use crate::decomplex::ast::{self, Child, Node, Span}; use crate::decomplex::syntax::adapters::false_simplicity_lexicon::{ false_simplicity_lexicon, FalseSimplicityLexicon, }; -use crate::decomplex::syntax::{self, Document, Language}; +use crate::decomplex::syntax::{self, CallSite, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -38,6 +38,9 @@ struct ClassRec { span: Span, } +const GENERIC_SYSTEM_IO_BARE: &[&str] = + &["print", "println", "eprintln", "printf", "puts", "panic"]; + pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let documents = syntax::parse_files(files, language)?; Ok(scan_documents(&documents)) @@ -47,6 +50,7 @@ pub fn scan_documents(documents: &[Document]) -> Vec { let mut hits = Vec::new(); let mut classrecs = Vec::new(); for document in documents { + hits.extend(hits_for_document(document)); let mut detector = FalseSimplicity::new( document.file.clone(), document.lines.clone(), @@ -59,6 +63,117 @@ pub fn scan_documents(documents: &[Document]) -> Vec { Report::new(hits, classrecs).findings() } +fn hits_for_document(document: &Document) -> Vec { + let lexicon = false_simplicity_lexicon(document.language); + document + .call_sites + .iter() + .filter_map(|call| semantic_effect_hit_for_call(call, &lexicon)) + .collect() +} + +fn semantic_effect_hit_for_call(call: &CallSite, lexicon: &FalseSimplicityLexicon) -> Option { + let message = call.message.as_str(); + let 
(kind, detail) = if effect_callback_call(call, message, lexicon) { + ("callback_inversion", message.to_string()) + } else if lexicon.meta_mids.contains(&message) { + ("metaprogramming", message.to_string()) + } else if lexicon.dispatch_mids.contains(&message) { + ("dynamic_dispatch", message.to_string()) + } else if message == "call" && !call.receiver.is_empty() { + if method_object_receiver(&call.receiver, lexicon) { + ("dynamic_dispatch", "method(...).call".to_string()) + } else if variable_receiver(&call.receiver) { + ("dynamic_dispatch", format!("{}.call", call.receiver)) + } else { + return None; + } + } else if let Some(detail) = const_effect_detail(call, message, lexicon) { + ("hidden_io", detail) + } else if call.receiver == "self" + && (lexicon.io_bare.contains(&message) || GENERIC_SYSTEM_IO_BARE.contains(&message)) + { + ("hidden_io", message.to_string()) + } else if call.receiver == "self" && lexicon.context_bare.contains(&message) { + ("context_dependency", message.to_string()) + } else if message.len() > 1 && message.ends_with('!') && !matches!(message, "!=" | "!~") { + ("hidden_mutation", message.to_string()) + } else { + return None; + }; + + Some(Hit { + kind: kind.to_string(), + detail, + file: call.file.clone(), + defn: call.function.clone(), + line: call.line, + span: call.span, + }) +} + +fn const_effect_detail( + call: &CallSite, + message: &str, + lexicon: &FalseSimplicityLexicon, +) -> Option { + let receiver = call.receiver.as_str(); + if receiver.is_empty() || receiver == "self" { + return None; + } + let base = receiver + .trim_start_matches("::") + .split("::") + .next() + .unwrap_or(""); + if base == "Dir" && lexicon.dir_context.contains(&message) { + return Some(format!("Dir.{message}")); + } + if lexicon.io_consts.contains(&base) || receiver.starts_with("Net::") { + return Some(format!("{}.{}", receiver.trim_start_matches("::"), message)); + } + if receiver == "ENV" { + return Some("ENV".to_string()); + } + if lexicon + .context_pairs 
+ .iter() + .any(|(name, mids)| *name == base && mids.contains(&message)) + { + return Some(format!("{base}.{message}")); + } + None +} + +fn effect_callback_call(call: &CallSite, message: &str, lexicon: &FalseSimplicityLexicon) -> bool { + (call.block || call.arguments.iter().any(|arg| arg.starts_with('&'))) + && effect_callback_name(message, lexicon) + && !lexicon.meta_mids.contains(&message) +} + +fn effect_callback_name(message: &str, lexicon: &FalseSimplicityLexicon) -> bool { + lexicon.callback_set.contains(&message) + || message.starts_with("with_") + || message.starts_with("around_") + || message.starts_with("on_") + || message.starts_with("before_") + || message.starts_with("after_") + || message.ends_with("_hook") +} + +fn method_object_receiver(receiver: &str, lexicon: &FalseSimplicityLexicon) -> bool { + lexicon + .method_obj_mids + .iter() + .any(|name| receiver.contains(name)) +} + +fn variable_receiver(receiver: &str) -> bool { + let mut chars = receiver.chars(); + matches!(chars.next(), Some(first) if first == '@' || first == '$' || first == '_' || first.is_ascii_lowercase()) + && chars.all(|ch| ch == '_' || ch == '!' || ch == '?' 
|| ch.is_ascii_alphanumeric()) +} + struct FalseSimplicity { file: String, lines: Vec, diff --git a/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs index 040f5d583..b8da2ae56 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs @@ -1,5 +1,5 @@ use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::{self, Document, Language}; +use crate::decomplex::syntax::{self, DispatchSite, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -40,6 +40,123 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result FatUnionReport { + let sites = documents + .iter() + .flat_map(|document| document.dispatch_sites.iter()) + .collect::>(); + FatUnionReport { + fat_unions: fat_unions_from_sites(&sites, 3, 2, 0.6), + } +} + +fn fat_unions_from_sites( + sites: &[&DispatchSite], + min_variants: usize, + min_common: usize, + ratio: f64, +) -> Vec { + let mut groups: BTreeMap, Vec<&DispatchSite>> = BTreeMap::new(); + for site in sites { + groups + .entry(site.variant_set.clone()) + .or_default() + .push(*site); + } + + let mut rows = Vec::new(); + for (variant_set, group) in groups { + let variant_count = variant_set.len(); + if variant_count < min_variants { + continue; + } + + let mut by_member_variant: BTreeMap> = BTreeMap::new(); + let mut outside = BTreeSet::new(); + for site in &group { + for (variant, members) in &site.arm_members { + for member in members { + by_member_variant + .entry(member.clone()) + .or_default() + .insert(variant.clone()); + } + } + for member in &site.outside { + outside.insert(member.clone()); + } + } + + let mut keys = by_member_variant.keys().cloned().collect::>(); + keys.extend(outside.iter().cloned()); + let common = keys + .iter() + .filter(|member| { + outside.contains(*member) + || by_member_variant + 
.get(*member) + .map(|variants| variants.len() >= variant_count) + .unwrap_or(false) + }) + .cloned() + .collect::>(); + let variant = keys + .iter() + .filter(|member| { + !outside.contains(*member) + && by_member_variant + .get(*member) + .map(|variants| variants.len() == 1) + .unwrap_or(false) + }) + .cloned() + .collect::>(); + let total = common.len() + variant.len(); + if common.len() < min_common || total == 0 || common.len() as f64 / (total as f64) < ratio { + continue; + } + + let at = group + .first() + .map(|site| format!("{}:{}:{}", site.file, site.function, site.line)) + .unwrap_or_default(); + let mut spans = BTreeMap::new(); + for site in &group { + spans.insert( + format!("{}:{}:{}", site.file, site.function, site.line), + site.span, + ); + } + let scatter = group + .iter() + .map(|site| (site.file.clone(), site.function.clone())) + .collect::>() + .len(); + rows.push(( + group.len() * common.len(), + FatUnionRow { + name: String::new(), + common, + variant: variant.clone(), + degenerate: variant.is_empty(), + support: group.len(), + scatter, + variant_set, + at, + spans, + }, + )); + } + + rows.sort_by(|a, b| { + (if a.1.degenerate { 0 } else { 1 }) + .cmp(&(if b.1.degenerate { 0 } else { 1 })) + .then_with(|| b.0.cmp(&a.0)) + }); + rows.into_iter().map(|(_, row)| row).collect() +} + +#[allow(dead_code)] +fn scan_documents_from_normalized_roots(documents: &[Document]) -> FatUnionReport { let mut out = Vec::new(); for document in documents { let mut detector = FatUnion::new(document.file.clone(), document.lines.clone()); diff --git a/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs b/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs index 4f0817655..837fbf46b 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs @@ -1,9 +1,12 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; +use 
crate::decomplex::ast::Span; +use crate::decomplex::detectors::local_flow; use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; +use regex::Regex; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; use std::path::PathBuf; +use std::sync::OnceLock; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct InconsistentRenameCloneRow { @@ -21,9 +24,6 @@ pub struct InconsistentRenameCloneRow { #[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)] enum Skeleton { ID, - MID, - CALL, - FCALL, Node(String), } @@ -37,7 +37,6 @@ struct Block { span: Span, } -const HOLE_TYPES: &[&str] = &["LVAR", "DVAR", "IVAR", "LASGN", "DASGN", "IASGN"]; const MIN_TOKENS: usize = 8; pub fn scan_files( @@ -49,109 +48,97 @@ pub fn scan_files( } pub fn scan_documents(documents: &[Document]) -> Vec { - let mut blocks = Vec::new(); - for document in documents { - let detector = InconsistentRenameClone::new(document.file.clone()); - detector.collect(&document.normalized_root, &Vec::new(), &mut blocks); - } + let blocks = local_flow::scan_documents(documents) + .into_iter() + .filter_map(|method| block_from_method(&method)) + .collect::>(); Report::new(blocks).inconsistent_renames() } -struct InconsistentRenameClone { - file: String, -} - -impl InconsistentRenameClone { - fn new(file: String) -> Self { - Self { file } +fn block_from_method(method: &local_flow::MethodSummary) -> Option { + if method.statements.len() < 3 { + return None; + } + let mut skeleton = Vec::new(); + let mut names = Vec::new(); + for statement in &method.statements { + tokenize_source(&statement.source, &mut skeleton, &mut names); + } + if skeleton.len() < MIN_TOKENS { + return None; } - fn collect(&self, node: &Node, defstack: &[String], blocks: &mut Vec) { - let mut next_defstack = defstack.to_vec(); - if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; - if let Some(Child::Symbol(name)) = 
node.children.get(name_index) { - next_defstack.push(name.clone()); - } - } - - if node.r#type == "BLOCK" { - let stmts: Vec<_> = node.children.iter().filter_map(ast::node).collect(); - if stmts.len() >= 3 { - self.add_block(&stmts, &next_defstack, blocks); - } - } + let first = method.statements.first()?; + let last = method.statements.last()?; + Some(Block { + skeleton, + names, + file: method.file.clone(), + defn: method.name.clone(), + line: first.line, + span: [first.span[0], first.span[1], last.span[2], last.span[3]], + }) +} - for child in node.children.iter().filter_map(ast::node) { - self.collect(child, &next_defstack, blocks); +fn tokenize_source(source: &str, skeleton: &mut Vec, names: &mut Vec) { + for token in token_re().find_iter(source).map(|match_| match_.as_str()) { + if identifier_token(token) { + skeleton.push(Skeleton::ID); + names.push( + token + .trim_start_matches('@') + .trim_end_matches('=') + .to_string(), + ); + } else if literal_token(token) { + skeleton.push(Skeleton::Node("LIT".to_string())); + } else { + skeleton.push(Skeleton::Node(token.to_string())); } } +} - fn add_block(&self, stmts: &[&Node], defstack: &[String], blocks: &mut Vec) { - let mut skeleton = Vec::new(); - let mut names = Vec::new(); - for stmt in stmts { - self.tokenize(stmt, &mut skeleton, &mut names); - } - if skeleton.len() < MIN_TOKENS { - return; - } +fn token_re() -> &'static Regex { + static TOKEN_RE: OnceLock = OnceLock::new(); + TOKEN_RE.get_or_init(|| { + Regex::new(r#"[A-Za-z_]\w*[!?=]?|@\w+|\d+(?:\.\d+)?|:[A-Za-z_]\w*|"[^"]*"|'[^']*'|\S"#) + .expect("inconsistent-rename-clone token regex") + }) +} - blocks.push(Block { - skeleton, - names, - file: self.file.clone(), - defn: defstack - .last() - .cloned() - .unwrap_or_else(|| "(top-level)".to_string()), - line: stmts[0].first_lineno, - span: [ - stmts[0].first_lineno, - stmts[0].first_column, - stmts.last().unwrap().last_lineno, - stmts.last().unwrap().last_column, - ], - }); - } +fn 
identifier_token(token: &str) -> bool { + let token = token.strip_prefix('@').unwrap_or(token); + let token = token.trim_end_matches(['!', '?', '=']); + let mut chars = token.chars(); + let Some(first) = chars.next() else { + return false; + }; + (first == '_' || first.is_ascii_alphabetic()) + && chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) +} - fn tokenize(&self, node: &Node, skeleton: &mut Vec, names: &mut Vec) { - match node.r#type.as_str() { - t if HOLE_TYPES.contains(&t) => { - skeleton.push(Skeleton::ID); - if let Some(Child::String(name)) = node.children.first() { - names.push(name.clone()); - } - } - "VCALL" => { - skeleton.push(Skeleton::ID); - if let Some(Child::Symbol(name)) = node.children.first() { - names.push(name.clone()); - } - } - "CALL" | "FCALL" => { - skeleton.push(if node.r#type == "CALL" { - Skeleton::CALL - } else { - Skeleton::FCALL - }); - let mid_index = if node.r#type == "CALL" { 1 } else { 0 }; - skeleton.push(Skeleton::MID); - if let Some(Child::Symbol(mid)) = node.children.get(mid_index) { - names.push(mid.clone()); - } - } - "LIT" | "STR" | "SYM" | "INTEGER" | "FLOAT" => { - skeleton.push(Skeleton::Node(node.r#type.clone())); - } - _ => { - skeleton.push(Skeleton::Node(node.r#type.clone())); - } - } - for child in node.children.iter().filter_map(ast::node) { - self.tokenize(child, skeleton, names); +fn literal_token(token: &str) -> bool { + token.starts_with(':') || quoted_token(token) || numeric_token(token) +} + +fn quoted_token(token: &str) -> bool { + (token.starts_with('"') && token.ends_with('"')) + || (token.starts_with('\'') && token.ends_with('\'')) +} + +fn numeric_token(token: &str) -> bool { + let mut saw_digit = false; + let mut saw_dot = false; + for ch in token.chars() { + if ch.is_ascii_digit() { + saw_digit = true; + } else if ch == '.' 
&& !saw_dot { + saw_dot = true; + } else { + return false; } } + saw_digit } struct Report { diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index eb175bc5a..b3786051c 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -90,7 +90,7 @@ struct LocalFlow { file: String, lines: Vec, language: Language, - methods_by_line: BTreeMap, + methods_by_span: BTreeMap, } impl LocalFlow { @@ -98,13 +98,13 @@ impl LocalFlow { file: String, lines: Vec, language: Language, - methods_by_line: BTreeMap, + methods_by_span: BTreeMap, ) -> Self { Self { file, lines, language, - methods_by_line, + methods_by_span, } } @@ -147,7 +147,13 @@ impl LocalFlow { } fn method_summary(&self, node: &Node, owner_hint: Option<&str>) -> MethodSummary { - let metadata = self.methods_by_line.get(&node.first_lineno); + let node_span = [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ]; + let metadata = self.methods_by_span.get(&node_span); let owner = metadata .map(|item| item.owner.as_str()) .or(owner_hint) @@ -155,7 +161,10 @@ impl LocalFlow { let name = metadata .map(|item| item.name.clone()) .unwrap_or_else(|| self.method_name(node)); - let statement_nodes = ast::body_stmts(node); + let statement_nodes = ast::body_stmts(node) + .into_iter() + .filter(|statement| !comment_statement(statement)) + .collect::>(); let local_names = self.local_names(&statement_nodes, metadata); let statements: Vec<_> = statement_nodes .iter() @@ -186,8 +195,9 @@ impl LocalFlow { index: usize, local_names: &BTreeSet, ) -> Statement { - let reads = self.local_reads(node, local_names); + let source = ast::slice(node, &self.lines); let writes = self.local_writes(node); + let reads = self.local_reads(node, local_names, &writes); Statement { index, line: node.first_lineno, @@ -198,7 +208,7 @@ impl LocalFlow { node.last_lineno, 
node.last_column, ], - source: ast::slice(node, &self.lines), + source, dependencies: self.assignment_dependencies(node, local_names), co_uses: self.co_use_edges(node, local_names), reads, @@ -377,7 +387,12 @@ impl LocalFlow { } } - fn local_reads(&self, node: &Node, local_names: &BTreeSet) -> BTreeSet { + fn local_reads( + &self, + node: &Node, + local_names: &BTreeSet, + writes: &BTreeSet, + ) -> BTreeSet { let mut reads = Vec::new(); self.walk_local(node, &mut |child| { if LOCAL_READ_TYPES.contains(&child.r#type.as_str()) { @@ -388,6 +403,11 @@ impl LocalFlow { } } }); + reads.extend(textual_local_reads( + &ast::slice(node, &self.lines), + local_names, + writes, + )); reads.into_iter().collect() } @@ -400,6 +420,7 @@ impl LocalFlow { } } }); + writes.extend(textual_local_writes(&ast::slice(node, &self.lines))); writes.into_iter().collect() } @@ -413,7 +434,8 @@ impl LocalFlow { if LOCAL_WRITE_TYPES.contains(&child.r#type.as_str()) { if let Some(Child::String(lhs)) = child.children.first() { if let Some(rhs) = child.children.get(1).and_then(ast::node) { - for read in self.local_reads(rhs, local_names) { + let rhs_writes = BTreeSet::new(); + for read in self.local_reads(rhs, local_names, &rhs_writes) { if lhs != &read { deps.push((lhs.clone(), read)); } @@ -422,13 +444,28 @@ impl LocalFlow { } } }); + let lhs_names = self.local_writes(node); + if !lhs_names.is_empty() { + let reads = self.local_reads(node, local_names, &lhs_names); + for lhs in lhs_names { + for read in &reads { + if &lhs != read { + deps.push((lhs.clone(), read.clone())); + } + } + } + } deps.sort(); deps.dedup(); deps } fn co_use_edges(&self, node: &Node, local_names: &BTreeSet) -> Vec<(String, String)> { - let reads: Vec<_> = self.local_reads(node, local_names).into_iter().collect(); + let writes = self.local_writes(node); + let reads: Vec<_> = self + .local_reads(node, local_names, &writes) + .into_iter() + .collect(); let mut out = Vec::new(); for i in 0..reads.len() { for j in i + 
1..reads.len() { @@ -457,11 +494,226 @@ fn local_read_name(node: &Node) -> Option { } } -fn method_metadata(document: &Document) -> BTreeMap { +fn textual_local_writes(source: &str) -> Vec { + let Some((lhs, operator)) = split_assignment(source) else { + return Vec::new(); + }; + if lhs.contains('.') || lhs.contains("->") || lhs.contains('[') { + return Vec::new(); + } + + let identifiers = identifiers_with_positions(lhs) + .into_iter() + .map(|identifier| identifier.name) + .filter(|name| !local_keyword(name)) + .collect::>(); + if identifiers.is_empty() { + return Vec::new(); + } + + if operator == ":=" || declaration_like_lhs(lhs) || identifiers.len() == 1 { + return identifiers + .into_iter() + .filter(|name| simple_identifier(name)) + .collect(); + } + + Vec::new() +} + +fn textual_local_reads( + source: &str, + local_names: &BTreeSet, + writes: &BTreeSet, +) -> Vec { + identifiers_with_positions(source) + .into_iter() + .filter(|identifier| local_names.contains(&identifier.name)) + .filter(|identifier| !writes.contains(&identifier.name)) + .filter(|identifier| !local_keyword(&identifier.name)) + .filter(|identifier| !member_name(source, identifier.start)) + .filter(|identifier| !call_name(source, identifier.end)) + .map(|identifier| identifier.name) + .collect() +} + +#[derive(Clone, Debug, Eq, PartialEq)] +struct IdentifierSpan { + name: String, + start: usize, + end: usize, +} + +fn identifiers_with_positions(source: &str) -> Vec { + let bytes = source.as_bytes(); + let mut out = Vec::new(); + let mut index = 0; + while index < bytes.len() { + let start = if bytes[index] == b'$' { + let next = index + 1; + if next < bytes.len() && identifier_start(bytes[next]) { + next + } else { + index += 1; + continue; + } + } else if identifier_start(bytes[index]) { + index + } else { + index += 1; + continue; + }; + let mut end = start + 1; + while end < bytes.len() && identifier_part(bytes[end]) { + end += 1; + } + out.push(IdentifierSpan { + name: 
source[start..end].to_string(), + start, + end, + }); + index = end; + } + out +} + +fn identifier_start(byte: u8) -> bool { + byte == b'_' || byte.is_ascii_alphabetic() +} + +fn identifier_part(byte: u8) -> bool { + byte == b'_' || byte.is_ascii_alphanumeric() +} + +fn split_assignment(source: &str) -> Option<(&str, &str)> { + let bytes = source.as_bytes(); + let mut index = 0; + while index < bytes.len() { + if index + 1 < bytes.len() && &source[index..index + 2] == ":=" { + return Some((source[..index].trim(), ":=")); + } + if bytes[index] == b'=' { + let previous = index.checked_sub(1).and_then(|i| bytes.get(i)).copied(); + let next = bytes.get(index + 1).copied(); + if !matches!( + previous, + Some( + b'=' | b'!' + | b'<' + | b'>' + | b':' + | b'+' + | b'-' + | b'*' + | b'/' + | b'%' + | b'&' + | b'|' + ) + ) && !matches!(next, Some(b'=' | b'>')) + { + return Some((source[..index].trim(), "=")); + } + } + index += 1; + } + None +} + +fn declaration_like_lhs(lhs: &str) -> bool { + identifiers_with_positions(lhs) + .first() + .map(|identifier| { + matches!( + identifier.name.as_str(), + "let" + | "const" + | "var" + | "val" + | "auto" + | "int" + | "long" + | "float" + | "double" + | "bool" + | "boolean" + | "char" + | "String" + | "string" + ) + }) + .unwrap_or(false) +} + +fn local_keyword(name: &str) -> bool { + matches!( + name, + "as" | "break" + | "auto" + | "boolean" + | "bool" + | "case" + | "char" + | "class" + | "const" + | "continue" + | "default" + | "double" + | "else" + | "false" + | "float" + | "for" + | "func" + | "fun" + | "function" + | "if" + | "in" + | "int" + | "long" + | "let" + | "mut" + | "nil" + | "None" + | "null" + | "private" + | "protected" + | "public" + | "return" + | "self" + | "short" + | "static" + | "String" + | "string" + | "this" + | "true" + | "val" + | "var" + | "void" + | "while" + ) +} + +fn simple_identifier(name: &str) -> bool { + let mut chars = name.chars(); + matches!(chars.next(), Some(first) if first == '_' || 
first.is_ascii_alphabetic()) + && chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) +} + +fn member_name(source: &str, start: usize) -> bool { + let prefix = source[..start].trim_end(); + prefix.ends_with('.') || prefix.ends_with("->") || prefix.ends_with("::") +} + +fn call_name(source: &str, end: usize) -> bool { + let suffix = source[end..].trim_start(); + suffix.starts_with('(') +} + +fn method_metadata(document: &Document) -> BTreeMap { document .function_defs .iter() - .map(|function| (function.line, metadata_for_function(document, function))) + .map(|function| (function.span, metadata_for_function(document, function))) .collect() } @@ -491,6 +743,13 @@ fn statement_container(node: &Node) -> bool { STATEMENT_CONTAINER_TYPES.contains(&node.r#type.as_str()) } +fn comment_statement(node: &Node) -> bool { + node.r#type.to_ascii_lowercase().contains("comment") + || node.text.trim_start().starts_with("//") + || node.text.trim_start().starts_with('#') + || node.text.trim_start().starts_with("--") +} + struct RawBoundary { line: usize, kind: String, diff --git a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs index acd40192e..f0591756e 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs @@ -47,6 +47,14 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result PathConditionReport { + let mined_sites = documents + .iter() + .flat_map(sites_from_mined_facts) + .collect::>(); + if !mined_sites.is_empty() { + return Report::new(mined_sites).findings(); + } + let mut sites = Vec::new(); for document in documents { let mut pc = PathCondition::new(document.file.clone(), document.lines.clone()); @@ -56,6 +64,64 @@ pub fn scan_documents(documents: &[Document]) -> PathConditionReport { Report::new(sites).findings() } +fn sites_from_mined_facts(document: &Document) -> Vec { + let mut sites = 
Vec::new(); + for decision in &document.decision_sites { + if decision.members.len() < 2 { + continue; + } + for call in &document.call_sites { + if call.function != decision.function + || !span_inside(call.span, decision.enclosing_span) + { + continue; + } + if span_inside(call.span, decision.span) { + continue; + } + if decision + .members + .iter() + .any(|member| member == &guard_call_text(call)) + { + continue; + } + sites.push(Site { + guards: decision.members.clone(), + action: action_text(call), + file: call.file.clone(), + defn: call.function.clone(), + line: call.line, + span: call.span, + }); + } + } + sites +} + +fn action_text(call: &syntax::CallSite) -> String { + let arguments = call.arguments.join(", "); + if call.receiver == "self" { + format!("{}({arguments})", call.message) + } else { + format!("{}.{}({arguments})", call.receiver, call.message) + } +} + +fn guard_call_text(call: &syntax::CallSite) -> String { + if call.receiver == "self" { + format!("{}()", call.message) + } else { + format!("{}.{}()", call.receiver, call.message) + } +} + +fn span_inside(inner: Span, outer: Span) -> bool { + let starts_after_or_at = inner[0] > outer[0] || (inner[0] == outer[0] && inner[1] >= outer[1]); + let ends_before_or_at = inner[2] < outer[2] || (inner[2] == outer[2] && inner[3] <= outer[3]); + starts_after_or_at && ends_before_or_at +} + struct PathCondition { file: String, lines: Vec, diff --git a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs index 1b5c346f0..59d512853 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs @@ -30,6 +30,12 @@ struct NilFact { non_nil_when_true: bool, } +struct CallParts<'a> { + receiver: Option<&'a Node>, + message: String, + no_args: bool, +} + struct Finding { file: String, defn: String, @@ -240,7 +246,7 @@ impl RedundantNilGuard { if 
matches!(node.r#type.as_str(), "DEFN" | "DEFS") { return; } - if recorded && node.r#type == "OPCALL" { + if recorded && (node.r#type == "OPCALL" || self.call_parts(node).is_some()) { return; } for child in node.children.iter().filter_map(ast::node) { @@ -283,26 +289,43 @@ impl RedundantNilGuard { } fn nil_fact(&self, node: &Node) -> Option { + if self.parenthesized_wrapper(node) { + return self.nil_fact(self.first_node_child(node)?); + } + + if let Some(call) = self.call_parts(node) { + if call.no_args && NIL_PREDICATE_MIDS.contains(&call.message.as_str()) { + let subject = self.subject_key(call.receiver?)?; + return Some(NilFact { + local: subject, + non_nil_when_true: false, + }); + } + if call.no_args && NON_NIL_PREDICATE_MIDS.contains(&call.message.as_str()) { + let subject = self.subject_key(call.receiver?)?; + return Some(NilFact { + local: subject, + non_nil_when_true: true, + }); + } + } + match node.r#type.as_str() { "CALL" => { let recv = node.children.get(0).and_then(ast::node)?; let mid = match node.children.get(1)? 
{ - Child::Symbol(s) => s, + Child::String(s) | Child::Symbol(s) => s, _ => return None, }; let args = node.children.get(2); - if NIL_PREDICATE_MIDS.contains(&mid.as_str()) - && (args.is_none() || matches!(args, Some(Child::Nil))) - { + if NIL_PREDICATE_MIDS.contains(&mid.as_str()) && self.no_call_arguments(args) { let subject = self.subject_key(recv)?; return Some(NilFact { local: subject, non_nil_when_true: false, }); } - if NON_NIL_PREDICATE_MIDS.contains(&mid.as_str()) - && (args.is_none() || matches!(args, Some(Child::Nil))) - { + if NON_NIL_PREDICATE_MIDS.contains(&mid.as_str()) && self.no_call_arguments(args) { let subject = self.subject_key(recv)?; return Some(NilFact { local: subject, @@ -331,6 +354,12 @@ impl RedundantNilGuard { } fn branch_nil_facts(&self, node: &Node, cond_truth: bool) -> Vec { + if self.parenthesized_wrapper(node) { + if let Some(child) = self.first_node_child(node) { + return self.branch_nil_facts(child, cond_truth); + } + } + if node.r#type == "AND" { if !cond_truth { return Vec::new(); @@ -425,30 +454,148 @@ impl RedundantNilGuard { Child::String(s) | Child::Symbol(s) => Some(s.clone()), _ => None, }, - "CALL" => { - let recv = node.children.get(0).and_then(ast::node); - let mid = match node.children.get(1)? 
{ - Child::Symbol(s) => s, - _ => return None, - }; - let args = node.children.get(2); - if (args.is_none() || matches!(args, Some(Child::Nil))) - && self.stable_reader_name(mid) - { - if let Some(recv) = recv { - if recv.r#type == "SELF" { - return Some(format!("self.{}", mid)); - } - let recv_key = self.subject_key(recv)?; - return Some(format!("{}.{}", recv_key, mid)); - } + _ if self.call_parts(node).is_some() => { + let call = self.call_parts(node)?; + if !call.no_args || !self.stable_reader_name(&call.message) { + return None; } - None + let recv = call.receiver?; + if recv.r#type == "SELF" { + return Some(format!("self.{}", call.message)); + } + let recv_key = self.subject_key(recv)?; + Some(format!("{}.{}", recv_key, call.message)) } _ => None, } } + fn call_parts<'a>(&self, node: &'a Node) -> Option> { + match node.r#type.as_str() { + "CALL" => { + let receiver = node.children.get(0).and_then(ast::node); + let message = self.child_name(node.children.get(1)?)?; + Some(CallParts { + receiver, + message, + no_args: self.no_call_arguments(node.children.get(2)), + }) + } + "METHOD_INVOCATION" => { + let nodes = node + .children + .iter() + .filter_map(ast::node) + .collect::>(); + let receiver = nodes.first().copied(); + let message = nodes.get(1).and_then(|child| self.node_name(child))?; + Some(CallParts { + receiver, + message, + no_args: self.no_call_arguments(node.children.get(2)), + }) + } + "FUNCTION_CALL" | "METHOD_CALL" => { + let callee = node.children.iter().filter_map(ast::node).next()?; + let args = node + .children + .iter() + .skip(1) + .find(|child| matches!(child, Child::Node(n) if matches!(n.r#type.as_str(), "ARGUMENTS" | "ARGUMENT_LIST" | "LIST"))); + self.field_call_parts(callee, args) + } + "BLOCK" => { + let callee = node.children.iter().filter_map(ast::node).next()?; + let args = node + .children + .iter() + .skip(1) + .find(|child| matches!(child, Child::Node(n) if matches!(n.r#type.as_str(), "ARGUMENTS" | "ARGUMENT_LIST" | "LIST"))); + 
self.field_call_parts(callee, args) + } + "INVOCATION_EXPRESSION" => { + let callee = node.children.iter().filter_map(ast::node).next()?; + let mut parts = self.call_parts(callee)?; + let args = node + .children + .iter() + .skip(1) + .find(|child| matches!(child, Child::Node(n) if matches!(n.r#type.as_str(), "ARGUMENTS" | "ARGUMENT_LIST" | "LIST"))); + parts.no_args = self.no_call_arguments(args); + Some(parts) + } + _ => None, + } + } + + fn field_call_parts<'a>( + &self, + node: &'a Node, + args: Option<&'a Child>, + ) -> Option> { + if !matches!( + node.r#type.as_str(), + "DOT_INDEX_EXPRESSION" + | "FIELD_EXPRESSION" + | "FIELD_ACCESS" + | "MEMBER_EXPRESSION" + | "CALL" + ) { + return self.call_parts(node); + } + let nodes = node + .children + .iter() + .filter_map(ast::node) + .collect::>(); + let receiver = nodes.first().copied(); + let message = nodes.last().and_then(|child| self.node_name(child))?; + Some(CallParts { + receiver, + message, + no_args: self.no_call_arguments(args), + }) + } + + fn child_name(&self, child: &Child) -> Option { + match child { + Child::String(s) | Child::Symbol(s) => Some(s.clone()), + Child::Node(node) => self.node_name(node), + _ => None, + } + } + + fn node_name(&self, node: &Node) -> Option { + match node.children.first() { + Some(Child::String(s)) | Some(Child::Symbol(s)) => Some(s.clone()), + _ => { + let text = ast::slice(node, &self.lines).trim().to_string(); + (!text.is_empty()).then_some(text) + } + } + } + + fn no_call_arguments(&self, args: Option<&Child>) -> bool { + match args { + None | Some(Child::Nil) => true, + Some(Child::Node(node)) => { + !node.children.iter().any(|child| ast::node(child).is_some()) + } + Some(_) => false, + } + } + + fn parenthesized_wrapper(&self, node: &Node) -> bool { + matches!( + node.r#type.as_str(), + "CONDITION_CLAUSE" | "PARENTHESIZED_EXPRESSION" | "PARENTHESIZED_STATEMENTS" + ) && self.first_node_child(node).is_some() + } + + fn first_node_child<'a>(&self, node: &'a Node) -> 
Option<&'a Node> { + node.children.iter().find_map(ast::node) + } + fn stable_reader_name(&self, mid: &str) -> bool { !(mid.ends_with('=') || mid.ends_with('!') || mid == "[]") } @@ -480,6 +627,9 @@ impl RedundantNilGuard { fn stmts_for<'a>(&self, node: Option<&'a Node>) -> Vec<&'a Node> { let Some(node) = node else { return Vec::new() }; + if self.call_parts(node).is_some() { + return vec![node]; + } if node.r#type == "BLOCK" { node.children.iter().filter_map(ast::node).collect() } else { @@ -491,24 +641,28 @@ impl RedundantNilGuard { if matches!(node.r#type.as_str(), "RETURN" | "NEXT" | "BREAK") { return true; } - if !matches!(node.r#type.as_str(), "FCALL" | "VCALL" | "CALL") { + if !matches!(node.r#type.as_str(), "FCALL" | "VCALL" | "CALL") + && self.call_parts(node).is_none() + { return false; } - let mid = if node.r#type == "CALL" { + let mid = if let Some(call) = self.call_parts(node) { + Some(call.message) + } else if node.r#type == "CALL" { node.children.get(1).and_then(|c| match c { - Child::Symbol(s) => Some(s.as_str()), + Child::String(s) | Child::Symbol(s) => Some(s.clone()), _ => None, }) } else { node.children.get(0).and_then(|c| match c { - Child::Symbol(s) => Some(s.as_str()), + Child::String(s) | Child::Symbol(s) => Some(s.clone()), _ => None, }) }; if let Some(mid) = mid { - return TERMINATING_CALLS.contains(&mid); + return TERMINATING_CALLS.contains(&mid.as_str()); } false } diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs index 253d10847..77a6d4ecd 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs @@ -41,6 +41,14 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result Vec { + let all_decisions = documents + .iter() + .flat_map(decisions_from_mined_facts) + .collect::>(); + if !all_decisions.is_empty() { + return 
Report::new(all_decisions).findings(); + } + let mut global_immutable_readers: BTreeMap> = BTreeMap::new(); let mut global_immutable_reader_types: BTreeMap> = BTreeMap::new(); @@ -85,6 +93,68 @@ pub fn scan_documents(documents: &[Document]) -> Vec { Report::new(all_decisions).findings() } +fn decisions_from_mined_facts(document: &Document) -> Vec { + let state_fields = document + .state_writes + .iter() + .map(|write| normalized_state_field(&write.field)) + .collect::>(); + + document + .decision_sites + .iter() + .filter_map(|decision| { + let refs = document + .state_reads + .iter() + .filter(|read| { + read.function == decision.function && span_inside(read.span, decision.span) + }) + .filter_map(|read| mined_state_ref(read, &state_fields)) + .collect::>() + .into_iter() + .collect::>(); + if refs.is_empty() { + return None; + } + Some(Decision { + file: decision.file.clone(), + defn: decision.function.clone(), + line: decision.line, + span: decision.span, + predicate: decision.predicate.clone(), + state_refs: refs, + }) + }) + .collect() +} + +fn mined_state_ref(read: &syntax::StateRead, state_fields: &BTreeSet) -> Option { + let field = normalized_state_field(&read.field); + if !state_fields.is_empty() && !state_fields.contains(&field) { + return None; + } + let receiver = read.receiver.trim_start_matches('$'); + if receiver.is_empty() || matches!(receiver, "self" | "this") { + Some(field) + } else { + Some(format!("{}.{}", receiver, field)) + } +} + +fn normalized_state_field(field: &str) -> String { + field + .trim_start_matches('@') + .trim_start_matches('$') + .to_string() +} + +fn span_inside(inner: Span, outer: Span) -> bool { + let starts_after_or_at = inner[0] > outer[0] || (inner[0] == outer[0] && inner[1] >= outer[1]); + let ends_before_or_at = inner[2] < outer[2] || (inner[2] == outer[2] && inner[3] <= outer[3]); + starts_after_or_at && ends_before_or_at +} + struct StateBranchDensity { file: String, lines: Vec, diff --git 
a/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs index 643d2e9b5..0b0677c3e 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs @@ -177,7 +177,8 @@ pub fn scan_documents_with_semantic_aliases_and_min_writes( } let mut sm = StateMesh::new(src_map, min_writes); - sm.run(semantic_aliases); + sm.load_document_facts(documents); + sm.find_re_derivations(semantic_aliases); sm.to_json_graph() } @@ -212,6 +213,49 @@ impl StateMesh { self.find_re_derivations(semantic_aliases); } + fn load_document_facts(&mut self, documents: &[Document]) { + for document in documents { + for write in &document.state_writes { + let norm = self.normalize(&write.field); + self.writes.push(Write { + attr: write.field.clone(), + norm, + recv: write.receiver.clone(), + file: write.file.clone(), + defn: write.function.clone(), + line: write.line, + span: write.span, + }); + } + } + + let field_norms = self.known_field_norms(); + if field_norms.is_empty() { + return; + } + + for document in documents { + for read in &document.state_reads { + let norm = self.normalize(&read.field); + if !field_norms.contains(&norm) { + continue; + } + let candidate = Read { + attr: read.field.clone(), + norm, + recv: read.receiver.clone(), + file: read.file.clone(), + defn: read.function.clone(), + line: read.line, + span: read.span, + }; + if !self.write_target_read(&candidate) { + self.reads.push(candidate); + } + } + } + } + fn discover_fields(&mut self) { let files: Vec<_> = self.src_map.keys().cloned().collect(); for file in files { diff --git a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs index abfae59be..247b60dcf 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs +++ 
b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs @@ -43,13 +43,7 @@ pub fn scan_files( pub fn scan_documents(documents: &[Document]) -> Vec { let mut rows = Vec::new(); for document in documents { - let mut detector = TemporalOrderingPressure::new( - document.file.clone(), - document.lines.clone(), - document.state_reads.clone(), - document.state_writes.clone(), - ); - rows.extend(detector.scan(&document.normalized_root)); + rows.extend(scan_document_facts(document)); } rows.sort_by(|a, b| { b.score @@ -61,6 +55,128 @@ pub fn scan_documents(documents: &[Document]) -> Vec Vec { + let owners = document + .function_defs + .iter() + .map(|function| function.owner.clone()) + .collect::>(); + owners + .into_iter() + .filter_map(|owner| pressure_row_for_owner(document, &owner)) + .collect() +} + +fn pressure_row_for_owner(document: &Document, owner: &str) -> Option { + let methods = document + .function_defs + .iter() + .filter(|function| function.owner == owner) + .map(|function| MethodState { + name: function.name.clone(), + line: function.line, + span: function.span, + visibility: function + .visibility + .clone() + .unwrap_or_else(|| "public".to_string()), + reads: sorted_unique( + document + .state_reads + .iter() + .filter(|read| read.owner == function.owner && read.function == function.name) + .map(|read| read.field.clone()), + ), + writes: sorted_unique( + document + .state_writes + .iter() + .filter(|write| { + write.owner == function.owner && write.function == function.name + }) + .map(|write| write.field.clone()), + ), + }) + .collect::>(); + pressure_row(document.file.as_str(), owner, &methods) +} + +fn pressure_row( + file: &str, + owner: &str, + methods: &[MethodState], +) -> Option { + let public_methods: Vec<_> = methods + .iter() + .filter(|m| m.visibility == "public") + .collect(); + let state_methods: Vec<_> = public_methods + .iter() + .filter(|m| !m.reads.is_empty() || !m.writes.is_empty()) + .collect(); + let writers: 
Vec<_> = public_methods + .iter() + .filter(|m| !m.writes.is_empty()) + .collect(); + + if state_methods.len() < 3 || writers.len() < 2 { + return None; + } + + let mut fields_set = BTreeSet::new(); + for m in &state_methods { + fields_set.extend(m.reads.iter().cloned()); + fields_set.extend(m.writes.iter().cloned()); + } + let fields = fields_set.into_iter().collect::>(); + let shared_fields = fields + .iter() + .filter(|field| { + state_methods + .iter() + .filter(|m| m.reads.contains(*field) || m.writes.contains(*field)) + .count() + >= 2 + }) + .cloned() + .collect::>(); + if shared_fields.is_empty() { + return None; + } + + let n = state_methods.len(); + let state_space = 2usize.pow(fields.len().min(12) as u32); + let score = (n * writers.len() * shared_fields.len().max(1)) + state_space; + Some(TemporalOrderingPressureRow { + at: format!("{}:{}:{}", file, owner, state_methods[0].line), + file: file.to_string(), + owner: owner.to_string(), + public_methods: public_methods.len(), + state_methods: n, + writers: writers.len(), + state_fields: fields, + shared_fields, + orderings: format!("{n}!"), + state_space: format!( + "2^{}", + state_methods + .iter() + .flat_map(|m| m.reads.iter().chain(m.writes.iter())) + .collect::>() + .len() + ), + score, + sites: state_methods + .iter() + .map(|m| format!("{}:{}:{}", file, m.name, m.line)) + .collect(), + spans: state_methods + .iter() + .map(|m| (format!("{}:{}:{}", file, m.name, m.line), m.span)) + .collect(), + }) +} + struct TemporalOrderingPressure { file: String, lines: Vec, diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index 9cf8d94d4..2c4643791 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -104,6 +104,7 @@ pub struct Document { pub state_reads: Vec, pub state_writes: Vec, pub decision_sites: Vec, + pub dispatch_sites: Vec, pub predicate_aliases: Vec, pub comparison_uses: Vec, } @@ -177,6 
+178,18 @@ pub struct DecisionSite { pub line: usize, pub span: Span, pub predicate: String, + pub enclosing_span: Span, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct DispatchSite { + pub variant_set: Vec, + pub arm_members: BTreeMap>, + pub outside: Vec, + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, } #[derive(Clone, Debug, Eq, PartialEq, Serialize)] @@ -187,6 +200,7 @@ pub struct ComparisonUse { pub function: String, pub line: usize, pub span: Span, + pub enclosing_span: Span, } #[derive(Clone, Debug)] diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs index 79451bd3f..330dd3362 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs @@ -173,6 +173,10 @@ pub(crate) trait LanguageProfile { EMPTY_NODE_KINDS } + fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + fn identifier_node_kinds(&self) -> &[&str] { EMPTY_NODE_KINDS } @@ -217,6 +221,10 @@ pub(crate) trait LanguageProfile { DEFAULT_COMPARISON_OPERATORS } + fn branch_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + fn case_node_kinds(&self) -> &[&str] { EMPTY_NODE_KINDS } @@ -453,10 +461,20 @@ pub(crate) trait LanguageProfile { .into_iter() .find(|child| self.function_body_node_kinds().contains(&child.kind())) })?; - let statements: Vec> = named_children(body) + let mut statements: Vec> = named_children(body) .into_iter() .filter(|child| !self.ignored_statement_node_kinds().contains(&child.kind())) .collect(); + if statements.len() == 1 + && self + .nested_statement_wrapper_node_kinds() + .contains(&statements[0].kind()) + { + statements = named_children(statements[0]) + .into_iter() + .filter(|child| !self.ignored_statement_node_kinds().contains(&child.kind())) + .collect(); + } if statements.len() == 1 { statements.first().copied() } else { diff --git 
a/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs index dae57a428..70c7bbbf7 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs @@ -79,6 +79,10 @@ impl LanguageProfile for CProfile { &["binary_expression"] } + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "for_statement", "switch_statement"] + } + fn case_node_kinds(&self) -> &[&str] { &["switch_statement"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs index 0729fcfd6..b82d20923 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs @@ -91,6 +91,10 @@ impl LanguageProfile for CppProfile { &["binary_expression"] } + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "for_range_loop", "switch_statement"] + } + fn case_node_kinds(&self) -> &[&str] { &["switch_statement"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs index 8e0944203..344fb5b02 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs @@ -57,6 +57,10 @@ impl LanguageProfile for CSharpProfile { &["binary_expression"] } + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "foreach_statement", "switch_statement"] + } + fn case_node_kinds(&self) -> &[&str] { &["switch_statement"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs index 66eba28db..864e5fcea 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs @@ -1,6 +1,8 @@ +use super::super::tree_sitter_adapter::{named_children, normalize_type_owner, 
CallTarget}; use super::super::Language; use super::base::LanguageProfile; -use tree_sitter::Language as TreeSitterLanguage; +use crate::decomplex::ast::{node_text, normalize_text}; +use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct GoProfile; @@ -13,10 +15,28 @@ impl LanguageProfile for GoProfile { tree_sitter_go::LANGUAGE.into() } + fn first_argument_receiver(&self) -> bool { + true + } + fn function_node_kinds(&self) -> &[&str] { &["function_declaration", "method_declaration"] } + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + if node.kind() == "method_declaration" { + return go_method_receiver(node, source).map(|(owner, _name)| owner); + } + self.default_owner_name_from_declaration(node, source) + } + + fn function_receiver_name(&self, node: Node<'_>, source: &str) -> Option { + if node.kind() == "method_declaration" { + return go_method_receiver(node, source).map(|(_owner, name)| name); + } + None + } + fn generic_owner_node_kinds(&self) -> &[&str] { &["type_spec"] } @@ -33,8 +53,12 @@ impl LanguageProfile for GoProfile { &["block", "statement_list"] } + fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { + &["statement_list"] + } + fn call_node_kinds(&self) -> &[&str] { - &["call_expression"] + &["call_expression", "go_statement"] } fn identifier_node_kinds(&self) -> &[&str] { @@ -77,6 +101,14 @@ impl LanguageProfile for GoProfile { &["binary_expression"] } + fn branch_node_kinds(&self) -> &[&str] { + &[ + "if_statement", + "for_statement", + "expression_switch_statement", + ] + } + fn case_node_kinds(&self) -> &[&str] { &["expression_switch_statement"] } @@ -116,4 +148,52 @@ impl LanguageProfile for GoProfile { fn field_like_node_kinds(&self) -> &[&str] { &["selector_expression"] } + + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + match node.kind() { + "call_expression" => self.default_call_target(node, source), + "go_statement" => go_keyword_call_target(node, 
source), + _ => None, + } + } +} + +fn go_method_receiver(node: Node<'_>, source: &str) -> Option<(String, String)> { + let receiver_params = named_children(node) + .into_iter() + .find(|child| child.kind() == "parameter_list")?; + let receiver = named_children(receiver_params) + .into_iter() + .find(|child| child.kind() == "parameter_declaration")?; + let children = named_children(receiver); + let name = children + .iter() + .find(|child| matches!(child.kind(), "identifier" | "field_identifier")) + .map(|child| node_text(*child, source).to_string())?; + let type_node = children + .iter() + .find(|child| matches!(child.kind(), "pointer_type" | "type_identifier"))?; + Some((normalize_type_owner(node_text(*type_node, source)), name)) +} + +fn go_keyword_call_target<'tree>(node: Node<'tree>, source: &str) -> Option> { + if node.kind() != "go_statement" { + return None; + } + let arguments = go_statement_arguments(node, source)?; + let mut target = CallTarget::new("self".to_string(), "go".to_string(), arguments); + target.source_node = Some(node); + Some(target) +} + +fn go_statement_arguments(node: Node<'_>, source: &str) -> Option> { + let text = node_text(node, source).trim(); + let inner = text.strip_prefix("go(")?.strip_suffix(')')?; + Some( + inner + .split(',') + .map(normalize_text) + .filter(|argument| !argument.is_empty()) + .collect(), + ) } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs index ca65e549a..476431eed 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs @@ -1,6 +1,9 @@ +use super::super::tree_sitter_adapter::{named_children, CallTarget}; use super::super::Language; use super::base::LanguageProfile; +use crate::decomplex::ast::node_text; use tree_sitter::Language as TreeSitterLanguage; +use tree_sitter::Node; pub(crate) struct JavaProfile; @@ -53,6 +56,14 @@ impl LanguageProfile for 
JavaProfile { &["binary_expression"] } + fn branch_node_kinds(&self) -> &[&str] { + &[ + "if_statement", + "enhanced_for_statement", + "switch_expression", + ] + } + fn case_node_kinds(&self) -> &[&str] { &["switch_expression"] } @@ -88,4 +99,24 @@ impl LanguageProfile for JavaProfile { fn field_like_node_kinds(&self) -> &[&str] { &["field_access"] } + + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + if node.kind() != "method_invocation" { + return None; + } + let children = named_children(node); + let identifiers = children + .iter() + .copied() + .filter(|child| child.kind() == "identifier") + .collect::>(); + if identifiers.len() >= 2 { + return Some(CallTarget::new( + node_text(identifiers[0], source).to_string(), + node_text(identifiers[1], source).to_string(), + self.call_argument_texts(node, source), + )); + } + self.default_call_target(node, source) + } } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs index b2280a8fa..d6ee6d4eb 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs @@ -33,6 +33,10 @@ impl LanguageProfile for KotlinProfile { &["function_body", "statements"] } + fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { + &["statements"] + } + fn call_node_kinds(&self) -> &[&str] { &["call_expression"] } @@ -56,6 +60,7 @@ impl LanguageProfile for KotlinProfile { "conjunction_expression", "additive_expression", "multiplicative_expression", + "binary_expression", ] } @@ -96,6 +101,7 @@ impl LanguageProfile for KotlinProfile { "conjunction_expression", "equality_expression", "comparison_expression", + "binary_expression", ] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs index 11dc1ded3..7c80d9e33 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs 
+++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs @@ -1,6 +1,7 @@ +use super::super::tree_sitter_adapter::named_children; use super::super::Language; use super::base::LanguageProfile; -use crate::decomplex::ast::line; +use crate::decomplex::ast::{line, node_text}; use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct LuaProfile; @@ -18,6 +19,15 @@ impl LanguageProfile for LuaProfile { &["function_declaration"] } + fn function_name(&self, node: Node<'_>, source: &str) -> Option { + lua_method_name(node, source).or_else(|| self.default_function_name(node, source)) + } + + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + lua_method_owner_name(node, source) + .or_else(|| self.default_owner_name_from_declaration(node, source)) + } + fn parameter_list_node_kinds(&self) -> &[&str] { &["parameters"] } @@ -30,6 +40,10 @@ impl LanguageProfile for LuaProfile { &["block"] } + fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + fn call_node_kinds(&self) -> &[&str] { &["function_call", "method_call"] } @@ -50,6 +64,10 @@ impl LanguageProfile for LuaProfile { &["binary_expression"] } + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement"] + } + fn boolean_and_operators(&self) -> &[&str] { &["and", "&&"] } @@ -78,3 +96,28 @@ impl LanguageProfile for LuaProfile { first_line.contains("_tl_compat") && first_line.contains("compat53.module") } } + +fn lua_method_name(node: Node<'_>, source: &str) -> Option { + let method = lua_method_index_expression(node)?; + named_children(method) + .into_iter() + .last() + .map(|child| node_text(child, source).to_string()) +} + +fn lua_method_owner_name(node: Node<'_>, source: &str) -> Option { + let method = lua_method_index_expression(node)?; + named_children(method) + .into_iter() + .next() + .map(|child| node_text(child, source).to_string()) +} + +fn lua_method_index_expression<'tree>(node: Node<'tree>) -> Option> { + if node.kind() != 
"function_declaration" { + return None; + } + named_children(node) + .into_iter() + .find(|child| child.kind() == "method_index_expression") +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs index b1a467968..0948afecb 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs @@ -1,4 +1,4 @@ -use super::super::tree_sitter_adapter::{named_children, AssignmentTarget, Target}; +use super::super::tree_sitter_adapter::{named_children, AssignmentTarget, CallTarget, Target}; use super::super::Language; use super::base::LanguageProfile; use crate::decomplex::ast::{node_text, normalize_text}; @@ -140,6 +140,22 @@ impl LanguageProfile for PhpProfile { self.default_assignment_target(node) } + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + if !self.call_node_kinds().contains(&node.kind()) { + return None; + } + let mut target = self.default_call_target(node, source)?; + target.receiver = php_normalize_receiver(&target.receiver); + Some(target) + } + + fn call_argument_texts(&self, node: Node<'_>, source: &str) -> Vec { + self.call_argument_nodes(node) + .into_iter() + .map(|argument| normalize_text(&php_normalize_source(node_text(argument, source)))) + .collect() + } + fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { let target = self.default_state_target(lhs, source)?; Some(Target { diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs index e4c88a2d3..e1cf90cf8 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs @@ -33,6 +33,10 @@ impl LanguageProfile for PythonProfile { &["block"] } + fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + fn call_node_kinds(&self) -> &[&str] { &["call"] } 
diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs index a45ea5f48..bfa7f82d1 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs @@ -48,6 +48,10 @@ impl LanguageProfile for RubyProfile { &["body_statement", "do_block"] } + fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { + &["body_statement"] + } + fn identifier_node_kinds(&self) -> &[&str] { &["identifier", "constant"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs index da998e1e8..5e755e1f5 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs @@ -1,6 +1,7 @@ use super::super::Language; use super::base::LanguageProfile; -use tree_sitter::Language as TreeSitterLanguage; +use crate::decomplex::ast::{node_text, normalize_text}; +use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct SwiftProfile; @@ -37,6 +38,10 @@ impl LanguageProfile for SwiftProfile { &["function_body", "statements"] } + fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { + &["statements"] + } + fn call_node_kinds(&self) -> &[&str] { &["call_expression"] } @@ -67,6 +72,10 @@ impl LanguageProfile for SwiftProfile { ] } + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "for_statement", "switch_statement"] + } + fn case_node_kinds(&self) -> &[&str] { &["switch_statement"] } @@ -114,4 +123,20 @@ impl LanguageProfile for SwiftProfile { fn field_like_node_kinds(&self) -> &[&str] { &["navigation_expression"] } + + fn call_argument_texts(&self, node: Node<'_>, source: &str) -> Vec { + self.call_argument_nodes(node) + .into_iter() + .filter_map(|argument| { + let text = normalize_text(node_text(argument, source)); + let value = text + .strip_prefix('(') + .and_then(|inner| 
inner.strip_suffix(')')) + .unwrap_or(&text) + .trim() + .to_string(); + (!value.is_empty()).then_some(value) + }) + .collect() + } } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs index 5c54ea927..b58d82b73 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs @@ -1,6 +1,8 @@ +use super::super::tree_sitter_adapter::named_children; use super::super::Language; use super::base::LanguageProfile; -use tree_sitter::Language as TreeSitterLanguage; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct ZigProfile; @@ -17,6 +19,21 @@ impl LanguageProfile for ZigProfile { &["function_declaration"] } + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + if node.kind() == "struct_declaration" { + return node + .parent() + .filter(|parent| parent.kind() == "variable_declaration") + .and_then(|parent| { + named_children(parent) + .into_iter() + .find(|child| child.kind() == "identifier") + }) + .map(|name| node_text(name, source).to_string()); + } + self.default_owner_name_from_declaration(node, source) + } + fn parameter_list_node_kinds(&self) -> &[&str] { &["parameters"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 19f49b09a..7e5df2d20 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -1,11 +1,11 @@ use super::{ adapters::{language_profile, LanguageProfile}, - CallSite, ComparisonUse, DecisionSite, Document, FunctionDef, Language, PredicateAlias, - StateRead, StateWrite, + CallSite, ComparisonUse, DecisionSite, DispatchSite, Document, FunctionDef, Language, + PredicateAlias, StateRead, StateWrite, }; use 
crate::decomplex::ast::{line, node_text, normalize_text, normalize_tree, span, RawNode}; use anyhow::{Context, Result}; -use std::collections::HashSet; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::fs; use std::path::{Path, PathBuf}; use tree_sitter::{Node, Parser}; @@ -17,6 +17,7 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { let mut state_reads = Vec::new(); let mut state_writes = Vec::new(); let mut decision_sites = Vec::new(); + let mut dispatch_sites = Vec::new(); let mut predicate_aliases = Vec::new(); let mut comparison_uses = Vec::new(); let mut seen_writes = HashSet::new(); @@ -44,6 +45,16 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { &mut seen_decisions, ); language_profile(language).after_collect_facts(&mut function_defs, &call_sites); + collect_dispatch_sites( + parsed.tree.root_node(), + &parsed.source, + &parsed.file, + language, + &context, + &call_sites, + &mut dispatch_sites, + ); + collect_equality_dispatch_sites(&comparison_uses, &call_sites, &mut dispatch_sites); Ok(Document { file: parsed.file.to_string_lossy().to_string(), @@ -57,6 +68,7 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { state_reads, state_writes, decision_sites, + dispatch_sites, predicate_aliases, comparison_uses, }) @@ -223,6 +235,213 @@ fn collect_facts( } } +fn collect_dispatch_sites( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + call_sites: &[CallSite], + out: &mut Vec, +) { + let next_context = push_control_context( + node, + push_function_context( + node, + push_owner_context(node, source, context, language), + source, + language, + ), + source, + language, + ); + record_dispatch_site(node, source, file, language, &next_context, call_sites, out); + + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + collect_dispatch_sites( + child, + source, + file, + language, + &next_context, + call_sites, + out, + ); + } +} + +fn 
record_dispatch_site( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + call_sites: &[CallSite], + out: &mut Vec, +) { + let profile = language_profile(language); + if !(case_node(profile, node) || profile.hidden_case(node)) { + return; + } + + let decision_node = profile.case_source_node(node); + if profile.predicate_less_case(decision_node) { + return; + } + let predicate = strip_enclosing_parentheses( + &profile.normalize_source_text(&decision_predicate(profile, decision_node, source)), + ); + if predicate.is_empty() { + return; + } + + let mut arm_members: BTreeMap> = BTreeMap::new(); + for arm in case_arms(profile, decision_node) { + let members = dispatch_members_inside( + call_sites, + &predicate, + &context.current_function(), + span(arm), + ); + if members.is_empty() { + continue; + } + for pattern in case_arm_patterns(arm, source, profile) { + for variant in dispatch_constant_patterns(&pattern) { + arm_members + .entry(variant) + .or_default() + .extend(members.clone()); + } + } + } + if arm_members.len() < 2 { + return; + } + for members in arm_members.values_mut() { + members.sort(); + members.dedup(); + } + + let mut variant_set = arm_members.keys().cloned().collect::>(); + variant_set.sort(); + let outside = dispatch_members_outside( + call_sites, + &predicate, + &context.current_function(), + span(decision_node), + ); + let site = DispatchSite { + variant_set, + arm_members, + outside, + file: file.to_string_lossy().to_string(), + function: context.current_function(), + line: line(decision_node), + span: span(decision_node), + }; + if out.iter().any(|existing| existing == &site) { + return; + } + out.push(site); +} + +fn collect_equality_dispatch_sites( + comparisons: &[ComparisonUse], + call_sites: &[CallSite], + out: &mut Vec, +) { + let mut groups: BTreeMap<(String, String, String), Vec<(&ComparisonUse, String)>> = + BTreeMap::new(); + for comparison in comparisons { + let Some((predicate, variant)) 
= dispatch_equality(&comparison.canon_source) else { + continue; + }; + groups + .entry(( + comparison.file.clone(), + comparison.function.clone(), + predicate, + )) + .or_default() + .push((comparison, variant)); + } + + for ((file, function, predicate), entries) in groups { + let variant_set = entries + .iter() + .map(|(_, variant)| variant.clone()) + .collect::>(); + if variant_set.len() < 2 { + continue; + } + + let mut arm_members: BTreeMap> = BTreeMap::new(); + let mut branch_spans = Vec::new(); + for (comparison, variant) in entries { + branch_spans.push(comparison.enclosing_span); + let members = dispatch_members_inside( + call_sites, + &predicate, + &function, + comparison.enclosing_span, + ); + if members.is_empty() { + continue; + } + arm_members.entry(variant).or_default().extend(members); + } + if arm_members.len() < 2 { + continue; + } + for members in arm_members.values_mut() { + members.sort(); + members.dedup(); + } + + let outside = + dispatch_members_outside_any(call_sites, &predicate, &function, &branch_spans); + let mut variant_set = arm_members.keys().cloned().collect::>(); + variant_set.sort(); + let span = branch_spans + .into_iter() + .reduce(union_span) + .unwrap_or([0, 0, 0, 0]); + let site = DispatchSite { + variant_set, + arm_members, + outside, + file, + function, + line: span[0], + span, + }; + if out.iter().any(|existing| existing == &site) { + continue; + } + out.push(site); + } +} + +fn dispatch_equality(source: &str) -> Option<(String, String)> { + for operator in ["===", "=="] { + let Some((left, right)) = source.split_once(operator) else { + continue; + }; + let left = strip_enclosing_parentheses(&normalize_text(left)); + let right = strip_enclosing_parentheses(&normalize_text(right)); + let left_variant = dispatch_constant_pattern(&left); + let right_variant = dispatch_constant_pattern(&right); + return match (left_variant, right_variant) { + (true, false) => Some((right, left)), + (false, true) => Some((left, right)), + _ => 
None, + }; + } + None +} + fn record_function_def( node: Node<'_>, source: &str, @@ -277,10 +496,9 @@ fn record_predicate_alias( let Some(body) = profile.single_expression_body(node) else { return; }; - let text = profile.normalize_source_text(node_text(body, source)); - if text.is_empty() || text == "nil" || text.len() > 200 { + let Some(text) = predicate_body_text(profile, node_text(body, source)) else { return; - } + }; let file_name = file.to_string_lossy().to_string(); out.push(PredicateAlias { name: name.clone(), @@ -292,18 +510,58 @@ fn record_predicate_alias( }); } +fn predicate_body_text(profile: &dyn LanguageProfile, source: &str) -> Option { + let mut text = profile.normalize_source_text(source); + if text.starts_with('{') && text.ends_with('}') { + text = text[1..text.len() - 1].trim().to_string(); + } + let text = text + .strip_prefix("return ") + .unwrap_or(&text) + .trim_end_matches(';') + .trim() + .to_string(); + if text.contains(';') { + return None; + } + if text.is_empty() || text == "nil" || text.len() > 200 { + return None; + } + if predicate_like_body(&text) { + Some(text) + } else { + None + } +} + +fn predicate_like_body(text: &str) -> bool { + let lower = text.to_ascii_lowercase(); + matches!(lower.as_str(), "true" | "false") + || lower.contains("true") + || lower.contains("false") + || lower.contains("null") + || lower.contains("nil") + || text.contains("==") + || text.contains("!=") + || text.contains("&&") + || text.contains("||") + || lower.contains(" and ") + || lower.contains(" or ") +} + fn record_comparison_use( node: Node<'_>, source: &str, file: &Path, - _language: Language, + language: Language, context: &ContextState, out: &mut Vec, ) { - if !comparison_node(language_profile(_language), node, source) { + let profile = language_profile(language); + if !comparison_node(profile, node, source) { return; } - let raw = language_profile(_language).normalize_source_text(node_text(node, source)); + let raw = 
profile.normalize_source_text(node_text(node, source)); out.push(ComparisonUse { canon_source: raw.clone(), raw, @@ -311,14 +569,18 @@ fn record_comparison_use( function: context.current_function(), line: line(node), span: span(node), + enclosing_span: decision_enclosing_span(profile, node), }); } fn comparison_node(profile: &dyn LanguageProfile, node: Node<'_>, source: &str) -> bool { if profile.comparison_node_kinds().contains(&node.kind()) { - return profile - .comparison_operators() - .contains(&direct_operator_from_source(node, source).as_str()); + let operator = direct_operator_from_source(node, source); + return profile.comparison_operators().contains(&operator.as_str()) + || profile + .comparison_operators() + .iter() + .any(|operator| node_text(node, source).contains(operator)); } if !profile.call_node_kinds().contains(&node.kind()) { return false; @@ -371,6 +633,7 @@ fn record_decision_site( decision_node, source, )), + enclosing_span: span(decision_node), }, ); } @@ -435,6 +698,7 @@ fn record_conjunction_decision( line: conjunction_span(node)[0], span: conjunction_span(node), predicate: profile.normalize_source_text(node_text(node, source)), + enclosing_span: decision_enclosing_span(profile, node), }, ); } @@ -874,6 +1138,125 @@ fn default_case_pattern(profile: &dyn LanguageProfile, text: &str) -> bool { text.is_empty() || profile.default_case_patterns().contains(&text) } +fn dispatch_members_inside( + call_sites: &[CallSite], + predicate: &str, + function: &str, + outer: [usize; 4], +) -> Vec { + let mut members = dispatch_member_calls(call_sites, predicate, function) + .into_iter() + .filter(|call| dispatch_inside_span(call.span, outer)) + .map(dispatch_member_name) + .collect::>(); + members.sort(); + members.dedup(); + members +} + +fn dispatch_members_outside( + call_sites: &[CallSite], + predicate: &str, + function: &str, + decision_span: [usize; 4], +) -> Vec { + let mut members = dispatch_member_calls(call_sites, predicate, function) + 
.into_iter() + .filter(|call| !dispatch_inside_span(call.span, decision_span)) + .map(dispatch_member_name) + .collect::>(); + members.sort(); + members.dedup(); + members +} + +fn dispatch_members_outside_any( + call_sites: &[CallSite], + predicate: &str, + function: &str, + decision_spans: &[[usize; 4]], +) -> Vec { + let mut members = dispatch_member_calls(call_sites, predicate, function) + .into_iter() + .filter(|call| { + !decision_spans + .iter() + .any(|span| dispatch_inside_span(call.span, *span)) + }) + .map(dispatch_member_name) + .collect::>(); + members.sort(); + members.dedup(); + members +} + +fn dispatch_member_calls<'a>( + call_sites: &'a [CallSite], + predicate: &str, + function: &str, +) -> Vec<&'a CallSite> { + call_sites + .iter() + .filter(|call| { + call.function == function && call.receiver == predicate && !call.message.is_empty() + }) + .collect() +} + +fn dispatch_member_name(call: &CallSite) -> String { + strip_assignment_suffix(&call.message) +} + +fn dispatch_constant_patterns(member: &str) -> Vec { + member + .split(',') + .map(|pattern| { + pattern + .trim() + .strip_prefix("case ") + .unwrap_or(pattern.trim()) + }) + .filter(|pattern| dispatch_constant_pattern(pattern)) + .map(ToString::to_string) + .collect() +} + +fn dispatch_constant_pattern(pattern: &str) -> bool { + if pattern.is_empty() { + return false; + } + pattern.replace("::", ".").split(['.', '_']).all(|part| { + let mut chars = part.chars(); + matches!(chars.next(), Some(first) if first.is_ascii_uppercase()) + && chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) + }) +} + +fn dispatch_inside_span(inner: [usize; 4], outer: [usize; 4]) -> bool { + let starts_after_or_at = inner[0] > outer[0] || (inner[0] == outer[0] && inner[1] >= outer[1]); + let ends_before_or_at = inner[2] < outer[2] || (inner[2] == outer[2] && inner[3] <= outer[3]); + starts_after_or_at && ends_before_or_at +} + +fn union_span(left: [usize; 4], right: [usize; 4]) -> [usize; 4] { + let 
starts_before_or_at = left[0] < right[0] || (left[0] == right[0] && left[1] <= right[1]); + let ends_after_or_at = left[2] > right[2] || (left[2] == right[2] && left[3] >= right[3]); + [ + if starts_before_or_at { + left[0] + } else { + right[0] + }, + if starts_before_or_at { + left[1] + } else { + right[1] + }, + if ends_after_or_at { left[2] } else { right[2] }, + if ends_after_or_at { left[3] } else { right[3] }, + ] +} + fn decision_predicate(profile: &dyn LanguageProfile, node: Node<'_>, source: &str) -> String { let target = profile.decision_subject(node); normalize_text( @@ -921,6 +1304,38 @@ fn conjunction_span(node: Node<'_>) -> [usize; 4] { base } +fn decision_enclosing_span(profile: &dyn LanguageProfile, node: Node<'_>) -> [usize; 4] { + let mut parent = node.parent(); + let mut seen = HashSet::new(); + while let Some(current) = parent { + let key = format!("{:?}\0{}", span(current), current.kind()); + if !seen.insert(key) { + break; + } + if branch_like_node(profile, current) { + return span(current); + } + parent = current.parent(); + } + span(node) +} + +fn branch_like_node(profile: &dyn LanguageProfile, node: Node<'_>) -> bool { + profile.branch_node_kinds().contains(&node.kind()) + || profile.case_node_kinds().contains(&node.kind()) + || matches!( + node.kind(), + "if" | "unless" + | "if_statement" + | "if_expression" + | "while" + | "while_statement" + | "for_statement" + | "foreach_statement" + | "for_expression" + ) +} + fn decision_member_text(node: Node<'_>, source: &str) -> String { normalize_text(&strip_enclosing_parentheses(node_text(node, source))) } From a3462a53d82ca4618db4ab844017799960d9c6a7 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 03:53:07 +0000 Subject: [PATCH 34/52] Fix Rust local flow parity --- .../src/decomplex/detectors/local_flow.rs | 718 +++++++++++++++++- .../src/decomplex/detectors/locality_drag.rs | 38 +- .../src/decomplex/syntax/adapters/base.rs | 20 + .../rust/src/decomplex/syntax/adapters/c.rs | 4 
+ .../rust/src/decomplex/syntax/adapters/cpp.rs | 4 + .../src/decomplex/syntax/adapters/csharp.rs | 16 + .../rust/src/decomplex/syntax/adapters/go.rs | 16 + .../src/decomplex/syntax/adapters/java.rs | 8 + .../decomplex/syntax/adapters/javascript.rs | 8 + .../src/decomplex/syntax/adapters/kotlin.rs | 14 +- .../rust/src/decomplex/syntax/adapters/lua.rs | 12 + .../src/decomplex/syntax/adapters/rust.rs | 8 + .../src/decomplex/syntax/adapters/swift.rs | 16 + .../decomplex/syntax/adapters/typescript.rs | 8 + .../rust/src/decomplex/syntax/adapters/zig.rs | 4 + 15 files changed, 869 insertions(+), 25 deletions(-) diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index b3786051c..09d185685 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -1,4 +1,5 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::ast::{self, Child, Node, RawNode, Span}; +use crate::decomplex::syntax::adapters::{language_profile, LanguageProfile}; use crate::decomplex::syntax::{self, Document, FunctionDef, Language}; use anyhow::Result; use serde::Serialize; @@ -20,6 +21,8 @@ pub struct MethodSummary { pub span: Span, #[serde(skip_serializing)] pub node: Node, + #[serde(skip_serializing)] + pub raw_node: Option, pub statements: Vec, pub boundaries: Vec, } @@ -68,17 +71,42 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result Vec { let mut out = Vec::new(); for document in documents { - let mut detector = LocalFlow::new( - document.file.clone(), - document.lines.clone(), - document.language, - method_metadata(document), - ); - out.extend(detector.scan(&document.normalized_root)); + let normalized = normalized_local_methods(document); + if document.language != Language::Ruby { + let mut normalized_by_key: BTreeMap<_, _> = normalized + .into_iter() + .map(|summary| (method_summary_key(&summary), 
summary)) + .collect(); + for raw in raw_local_methods(document) { + out.push( + normalized_by_key + .remove(&method_summary_key(&raw)) + .unwrap_or(raw), + ); + } + out.extend(normalized_by_key.into_values()); + continue; + } + + out.extend(normalized); } out } +fn normalized_local_methods(document: &Document) -> Vec { + let mut detector = LocalFlow::new( + document.file.clone(), + document.lines.clone(), + document.language, + method_metadata(document), + ); + detector.scan(&document.normalized_root) +} + +fn method_summary_key(summary: &MethodSummary) -> (String, String, usize) { + (summary.file.clone(), summary.id.clone(), summary.line) +} + #[derive(Clone, Debug, Eq, PartialEq)] struct MethodMetadata { owner: String, @@ -86,6 +114,562 @@ struct MethodMetadata { params: BTreeSet, } +fn raw_local_methods(document: &Document) -> Vec { + let profile = language_profile(document.language); + document + .function_defs + .iter() + .map(|function| raw_method_summary(document, profile, function)) + .collect() +} + +fn raw_method_summary( + document: &Document, + profile: &dyn LanguageProfile, + function: &FunctionDef, +) -> MethodSummary { + let statement_nodes = raw_function_body_statements(&function.body, profile); + let local_names = raw_local_names(function, &statement_nodes, profile); + let statements: Vec<_> = statement_nodes + .iter() + .enumerate() + .map(|(index, statement)| raw_statement_summary(statement, index, &local_names, profile)) + .collect(); + let owner = local_flow_owner(&document.file, &function.owner); + + MethodSummary { + id: format!("{}#{}", owner, function.name), + owner, + name: function.name.clone(), + file: function.file.clone(), + line: function.line, + span: function.span, + node: normalized_node_for_span(&document.normalized_root, function.span) + .cloned() + .unwrap_or_else(|| fallback_node_from_raw(&function.body)), + raw_node: Some(function.body.clone()), + boundaries: raw_structural_boundaries(document, &statements), + statements, + } 
+} + +fn raw_function_body_statements<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Vec<&'a RawNode> { + let body = raw_function_body_node(node, profile); + let Some(body) = body else { + return Vec::new(); + }; + + let mut named = raw_named_children(body) + .into_iter() + .filter(|child| !raw_comment_node(child)) + .collect::>(); + if named.len() == 1 + && profile + .nested_statement_wrapper_node_kinds() + .contains(&named[0].kind.as_str()) + { + if raw_branch_node(named[0], profile) { + return vec![named[0]]; + } + named = raw_named_children(named[0]) + .into_iter() + .filter(|child| !raw_comment_node(child)) + .collect(); + } + if named.is_empty() && body.text.trim().is_empty() { + return Vec::new(); + } + if raw_branch_node(body, profile) || raw_assignment_statement(body, profile) || named.is_empty() + { + return vec![body]; + } + named +} + +fn raw_function_body_node<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Option<&'a RawNode> { + raw_named_children(node).into_iter().rev().find(|child| { + profile + .function_body_node_kinds() + .contains(&child.kind.as_str()) + }) +} + +fn raw_local_names( + function: &FunctionDef, + statements: &[&RawNode], + profile: &dyn LanguageProfile, +) -> BTreeSet { + let mut names: BTreeSet = function.params.iter().cloned().collect(); + for statement in statements { + names.extend(raw_local_writes(statement, profile)); + } + names +} + +fn raw_statement_summary( + node: &RawNode, + index: usize, + local_names: &BTreeSet, + profile: &dyn LanguageProfile, +) -> Statement { + let writes = raw_local_writes(node, profile); + let reads = raw_local_reads(node, local_names, profile); + Statement { + index, + line: node.span[0], + end_line: node.span[2], + span: node.span, + source: ast::normalize_text(&node.text), + dependencies: raw_assignment_dependencies(node, local_names, profile), + co_uses: co_use_pairs(&reads), + reads, + writes, + } +} + +fn raw_local_reads( + node: &RawNode, + local_names: 
&BTreeSet, + profile: &dyn LanguageProfile, +) -> BTreeSet { + let mut reads = Vec::new(); + raw_walk_local(node, None, node, profile, &mut |child, parent| { + let Some(name) = raw_local_identifier_text(child, profile) else { + return; + }; + if local_names.contains(&name) + && !raw_local_write_node(child, parent, profile) + && !raw_declaration_name_in_tree(node, child, profile) + && !raw_declaration_name(child, parent, profile) + && !raw_member_name(child, parent, profile) + && !raw_call_name(child, parent, profile) + { + reads.push(name); + } + }); + reads.into_iter().collect() +} + +fn raw_local_writes(node: &RawNode, profile: &dyn LanguageProfile) -> BTreeSet { + let mut writes = textual_local_writes(&ast::normalize_text(&node.text)); + raw_walk_local(node, None, node, profile, &mut |child, parent| { + if raw_local_write_node(child, parent, profile) + || raw_declaration_name_in_tree(node, child, profile) + { + if let Some(name) = raw_local_identifier_text(child, profile) { + writes.push(name); + } + } + }); + writes.into_iter().collect() +} + +fn raw_assignment_dependencies( + node: &RawNode, + local_names: &BTreeSet, + profile: &dyn LanguageProfile, +) -> Vec<(String, String)> { + let lhs_names = raw_local_writes(node, profile); + if lhs_names.is_empty() { + return Vec::new(); + } + + let reads = raw_local_reads(node, local_names, profile); + let mut deps = Vec::new(); + for lhs in &lhs_names { + for read in &reads { + if lhs != read && !lhs_names.contains(read) { + deps.push((lhs.clone(), read.clone())); + } + } + } + deps.sort(); + deps.dedup(); + deps +} + +fn co_use_pairs(reads: &BTreeSet) -> Vec<(String, String)> { + let reads = reads.iter().cloned().collect::>(); + let mut out = Vec::new(); + for i in 0..reads.len() { + for j in i + 1..reads.len() { + out.push((reads[i].clone(), reads[j].clone())); + } + } + out +} + +fn raw_structural_boundaries(document: &Document, statements: &[Statement]) -> Vec { + let mut out = Vec::new(); + for i in 
0..statements.len().saturating_sub(1) { + let left = &statements[i]; + let right = &statements[i + 1]; + if let Some(boundary) = raw_source_boundary(document, left.end_line + 1, right.line - 1) { + out.push(Boundary { + before_index: left.index, + after_index: right.index, + line: boundary.line, + kind: boundary.kind, + text: boundary.text, + }); + } + } + out +} + +fn raw_source_boundary( + document: &Document, + first_line: usize, + last_line: usize, +) -> Option { + if first_line > last_line { + return None; + } + + let mut blank = None; + for line_number in first_line..=last_line { + let stripped = document + .lines + .get(line_number - 1) + .map(|line| line.trim()) + .unwrap_or(""); + if stripped.starts_with('#') || stripped.starts_with("//") || stripped.starts_with("--") { + return Some(RawBoundary { + line: line_number, + kind: "comment".to_string(), + text: stripped.to_string(), + }); + } + if stripped.is_empty() && blank.is_none() { + blank = Some(RawBoundary { + line: line_number, + kind: "blank".to_string(), + text: stripped.to_string(), + }); + } + } + blank +} + +fn raw_walk_local<'a>( + node: &'a RawNode, + parent: Option<&'a RawNode>, + root: &'a RawNode, + profile: &dyn LanguageProfile, + block: &mut dyn FnMut(&'a RawNode, Option<&'a RawNode>), +) { + if !std::ptr::eq(node, root) && raw_nested_local_scope(node, profile) { + return; + } + block(node, parent); + for child in &node.children { + raw_walk_local(child, Some(node), root, profile, block); + } +} + +fn raw_nested_local_scope(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile.function_node_kinds().contains(&node.kind.as_str()) || raw_owner_node(node, profile) +} + +fn raw_owner_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile + .class_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .module_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .generic_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + 
.impl_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .struct_owner_node_kinds() + .contains(&node.kind.as_str()) +} + +fn raw_local_identifier_text(node: &RawNode, profile: &dyn LanguageProfile) -> Option { + if profile + .identifier_node_kinds() + .contains(&node.kind.as_str()) + { + return Some(node.text.clone()); + } + if node.named && raw_named_children(node).is_empty() && simple_identifier(&node.text) { + return Some(node.text.clone()); + } + None +} + +fn raw_local_write_node( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + if raw_local_identifier_text(node, profile).is_none() || raw_member_name(node, parent, profile) + { + return false; + } + if raw_declaration_name(node, parent, profile) { + return true; + } + let Some(parent) = parent else { + return false; + }; + if profile + .assignment_node_kinds() + .contains(&parent.kind.as_str()) + { + if let Some(lhs) = raw_named_children(parent).first() { + if raw_contains_node(lhs, node) { + return true; + } + } + } + raw_assignment_lhs(node, parent, profile) +} + +fn raw_declaration_name( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + parent + .and_then(|parent| raw_local_declaration_name_node(parent, profile)) + .map(|name| std::ptr::eq(name, node) || raw_contains_node(name, node)) + .unwrap_or(false) +} + +fn raw_declaration_name_in_tree( + root: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + raw_local_declaration_name_node(root, profile) + .map(|name| std::ptr::eq(name, target) || raw_contains_node(name, target)) + .unwrap_or(false) + || root + .children + .iter() + .any(|child| raw_declaration_name_in_tree(child, target, profile)) +} + +fn raw_local_declaration_name_node<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Option<&'a RawNode> { + if !profile + .local_declaration_node_kinds() + .contains(&node.kind.as_str()) + && !profile + 
.variable_declaration_node_kinds() + .contains(&node.kind.as_str()) + { + return None; + } + + if profile + .short_variable_declaration_node_kinds() + .contains(&node.kind.as_str()) + { + if let Some(left) = raw_named_children(node).into_iter().find(|child| { + profile + .variable_declaration_node_kinds() + .contains(&child.kind.as_str()) + }) { + return raw_first_identifier(left, profile).or(Some(left)); + } + } + + if let Some(variable) = raw_named_children(node).into_iter().find(|child| { + profile + .variable_declaration_node_kinds() + .contains(&child.kind.as_str()) + }) { + if simple_identifier(&variable.text) { + return Some(variable); + } + if let Some(identifier) = raw_first_identifier(variable, profile) { + return Some(identifier); + } + } + + if let Some(declaration_assignment) = raw_named_children(node).into_iter().find(|child| { + profile + .declaration_assignment_node_kinds() + .contains(&child.kind.as_str()) + }) { + if let Some(lhs) = raw_named_children(declaration_assignment).first().copied() { + return raw_first_identifier(lhs, profile).or(Some(lhs)); + } + } + + raw_named_children(node) + .into_iter() + .find(|child| { + profile + .local_identifier_wrapper_node_kinds() + .contains(&child.kind.as_str()) + }) + .or_else(|| raw_first_identifier(node, profile)) +} + +fn raw_first_identifier<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Option<&'a RawNode> { + if raw_local_identifier_text(node, profile).is_some() { + return Some(node); + } + node.children + .iter() + .find_map(|child| raw_first_identifier(child, profile)) +} + +fn raw_assignment_lhs(node: &RawNode, parent: &RawNode, profile: &dyn LanguageProfile) -> bool { + if raw_previous_sibling(node, parent) + .map(|sibling| sibling.text.as_str() == ":") + .unwrap_or(false) + { + return false; + } + raw_next_sibling(node, parent) + .map(|sibling| { + !sibling.named + && profile + .assignment_operator_tokens() + .contains(&sibling.text.as_str()) + }) + .unwrap_or(false) +} + +fn 
raw_member_name( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + let Some(parent) = parent else { + return false; + }; + if !profile + .field_like_node_kinds() + .contains(&parent.kind.as_str()) + { + return false; + } + raw_named_children(parent) + .last() + .map(|field| std::ptr::eq(*field, node)) + .unwrap_or(false) +} + +fn raw_call_name(node: &RawNode, parent: Option<&RawNode>, profile: &dyn LanguageProfile) -> bool { + let Some(parent) = parent else { + return false; + }; + if profile + .field_like_node_kinds() + .contains(&parent.kind.as_str()) + { + return false; + } + profile.call_node_kinds().contains(&parent.kind.as_str()) + && raw_named_children(parent) + .first() + .map(|callee| std::ptr::eq(*callee, node)) + .unwrap_or(false) +} + +fn raw_assignment_statement(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile + .assignment_node_kinds() + .contains(&node.kind.as_str()) + || node.children.iter().any(|child| { + !child.named + && profile + .assignment_operator_tokens() + .contains(&child.text.as_str()) + }) +} + +fn raw_branch_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile.branch_node_kinds().contains(&node.kind.as_str()) +} + +fn raw_comment_node(node: &RawNode) -> bool { + node.kind.to_ascii_lowercase().contains("comment") +} + +fn raw_named_children(node: &RawNode) -> Vec<&RawNode> { + node.children.iter().filter(|child| child.named).collect() +} + +fn raw_next_sibling<'a>(node: &RawNode, parent: &'a RawNode) -> Option<&'a RawNode> { + let index = parent + .children + .iter() + .position(|child| std::ptr::eq(child, node))?; + parent.children.get(index + 1) +} + +fn raw_previous_sibling<'a>(node: &RawNode, parent: &'a RawNode) -> Option<&'a RawNode> { + let index = parent + .children + .iter() + .position(|child| std::ptr::eq(child, node))?; + index + .checked_sub(1) + .and_then(|previous| parent.children.get(previous)) +} + +fn raw_contains_node(root: &RawNode, 
target: &RawNode) -> bool { + std::ptr::eq(root, target) + || root + .children + .iter() + .any(|child| raw_contains_node(child, target)) +} + +fn normalized_node_for_span(root: &Node, span: Span) -> Option<&Node> { + if [ + root.first_lineno, + root.first_column, + root.last_lineno, + root.last_column, + ] == span + { + return Some(root); + } + root.children + .iter() + .filter_map(ast::node) + .find_map(|child| normalized_node_for_span(child, span)) +} + +fn fallback_node_from_raw(raw: &RawNode) -> Node { + Node { + r#type: "DEFN".to_string(), + children: raw + .children + .iter() + .filter(|child| child.named) + .map(|child| Child::Node(Box::new(fallback_node_from_raw(child)))) + .collect(), + first_lineno: raw.span[0], + first_column: raw.span[1], + last_lineno: raw.span[2], + last_column: raw.span[3], + text: raw.text.clone(), + } +} + struct LocalFlow { file: String, lines: Vec, @@ -184,6 +768,7 @@ impl LocalFlow { node.last_column, ], node: node.clone(), + raw_node: None, boundaries: self.structural_boundaries(&statements), statements, } @@ -434,7 +1019,7 @@ impl LocalFlow { if LOCAL_WRITE_TYPES.contains(&child.r#type.as_str()) { if let Some(Child::String(lhs)) = child.children.first() { if let Some(rhs) = child.children.get(1).and_then(ast::node) { - let rhs_writes = BTreeSet::new(); + let rhs_writes = self.local_writes(rhs); for read in self.local_reads(rhs, local_names, &rhs_writes) { if lhs != &read { deps.push((lhs.clone(), read)); @@ -526,17 +1111,31 @@ fn textual_local_reads( local_names: &BTreeSet, writes: &BTreeSet, ) -> Vec { + if plain_string_literal_source(source) { + return Vec::new(); + } + identifiers_with_positions(source) .into_iter() .filter(|identifier| local_names.contains(&identifier.name)) .filter(|identifier| !writes.contains(&identifier.name)) - .filter(|identifier| !local_keyword(&identifier.name)) .filter(|identifier| !member_name(source, identifier.start)) .filter(|identifier| !call_name(source, identifier.end)) .map(|identifier| 
identifier.name) .collect() } +fn plain_string_literal_source(source: &str) -> bool { + let source = source.trim(); + if source.starts_with('f') || source.starts_with('F') { + return false; + } + (source.starts_with("\"\"\"") && source.ends_with("\"\"\"")) + || (source.starts_with("'''") && source.ends_with("'''")) + || (source.starts_with('"') && source.ends_with('"')) + || (source.starts_with('\'') && source.ends_with('\'')) +} + #[derive(Clone, Debug, Eq, PartialEq)] struct IdentifierSpan { name: String, @@ -589,7 +1188,7 @@ fn split_assignment(source: &str) -> Option<(&str, &str)> { let bytes = source.as_bytes(); let mut index = 0; while index < bytes.len() { - if index + 1 < bytes.len() && &source[index..index + 2] == ":=" { + if index + 1 < bytes.len() && bytes[index] == b':' && bytes[index + 1] == b'=' { return Some((source[..index].trim(), ":=")); } if bytes[index] == b'=' { @@ -718,11 +1317,7 @@ fn method_metadata(document: &Document) -> BTreeMap { } fn metadata_for_function(document: &Document, function: &FunctionDef) -> MethodMetadata { - let owner = if function.owner == file_owner(&document.file) { - "(top-level)".to_string() - } else { - function.owner.clone() - }; + let owner = local_flow_owner(&document.file, &function.owner); MethodMetadata { owner, name: function.name.clone(), @@ -730,6 +1325,17 @@ fn metadata_for_function(document: &Document, function: &FunctionDef) -> MethodM } } +fn local_flow_owner(file: &str, owner: &str) -> String { + let file_owner = file_owner(file); + if owner == file_owner { + return "(top-level)".to_string(); + } + owner + .strip_prefix(&format!("{file_owner}::")) + .unwrap_or(owner) + .to_string() +} + fn file_owner(file: &str) -> String { Path::new(file) .file_stem() @@ -797,6 +1403,86 @@ mod tests { ); } + #[test] + fn handles_non_ascii_source_without_byte_boundary_panics() { + let summaries = summaries( + "def mixed(price):\n marker = \"✓\"\n total = price\n return total\n", + Language::Python, + ); + let summary = 
summaries + .iter() + .find(|summary| summary.name == "mixed") + .expect("mixed summary"); + + assert_eq!(summary.statements.len(), 3); + assert_eq!( + summary.statements[1].dependencies, + vec![("total".to_string(), "price".to_string())] + ); + } + + #[test] + fn preserves_self_parameter_reads_for_python_attribute_access() { + let summaries = summaries( + "class TextSuite:\n def setup(self):\n self.console = Console(file=StringIO(), color_system=\"truecolor\")\n self.text = Text.from_markup(markup)\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.id == "TextSuite#setup") + .expect("setup summary"); + + assert_eq!( + summary.statements[0].reads, + ["self".to_string()].into_iter().collect() + ); + assert!(summary.statements[0] + .dependencies + .contains(&("file".to_string(), "self".to_string()))); + assert_eq!( + summary.statements[1].reads, + ["self".to_string()].into_iter().collect() + ); + } + + #[test] + fn excludes_keyword_argument_writes_from_outer_assignment_dependencies() { + let summaries = summaries( + "def render():\n pretty = Pretty(snippets.PYTHON_DICT, indent_guides=True)\n return pretty\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "render") + .expect("render summary"); + + assert_eq!( + summary.statements[0].writes, + ["indent_guides".to_string(), "pretty".to_string()] + .into_iter() + .collect() + ); + assert!(summary.statements[0].dependencies.is_empty()); + } + + #[test] + fn does_not_read_locals_from_plain_docstring_text() { + let summaries = summaries( + "def get_content(user):\n \"\"\"Extract text from user dict.\"\"\"\n return user\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "get_content") + .expect("get_content summary"); + + assert!(summary.statements[0].reads.is_empty()); + assert_eq!( + summary.statements[1].reads, + ["user".to_string()].into_iter().collect() + ); + } + #[test] fn 
extracts_java_kotlin_and_swift_local_flow() { let cases = [ diff --git a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs index 7310c785b..3f401ba52 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs @@ -102,9 +102,12 @@ impl LocalityDrag { return Vec::new(); } - let local_complexity = weighted_inlined_cognitive_complexity::LocalScorer::new() - .score(&summary.node) - .score; + let scorer = weighted_inlined_cognitive_complexity::LocalScorer::new(); + let local_complexity = summary + .raw_node + .as_ref() + .map(|node| scorer.score_raw(node).score) + .unwrap_or_else(|| scorer.score(&summary.node).score); if local_complexity < self.min_local_complexity { return Vec::new(); } @@ -406,11 +409,7 @@ impl LocalityDrag { fn example_for(&self, statement: &local_flow::Statement) -> Example { let source = statement.source.lines().next().unwrap_or("").trim(); - let source = if source.len() > 99 { - format!("{}...", &source[0..96]) - } else { - source.to_string() - }; + let source = truncate_example_source(source); Example { line: statement.line, source, @@ -460,3 +459,26 @@ impl LocalityDrag { (value * 10.0).round() / 10.0 } } + +fn truncate_example_source(source: &str) -> String { + if source.chars().count() <= 99 { + return source.to_string(); + } + + let prefix: String = source.chars().take(96).collect(); + format!("{prefix}...") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn truncates_non_ascii_examples_on_character_boundaries() { + let source = "value = \"✓\"".repeat(12); + let truncated = truncate_example_source(&source); + + assert_eq!(truncated.chars().count(), 99); + assert!(truncated.ends_with("...")); + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs index 330dd3362..e755b7145 100644 --- 
a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs @@ -197,6 +197,26 @@ pub(crate) trait LanguageProfile { EMPTY_NODE_KINDS } + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn short_variable_declaration_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn declaration_assignment_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + fn receiver_type_node_kinds(&self) -> &[&str] { EMPTY_NODE_KINDS } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs index 70c7bbbf7..187b48a0e 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs @@ -58,6 +58,10 @@ impl LanguageProfile for CProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn local_declaration_node_kinds(&self) -> &[&str] { + &["declaration", "init_declarator"] + } + fn receiver_parameter_node_kinds(&self) -> &[&str] { &["parameter_declaration"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs index b82d20923..c2f4430a5 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs @@ -62,6 +62,10 @@ impl LanguageProfile for CppProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn local_declaration_node_kinds(&self) -> &[&str] { + &["declaration", "init_declarator"] + } + fn receiver_type_node_kinds(&self) -> &[&str] { &[ "type_identifier", diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs index 344fb5b02..ec2463bd6 100644 --- 
a/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs @@ -49,6 +49,22 @@ impl LanguageProfile for CSharpProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { + &["argument"] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &[ + "local_declaration_statement", + "variable_declaration", + "variable_declarator", + ] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["variable_declaration"] + } + fn declarator_node_kinds(&self) -> &[&str] { &["variable_declaration", "variable_declarator"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs index 864e5fcea..7cc2a78f0 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs @@ -57,6 +57,10 @@ impl LanguageProfile for GoProfile { &["statement_list"] } + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { + &["expression_list"] + } + fn call_node_kinds(&self) -> &[&str] { &["call_expression", "go_statement"] } @@ -81,6 +85,18 @@ impl LanguageProfile for GoProfile { &["=", ":=", "+=", "-=", "*=", "/=", "%="] } + fn local_declaration_node_kinds(&self) -> &[&str] { + &["short_var_declaration", "variable_declaration"] + } + + fn short_variable_declaration_node_kinds(&self) -> &[&str] { + &["short_var_declaration"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["expression_list", "variable_declaration"] + } + fn receiver_type_node_kinds(&self) -> &[&str] { &["pointer_type", "type_identifier"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs index 476431eed..3f73af2fc 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs @@ -52,6 +52,14 
@@ impl LanguageProfile for JavaProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn local_declaration_node_kinds(&self) -> &[&str] { + &["local_variable_declaration", "variable_declarator"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["variable_declarator"] + } + fn comparison_node_kinds(&self) -> &[&str] { &["binary_expression"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs index 3feae9881..2968a2e39 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs @@ -53,6 +53,14 @@ impl LanguageProfile for JavaScriptProfile { &["=", "+=", "-=", "*=", "/=", "%=", "&&=", "||="] } + fn local_declaration_node_kinds(&self) -> &[&str] { + &["lexical_declaration", "variable_declarator"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["variable_declarator"] + } + fn comparison_node_kinds(&self) -> &[&str] { &["binary_expression"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs index d6ee6d4eb..245a4556a 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs @@ -34,7 +34,7 @@ impl LanguageProfile for KotlinProfile { } fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { - &["statements"] + &["block", "statements"] } fn call_node_kinds(&self) -> &[&str] { @@ -53,6 +53,18 @@ impl LanguageProfile for KotlinProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { + &["directly_assignable_expression", "value_argument"] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &["property_declaration", "variable_declaration"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["variable_declaration", 
"directly_assignable_expression"] + } + fn comparison_node_kinds(&self) -> &[&str] { &[ "equality_expression", diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs index 7c80d9e33..007b6bd5a 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs @@ -60,6 +60,18 @@ impl LanguageProfile for LuaProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn local_declaration_node_kinds(&self) -> &[&str] { + &["variable_declaration"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["variable_declaration", "variable_list"] + } + + fn declaration_assignment_node_kinds(&self) -> &[&str] { + &["assignment_statement"] + } + fn comparison_node_kinds(&self) -> &[&str] { &["binary_expression"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs index c78ea7c8b..decf3600b 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs @@ -57,6 +57,14 @@ impl LanguageProfile for RustProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { + &["pattern"] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &["let_declaration"] + } + fn receiver_type_node_kinds(&self) -> &[&str] { &["type_identifier", "generic_type", "scoped_type_identifier"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs index 5e755e1f5..73dd2914c 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs @@ -62,6 +62,22 @@ impl LanguageProfile for SwiftProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { + &[ + 
"directly_assignable_expression", + "value_argument", + "pattern", + ] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &["property_declaration", "variable_declaration"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["variable_declaration", "directly_assignable_expression"] + } + fn comparison_node_kinds(&self) -> &[&str] { &[ "equality_expression", diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs index 17110bd0a..263fc39b9 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs @@ -53,6 +53,14 @@ impl LanguageProfile for TypeScriptProfile { &["=", "+=", "-=", "*=", "/=", "%=", "&&=", "||="] } + fn local_declaration_node_kinds(&self) -> &[&str] { + &["lexical_declaration", "variable_declarator"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["variable_declarator"] + } + fn comparison_node_kinds(&self) -> &[&str] { &["binary_expression"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs index b58d82b73..07b72d9d8 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs @@ -62,6 +62,10 @@ impl LanguageProfile for ZigProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn local_declaration_node_kinds(&self) -> &[&str] { + &["variable_declaration"] + } + fn comparison_node_kinds(&self) -> &[&str] { &["binary_expression"] } From 59c595b589dbe95bc443ae11078d679695ec9868 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 04:26:18 +0000 Subject: [PATCH 35/52] Improve Python and Rust detector parity --- gems/decomplex/lib/decomplex/syntax/python.rb | 215 +++++++++++++++++- .../src/decomplex/detectors/local_flow.rs | 48 +++- .../decomplex/detectors/predicate_alias.rs | 9 +- 
.../src/decomplex/detectors/semantic_alias.rs | 16 +- .../src/decomplex/syntax/adapters/base.rs | 6 +- .../src/decomplex/syntax/adapters/kotlin.rs | 41 +++- gems/decomplex/test/syntax_test.rb | 60 +++++ 7 files changed, 364 insertions(+), 31 deletions(-) diff --git a/gems/decomplex/lib/decomplex/syntax/python.rb b/gems/decomplex/lib/decomplex/syntax/python.rb index 87508d55c..60b87ed10 100644 --- a/gems/decomplex/lib/decomplex/syntax/python.rb +++ b/gems/decomplex/lib/decomplex/syntax/python.rb @@ -21,6 +21,16 @@ module Syntax ).freeze class PythonSyntaxAdapter < TreeSitterLanguageAdapter + PythonSyntheticStatement = Struct.new(:kind, :children, :text, :start_point, :end_point, keyword_init: true) do + def named? + true + end + + def named_children + children.select { |child| child.respond_to?(:named?) && child.named? } + end + end + FUNCTION_NODE_KINDS = %w[function_definition].freeze CALL_NODE_KINDS = %w[call].freeze ADJACENT_CALL_NODE_KINDS = %w[attribute identifier].freeze @@ -79,14 +89,97 @@ def visibility(_document, node) :public end + def parameter_name(param) + name = super + return name if name + + python_nested_parameter_identifier(param)&.text + end + def call_target(document, node) python_adjacent_call_target(node) || super end - def local_methods(document) + def state_read_target(node) + return nil if python_hidden_assignment_parts(node) || python_annotation_lhs?(node) + super end + def record_state_write(document, node, stack, out) + parts = python_hidden_assignment_parts(node) + unless parts + super + return + end + + target = state_target(parts.fetch(:lhs)) + return unless target + target = normalize_target_receiver(target, stack) + return if skip_state_write_target?(target) + + out << StateWrite.new( + field: target[:field], + receiver: target[:receiver], + file: document.file, + function: current_function(stack), + line: line(parts.fetch(:lhs)), + span: python_assignment_span(parts.fetch(:lhs), parts.fetch(:rhs)), + owner: current_owner(document, 
stack) + ) + end + + def record_state_param_origin(document, node, stack, out) + parts = python_hidden_assignment_parts(node) + unless parts + super + return + end + + target = state_target(parts.fetch(:lhs)) + return unless target + target = normalize_target_receiver(target, stack) + + params = current_params(stack) + return if params.empty? + + rhs_param_names(parts.fetch(:rhs), params).each do |param| + out << StateParamOrigin.new( + field: target[:field], + receiver: target[:receiver], + owner: current_owner(document, stack), + param: param, + file: document.file, + function: current_function(stack), + line: line(parts.fetch(:lhs)), + span: python_assignment_span(parts.fetch(:lhs), parts.fetch(:rhs)) + ) + end + end + + def local_methods(document) + document.function_defs.map do |function_def| + statements = python_function_body_statements(function_def.body, document) + local_names = generic_local_names(function_def, statements) + local_statements = statements.each_with_index.map do |statement, index| + generic_local_statement(statement, index, local_names) + end + owner = function_def.owner.to_s == file_owner(document.file) ? 
"(top-level)" : function_def.owner + + LocalMethod.new( + id: "#{owner}##{function_def.name}", + owner: owner, + name: function_def.name, + file: function_def.file, + line: function_def.line, + span: function_def.span, + node: function_def.body, + statements: local_statements, + boundaries: generic_structural_boundaries(document, local_statements) + ) + end + end + private def hidden_python_function_name(node) @@ -96,12 +189,29 @@ def hidden_python_function_name(node) node.named_children.find { |child| child.kind == "identifier" }&.text end - def python_function_body_statements(node) + def python_nested_parameter_identifier(param) + return nil unless ts_node?(param) + return nil unless %w[typed_parameter default_parameter].include?(param.kind) + + param.named_children.each do |child| + next unless %w[list_splat_pattern dictionary_splat_pattern].include?(child.kind) + + identifier = child.named_children.find { |grandchild| parameter_identifier_node_kinds.include?(grandchild.kind) } + return identifier if identifier + end + nil + end + + def python_function_body_statements(node, document) body = named_field(node, "body") || node.named_children.find { |child| child.kind == "block" } return [] unless body - body.named_children.reject { |child| child.kind == "comment" } + groups = python_statement_child_groups(body) + return [] if groups.empty? && body.text.to_s.strip.empty? + return [body] if groups.empty? 
+ + groups.map { |children| python_synthetic_statement(document, children) } end def python_adjacent_call_target(node) @@ -118,6 +228,105 @@ def python_adjacent_call_target(node) rescue StandardError nil end + + def assignment_lhs?(node) + super || !!python_hidden_assignment_parts(node) + end + + def generic_local_write_node?(node) + super || python_annotation_lhs?(node) + end + + def python_hidden_assignment_parts(node) + return nil unless ts_node?(node) + + operator = next_sibling(node) + return nil unless operator + + if assignment_operator_tokens.include?(operator.text.to_s) + rhs = next_sibling(operator) + return { lhs: node, rhs: rhs } if rhs + elsif operator.text.to_s == ":" + type_node = next_sibling(operator) + equal = next_sibling(type_node) + rhs = next_sibling(equal) + return { lhs: node, rhs: rhs } if equal&.text.to_s == "=" && rhs + end + + nil + end + + def python_annotation_lhs?(node) + return false unless ts_node?(node) + return false unless generic_identifier?(node) || field_like_node?(node) + + colon = next_sibling(node) + return false unless colon&.text.to_s == ":" + + type_node = next_sibling(colon) + equal = next_sibling(type_node) + !equal || equal.text.to_s != "=" + end + + def python_assignment_span(lhs, rhs) + [ + lhs.start_point.row + 1, + lhs.start_point.column, + rhs.end_point.row + 1, + rhs.end_point.column + ] + end + + def python_statement_child_groups(body) + children = body.children.reject { |child| comment_node?(child) } + return [] if children.empty? + + groups = [] + current = [] + body_column = body.start_point.column + + children.each do |child| + if current.any? && python_new_statement_child?(current, child, body_column) + groups << current + current = [] + end + current << child + end + groups << current if current.any? 
+ groups + end + + def python_new_statement_child?(current, child, body_column) + return false unless child.start_point.row > current.map { |item| item.end_point.row }.max + return false if %w[elif else except finally case].include?(child.kind) + + child.start_point.column <= body_column + end + + def python_synthetic_statement(document, children) + first = children.first + last = children.last + PythonSyntheticStatement.new( + kind: "python_statement", + children: children, + text: python_source_slice(document, first.start_point, last.end_point), + start_point: first.start_point, + end_point: last.end_point + ) + end + + def python_source_slice(document, start_point, end_point) + if start_point.row == end_point.row + return document.lines[start_point.row].to_s[start_point.column...end_point.column].to_s + end + + lines = document.lines[start_point.row..end_point.row].to_a + return "" if lines.empty? + + lines[0] = lines[0].to_s[start_point.column..].to_s + lines[-1] = lines[-1].to_s[..end_point.column - 1].to_s + lines.join + end end end end diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index 09d185685..770952320 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -73,18 +73,29 @@ pub fn scan_documents(documents: &[Document]) -> Vec { for document in documents { let normalized = normalized_local_methods(document); if document.language != Language::Ruby { - let mut normalized_by_key: BTreeMap<_, _> = normalized - .into_iter() - .map(|summary| (method_summary_key(&summary), summary)) - .collect(); - for raw in raw_local_methods(document) { - out.push( - normalized_by_key - .remove(&method_summary_key(&raw)) - .unwrap_or(raw), + if document.language == Language::Python { + let raw = raw_local_methods(document); + let raw_keys: BTreeSet<_> = raw.iter().map(method_summary_key).collect(); + 
out.extend(raw); + out.extend( + normalized + .into_iter() + .filter(|summary| !raw_keys.contains(&method_summary_key(summary))), ); + } else { + let mut normalized_by_key: BTreeMap<_, _> = normalized + .into_iter() + .map(|summary| (method_summary_key(&summary), summary)) + .collect(); + for raw in raw_local_methods(document) { + out.push( + normalized_by_key + .remove(&method_summary_key(&raw)) + .unwrap_or(raw), + ); + } + out.extend(normalized_by_key.into_values()); } - out.extend(normalized_by_key.into_values()); continue; } @@ -238,6 +249,10 @@ fn raw_local_reads( local_names: &BTreeSet, profile: &dyn LanguageProfile, ) -> BTreeSet { + if raw_nested_local_scope(node, profile) { + return BTreeSet::new(); + } + let mut reads = Vec::new(); raw_walk_local(node, None, node, profile, &mut |child, parent| { let Some(name) = raw_local_identifier_text(child, profile) else { @@ -248,7 +263,6 @@ fn raw_local_reads( && !raw_declaration_name_in_tree(node, child, profile) && !raw_declaration_name(child, parent, profile) && !raw_member_name(child, parent, profile) - && !raw_call_name(child, parent, profile) { reads.push(name); } @@ -257,6 +271,10 @@ fn raw_local_reads( } fn raw_local_writes(node: &RawNode, profile: &dyn LanguageProfile) -> BTreeSet { + if raw_nested_local_scope(node, profile) { + return BTreeSet::new(); + } + let mut writes = textual_local_writes(&ast::normalize_text(&node.text)); raw_walk_local(node, None, node, profile, &mut |child, parent| { if raw_local_write_node(child, parent, profile) @@ -402,7 +420,13 @@ fn raw_local_identifier_text(node: &RawNode, profile: &dyn LanguageProfile) -> O { return Some(node.text.clone()); } - if node.named && raw_named_children(node).is_empty() && simple_identifier(&node.text) { + if profile + .local_identifier_wrapper_node_kinds() + .contains(&node.kind.as_str()) + && node.named + && raw_named_children(node).is_empty() + && simple_identifier(&node.text) + { return Some(node.text.clone()); } None diff --git 
a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs index 095115cfb..838a07eac 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs @@ -2,7 +2,7 @@ use crate::decomplex::ast::Span; use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::BTreeMap; use std::path::PathBuf; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] @@ -76,11 +76,12 @@ impl Report { let mut out = Vec::new(); for body in keys { let ps = by_body.remove(&body).unwrap(); - let mut names_set = BTreeSet::new(); + let mut names = Vec::new(); for p in &ps { - names_set.insert(p.name.clone()); + if !names.contains(&p.name) { + names.push(p.name.clone()); + } } - let names: Vec<_> = names_set.into_iter().collect(); if names.len() < 2 { continue; } diff --git a/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs index 00a6a9026..443ff0811 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs @@ -2,7 +2,7 @@ use crate::decomplex::ast::{self, Span}; use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::BTreeMap; use std::path::PathBuf; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] @@ -122,18 +122,24 @@ impl Report { } fn alias_clusters(&self) -> Vec { + let mut keys = Vec::new(); let mut by_canon: BTreeMap> = BTreeMap::new(); for p in &self.preds { + if !by_canon.contains_key(&p.canon) { + keys.push(p.canon.clone()); + } by_canon.entry(p.canon.clone()).or_default().push(p); } let mut out = Vec::new(); - for (c, ps) in by_canon { - let mut names_set = 
BTreeSet::new(); + for c in keys { + let ps = by_canon.remove(&c).unwrap(); + let mut names = Vec::new(); for p in &ps { - names_set.insert(p.name.clone()); + if !names.contains(&p.name) { + names.push(p.name.clone()); + } } - let names: Vec<_> = names_set.into_iter().collect(); if names.len() < 2 { continue; } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs index e755b7145..e7f4962a7 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs @@ -495,11 +495,7 @@ pub(crate) trait LanguageProfile { .filter(|child| !self.ignored_statement_node_kinds().contains(&child.kind())) .collect(); } - if statements.len() == 1 { - statements.first().copied() - } else { - None - } + statements.last().copied() } fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs index 245a4556a..ea1ae3c8a 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs @@ -1,6 +1,8 @@ +use super::super::tree_sitter_adapter::named_children; use super::super::Language; use super::base::LanguageProfile; -use tree_sitter::Language as TreeSitterLanguage; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct KotlinProfile; @@ -29,6 +31,41 @@ impl LanguageProfile for KotlinProfile { &["identifier", "simple_identifier"] } + fn function_params(&self, node: Node<'_>, source: &str) -> Vec { + let Some(params) = named_children(node) + .into_iter() + .find(|child| self.parameter_list_node_kinds().contains(&child.kind())) + else { + return Vec::new(); + }; + + let mut out = Vec::new(); + for param in named_children(params) { + if let Some(name) = 
self.parameter_name(param, source) { + if !out.contains(&name) { + out.push(name); + } + } + } + out + } + + fn parameter_name(&self, param: Node<'_>, source: &str) -> Option { + let name = if self + .parameter_identifier_node_kinds() + .contains(¶m.kind()) + { + Some(param) + } else { + named_children(param).into_iter().find(|child| { + self.parameter_identifier_node_kinds() + .contains(&child.kind()) + }) + }?; + let text = node_text(name, source).to_string(); + (!text.is_empty() && text != "_").then_some(text) + } + fn function_body_node_kinds(&self) -> &[&str] { &["function_body", "statements"] } @@ -42,7 +79,7 @@ impl LanguageProfile for KotlinProfile { } fn identifier_node_kinds(&self) -> &[&str] { - &["simple_identifier", "type_identifier"] + &["identifier", "simple_identifier", "type_identifier"] } fn assignment_node_kinds(&self) -> &[&str] { diff --git a/gems/decomplex/test/syntax_test.rb b/gems/decomplex/test/syntax_test.rb index 90962f6b0..c6b867cbc 100644 --- a/gems/decomplex/test/syntax_test.rb +++ b/gems/decomplex/test/syntax_test.rb @@ -458,6 +458,66 @@ def run(items): end end + def test_tree_sitter_python_adapter_extracts_typed_attribute_assignments + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + class Worker: + def __init__(self, items): + self.items = items + self.cache: dict[str, int] = items + PY + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.span] }, + ["self", "items", [3, 8, 3, 26]] + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.span] }, + ["self", "cache", [4, 8, 4, 42]] + assert_includes doc.state_param_origins.map { |origin| [origin.receiver, origin.field, origin.param, origin.span] }, + ["self", "items", "items", [3, 8, 3, 26]] + 
assert_includes doc.state_param_origins.map { |origin| [origin.receiver, origin.field, origin.param, origin.span] }, + ["self", "cache", "items", [4, 8, 4, 42]] + assert_empty doc.state_reads + end + end + + def test_tree_sitter_python_adapter_extracts_typed_splat_parameters + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def reconfigure(*args: Any, **kwargs: Any) -> None: + new_console = Console(*args, **kwargs) + PY + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + + assert_equal %w[args kwargs], doc.function_defs.first.params + statement = doc.local_methods.first.statements.first + assert_equal %w[args kwargs], statement.reads.to_a.sort + assert_equal [["new_console", "args"], ["new_console", "kwargs"]], statement.dependencies + end + end + + def test_tree_sitter_python_adapter_treats_annotation_only_locals_as_writes + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def parse_version(): + version_integers: tuple[int, ...] 
+ PY + statement = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + .first + + assert_empty statement.reads + assert_equal ["version_integers"], statement.writes.to_a + end + end + def test_tree_sitter_c_adapter_extracts_functions_branches_and_pointer_state grammar = ENV["DECOMPLEX_TS_C_PATH"] skip "set DECOMPLEX_TS_C_PATH to run C structural facts test" unless grammar && File.file?(grammar) From f9007f793f7dfe55c61966d796c48531b101b739 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 05:15:35 +0000 Subject: [PATCH 36/52] Fix Python local flow parity --- gems/decomplex/lib/decomplex/syntax.rb | 8 +- gems/decomplex/lib/decomplex/syntax/python.rb | 71 ++++- .../src/decomplex/detectors/local_flow.rs | 260 +++++++++++++++++- gems/decomplex/test/syntax_test.rb | 185 +++++++++++++ 4 files changed, 512 insertions(+), 12 deletions(-) diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 8f3056211..4d0d2a022 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -537,7 +537,7 @@ def generic_local_reads(node, local_names) next if generic_local_write_node?(child) next if generic_declaration_name?(child) next if generic_member_name?(child) - next if generic_call_name?(child) + next if skip_local_read_identifier?(child) reads << name end @@ -724,6 +724,10 @@ def generic_member_name?(node) field == node end + def skip_local_read_identifier?(_node) + false + end + def generic_call_name?(node) parent = parent_node(node) return false unless parent @@ -2367,7 +2371,7 @@ def line(node) end def normalize_text(text) - text.to_s.strip.gsub(/\s+/, " ") + text.to_s.tr("\u00A0", " ").strip.gsub(/\s+/, " ") end end diff --git a/gems/decomplex/lib/decomplex/syntax/python.rb b/gems/decomplex/lib/decomplex/syntax/python.rb index 60b87ed10..23e2d2343 100644 --- a/gems/decomplex/lib/decomplex/syntax/python.rb +++ 
b/gems/decomplex/lib/decomplex/syntax/python.rb @@ -42,7 +42,7 @@ def named_children FIELD_IDENTIFIER_NODE_KINDS = [].freeze PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze ASSIGNMENT_NODE_KINDS = %w[assignment augmented_assignment].freeze - ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %= :=].freeze PATH_ACTION_NODE_KINDS = %w[call expression_statement return_statement].freeze SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block].freeze COMPARISON_NODE_KINDS = %w[comparison_operator binary_operator boolean_operator].freeze @@ -68,6 +68,7 @@ def named_children BOOLEAN_WRAPPER_NODE_KINDS = %w[block].freeze PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression].freeze LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = %w[with_clause].freeze FIELD_DECLARATION_NODE_KINDS = [].freeze DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameters].freeze ADJACENT_METHOD_INVOCATION_NODE_KINDS = [].freeze @@ -230,11 +231,21 @@ def python_adjacent_call_target(node) end def assignment_lhs?(node) + return false if parent_node(node)&.kind == "keyword_argument" + super || !!python_hidden_assignment_parts(node) end def generic_local_write_node?(node) - super || python_annotation_lhs?(node) + super || python_annotation_lhs?(node) || python_loop_target?(node) + end + + def generic_local_writes(node) + (super + python_with_alias_names(node)).uniq + end + + def skip_local_read_identifier?(node) + parent_node(node)&.kind == "dotted_name" || super end def python_hidden_assignment_parts(node) @@ -244,10 +255,14 @@ def python_hidden_assignment_parts(node) return nil unless operator if assignment_operator_tokens.include?(operator.text.to_s) + return nil unless python_statement_assignment_context?(node) + rhs = next_sibling(operator) return { lhs: node, rhs: rhs } if rhs elsif operator.text.to_s == ":" type_node = next_sibling(operator) + return nil unless type_node&.kind == "type" + equal = 
next_sibling(type_node) rhs = next_sibling(equal) return { lhs: node, rhs: rhs } if equal&.text.to_s == "=" && rhs @@ -264,10 +279,57 @@ def python_annotation_lhs?(node) return false unless colon&.text.to_s == ":" type_node = next_sibling(colon) + return false unless type_node&.kind == "type" + equal = next_sibling(type_node) !equal || equal.text.to_s != "=" end + def python_statement_assignment_context?(node) + parent_node(node)&.kind == "expression_statement" + end + + def python_loop_target?(node) + return false unless generic_identifier?(node) + + before = prev_sibling(node) + after = next_sibling(node) + return true if before&.text.to_s == "for" && after&.text.to_s != ":" + + seen_for = false + current = before + while ts_node?(current) + text = current.text.to_s + return false if %w[in :].include?(text) + if text == "for" + seen_for = true + break + end + current = prev_sibling(current) + end + return false unless seen_for + + current = after + while ts_node?(current) + text = current.text.to_s + return true if text == "in" + return false if text == ":" + current = next_sibling(current) + end + false + end + + def python_with_alias_names(node) + names = [] + generic_walk_local(node) do |child| + next unless child.kind == "as_pattern_target" + + text = child.text.to_s + names << text if simple_identifier_text?(text) + end + names + end + def python_assignment_span(lhs, rhs) [ lhs.start_point.row + 1, @@ -298,7 +360,10 @@ def python_statement_child_groups(body) def python_new_statement_child?(current, child, body_column) return false unless child.start_point.row > current.map { |item| item.end_point.row }.max - return false if %w[elif else except finally case].include?(child.kind) + return false if %w[ + elif else except finally case + elif_clause else_clause except_clause finally_clause case_clause + ].include?(child.kind) child.start_point.column <= body_column end diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs 
b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index 770952320..ce65fa8d1 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -260,6 +260,8 @@ fn raw_local_reads( }; if local_names.contains(&name) && !raw_local_write_node(child, parent, profile) + && !raw_python_import_name(parent, profile) + && !raw_python_with_alias_read(child, parent, profile) && !raw_declaration_name_in_tree(node, child, profile) && !raw_declaration_name(child, parent, profile) && !raw_member_name(child, parent, profile) @@ -275,7 +277,15 @@ fn raw_local_writes(node: &RawNode, profile: &dyn LanguageProfile) -> BTreeSet bool { + if raw_previous_sibling(node, parent) + .map(|sibling| sibling.text.as_str() == "for") + .unwrap_or(false) + && raw_next_sibling(node, parent) + .map(|sibling| sibling.text.as_str() != ":") + .unwrap_or(false) + { + return true; + } + + let mut seen_for = false; + let mut current = raw_previous_sibling(node, parent); + while let Some(sibling) = current { + match sibling.text.as_str() { + "in" | ":" => return false, + "for" => { + seen_for = true; + break; + } + _ => current = raw_previous_sibling(sibling, parent), + } + } + if !seen_for { + return false; + } + + current = raw_next_sibling(node, parent); + while let Some(sibling) = current { + match sibling.text.as_str() { + "in" => return true, + ":" => return false, + _ => current = raw_next_sibling(sibling, parent), + } + } + false +} + +fn raw_python_typed_assignment_lhs(node: &RawNode, parent: &RawNode) -> bool { + let Some(colon) = raw_next_sibling(node, parent) else { + return false; + }; + if colon.text != ":" { + return false; + } + let Some(type_node) = raw_next_sibling(colon, parent) else { + return false; + }; + if type_node.kind != "type" { + return false; + } + raw_next_sibling(type_node, parent) + .map(|sibling| sibling.text.as_str() == "=") + .unwrap_or(false) +} + +fn raw_python_named_expression_lhs(node: 
&RawNode, parent: &RawNode) -> bool { + parent.kind == "named_expression" + && raw_named_children(parent) + .first() + .map(|lhs| std::ptr::eq(*lhs, node)) + .unwrap_or(false) + && raw_next_sibling(node, parent) + .map(|sibling| sibling.text.as_str() == ":=") + .unwrap_or(false) +} + +fn raw_python_annotation_lhs(node: &RawNode, parent: &RawNode) -> bool { + let Some(colon) = raw_next_sibling(node, parent) else { + return false; + }; + if colon.text != ":" { + return false; + } + let Some(type_node) = raw_next_sibling(colon, parent) else { + return false; + }; + if type_node.kind != "type" { + return false; + } + !raw_next_sibling(type_node, parent) + .map(|sibling| sibling.text.as_str() == "=") + .unwrap_or(false) +} + +fn raw_python_with_alias_names(node: &RawNode, profile: &dyn LanguageProfile) -> Vec { + let mut names = Vec::new(); + raw_walk_local(node, None, node, profile, &mut |child, _parent| { + if child.kind == "as_pattern_target" && simple_identifier(&child.text) { + names.push(child.text.clone()); + } + }); + names +} + +fn raw_python_import_name(parent: Option<&RawNode>, profile: &dyn LanguageProfile) -> bool { + profile.language() == Language::Python + && parent + .map(|parent| parent.kind.as_str() == "dotted_name") + .unwrap_or(false) +} + +fn raw_python_with_alias_read( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + profile.language() == Language::Python + && (node.kind == "as_pattern_target" + || parent + .map(|parent| parent.kind.as_str() == "as_pattern_target") + .unwrap_or(false)) +} + +fn python_textual_local_writes(source: &str) -> Vec { + match split_assignment(source) { + Some((_lhs, ":=")) => Vec::new(), + _ => textual_local_writes(source), + } +} + fn raw_declaration_name( node: &RawNode, parent: Option<&RawNode>, @@ -1107,7 +1253,12 @@ fn textual_local_writes(source: &str) -> Vec { let Some((lhs, operator)) = split_assignment(source) else { return Vec::new(); }; - if lhs.contains('.') || 
lhs.contains("->") || lhs.contains('[') { + if lhs.contains('.') + || lhs.contains("->") + || lhs.contains('[') + || lhs.contains('(') + || lhs.contains(')') + { return Vec::new(); } @@ -1460,9 +1611,7 @@ mod tests { summary.statements[0].reads, ["self".to_string()].into_iter().collect() ); - assert!(summary.statements[0] - .dependencies - .contains(&("file".to_string(), "self".to_string()))); + assert!(!summary.statements[0].writes.contains("file")); assert_eq!( summary.statements[1].reads, ["self".to_string()].into_iter().collect() @@ -1482,11 +1631,108 @@ mod tests { assert_eq!( summary.statements[0].writes, - ["indent_guides".to_string(), "pretty".to_string()] + ["pretty".to_string()].into_iter().collect() + ); + assert!(summary.statements[0].dependencies.is_empty()); + } + + #[test] + fn mines_python_loop_and_with_locals_without_keyword_writes() { + let summaries = summaries( + "def download(urls, dest_dir):\n with ThreadPoolExecutor(max_workers=4) as pool:\n for url in urls:\n filename = url.split(\"/\")[-1]\n dest_path = os.path.join(dest_dir, filename)\n task_id = progress.add_task(\"download\", filename=filename, start=False)\n pool.submit(copy_url, task_id, url, dest_path)\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "download") + .expect("download summary"); + let statement = &summary.statements[0]; + + assert!(statement.reads.contains("urls")); + assert!(statement.reads.contains("url")); + assert!(statement.reads.contains("pool")); + assert!(statement.writes.contains("url")); + assert!(statement.writes.contains("pool")); + assert!(!statement.writes.contains("urls")); + assert!(!statement.writes.contains("max_workers")); + assert!(!statement.writes.contains("start")); + } + + #[test] + fn does_not_read_python_with_alias_at_declaration_site() { + let summaries = summaries( + "def capture(console):\n with console.capture() as output:\n console.line()\n return output\n", + Language::Python, + ); + let 
summary = summaries + .iter() + .find(|summary| summary.name == "capture") + .expect("capture summary"); + + assert!(summary.statements[0].writes.contains("output")); + assert!(!summary.statements[0].reads.contains("output")); + assert!(summary.statements[1].reads.contains("output")); + } + + #[test] + fn mines_python_named_expression_writes() { + let summaries = summaries( + "def scan(text, index):\n if (character := text[index]):\n return character\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "scan") + .expect("scan summary"); + let statement = &summary.statements[0]; + + assert!(statement.writes.contains("character")); + assert!(statement.reads.contains("text")); + assert!(statement.reads.contains("index")); + assert!(statement + .dependencies + .contains(&("character".to_string(), "text".to_string()))); + assert!(statement + .dependencies + .contains(&("character".to_string(), "index".to_string()))); + } + + #[test] + fn ignores_python_import_path_segments_that_match_locals() { + let summaries = summaries( + "def status(status):\n from .status import Status\n return status\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "status") + .expect("status summary"); + + assert!(summary.statements[0].reads.is_empty()); + assert_eq!( + summary.statements[1].reads, + ["status".to_string()].into_iter().collect() + ); + } + + #[test] + fn reads_python_callable_locals_without_marking_call_callee_as_write() { + let summaries = summaries( + "def invoke(callback, value):\n runner = callback\n return runner(value)\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "invoke") + .expect("invoke summary"); + + assert_eq!( + summary.statements[1].reads, + ["runner".to_string(), "value".to_string()] .into_iter() .collect() ); - assert!(summary.statements[0].dependencies.is_empty()); + assert!(summary.statements[1].writes.is_empty()); 
} #[test] diff --git a/gems/decomplex/test/syntax_test.rb b/gems/decomplex/test/syntax_test.rb index c6b867cbc..b2790827f 100644 --- a/gems/decomplex/test/syntax_test.rb +++ b/gems/decomplex/test/syntax_test.rb @@ -161,6 +161,12 @@ def test_tree_sitter_adapter_requires_language_profile_context assert_match(/missing Syntax language profile context/, error.message) end + def test_tree_sitter_language_adapter_normalizes_non_breaking_space + profile = Decomplex::Syntax.language_profile(:python) + + assert_equal "alpha beta", profile.send(:normalize_text, "alpha\u00A0beta") + end + def test_tree_sitter_adapter_delegates_language_normalization_to_profiles adapter_class = Decomplex::Syntax::TreeSitterAdapter profile_class = Decomplex::Syntax::TreeSitterLanguageAdapter @@ -518,6 +524,185 @@ def parse_version(): end end + def test_tree_sitter_python_adapter_treats_typed_local_assignment_as_write + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def process(value): + try: + return_value: PromptType = convert(value) + except ValueError: + raise + return return_value + PY + statements = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + + assert_includes statements[0].writes, "return_value" + assert_includes statements[0].reads, "value" + refute_includes statements[0].reads, "return_value" + assert_equal ["return_value"], statements[1].reads.to_a + end + end + + def test_tree_sitter_python_adapter_mines_loop_and_with_locals_without_keyword_writes + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def download(urls, dest_dir): + with ThreadPoolExecutor(max_workers=4) as pool: + for url in urls: + filename = url.split("/")[-1] + 
dest_path = os.path.join(dest_dir, filename) + task_id = progress.add_task("download", filename=filename, start=False) + pool.submit(copy_url, task_id, url, dest_path) + PY + statement = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + .first + + assert_includes statement.reads, "urls" + assert_includes statement.reads, "url" + assert_includes statement.reads, "pool" + assert_includes statement.writes, "url" + assert_includes statement.writes, "pool" + refute_includes statement.writes, "urls" + refute_includes statement.writes, "max_workers" + refute_includes statement.writes, "start" + end + end + + def test_tree_sitter_python_adapter_counts_callable_locals_as_reads + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def invoke(callback, value): + runner = callback + return runner(value) + PY + statements = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + + assert_equal ["callback"], statements[0].reads.to_a + assert_equal %w[runner value], statements[1].reads.to_a.sort + end + end + + def test_tree_sitter_python_adapter_mines_named_expression_writes + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def scan(text, index): + if (character := text[index]): + return character + PY + statement = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + .first + + assert_includes statement.writes, "character" + assert_includes statement.reads, "text" + assert_includes statement.reads, "index" + assert_includes statement.dependencies, ["character", "text"] + assert_includes statement.dependencies, 
["character", "index"] + end + end + + def test_tree_sitter_python_adapter_groups_try_except_as_one_statement + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def foo(): + try: + raise RuntimeError("Hello") + except Exception as e: + raise e from e + PY + statements = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + + assert_equal 1, statements.length + assert_includes statements.first.writes, "e" + assert_includes statements.first.reads, "e" + end + end + + def test_tree_sitter_python_adapter_groups_if_elif_chain_as_one_statement + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def align(value): + if value == "left": + return 1 + elif value == "right": + return 2 + PY + statements = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + + assert_equal 1, statements.length + assert_equal ["value"], statements.first.reads.to_a + end + end + + def test_tree_sitter_python_adapter_ignores_import_paths_that_match_locals + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def inspect(): + from rich._inspect import Inspect + _inspect = Inspect() + return _inspect + PY + statements = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + + assert_empty statements[0].reads + assert_equal ["_inspect"], statements[1].writes.to_a + assert_equal ["_inspect"], statements[2].reads.to_a + end + end + + def test_tree_sitter_python_adapter_reads_bare_with_context_local + 
grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def use_status(status): + with status: + sleep(0.2) + PY + statement = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + .first + + assert_equal ["status"], statement.reads.to_a + end + end + def test_tree_sitter_c_adapter_extracts_functions_branches_and_pointer_state grammar = ENV["DECOMPLEX_TS_C_PATH"] skip "set DECOMPLEX_TS_C_PATH to run C structural facts test" unless grammar && File.file?(grammar) From 40a8d231500787c55cc417bb969c4d4a2a424b4a Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 06:33:39 +0000 Subject: [PATCH 37/52] Align decomplex Rust adapter parity --- gems/decomplex/lib/decomplex/syntax.rb | 21 +- .../decomplex/lib/decomplex/syntax/effects.rb | 339 ++++++++++++- gems/decomplex/lib/decomplex/syntax/python.rb | 31 +- gems/decomplex/rust/src/decomplex/ast.rs | 9 +- .../decomplex/detectors/false_simplicity.rs | 476 +++--------------- .../src/decomplex/detectors/function_lcom.rs | 7 +- .../src/decomplex/detectors/path_condition.rs | 377 +++++++++++++- .../detectors/state_branch_density.rs | 83 ++- gems/decomplex/rust/src/decomplex/syntax.rs | 21 + .../src/decomplex/syntax/adapters/base.rs | 26 + .../rust/src/decomplex/syntax/adapters/c.rs | 12 + .../rust/src/decomplex/syntax/adapters/cpp.rs | 12 + .../src/decomplex/syntax/adapters/csharp.rs | 12 + .../rust/src/decomplex/syntax/adapters/go.rs | 12 + .../src/decomplex/syntax/adapters/java.rs | 12 + .../decomplex/syntax/adapters/javascript.rs | 16 + .../src/decomplex/syntax/adapters/kotlin.rs | 12 + .../rust/src/decomplex/syntax/adapters/lua.rs | 8 + .../rust/src/decomplex/syntax/adapters/php.rs | 26 + .../src/decomplex/syntax/adapters/python.rs | 75 ++- .../src/decomplex/syntax/adapters/ruby.rs | 21 + 
.../src/decomplex/syntax/adapters/rust.rs | 16 + .../src/decomplex/syntax/adapters/swift.rs | 8 + .../decomplex/syntax/adapters/typescript.rs | 16 + .../rust/src/decomplex/syntax/adapters/zig.rs | 21 + .../decomplex/syntax/tree_sitter_adapter.rs | 175 ++++++- 26 files changed, 1363 insertions(+), 481 deletions(-) diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 4d0d2a022..abab448a7 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -436,7 +436,7 @@ def local_methods(document) local_statements = statements.each_with_index.map do |statement, index| generic_local_statement(statement, index, local_names) end - owner = function_def.owner.to_s == file_owner(document.file) ? "(top-level)" : function_def.owner + owner = local_method_owner(document, function_def.owner) LocalMethod.new( id: "#{owner}##{function_def.name}", @@ -464,6 +464,14 @@ def path_condition_sites(document) private + def local_method_owner(document, owner) + file_owner_name = file_owner(document.file) + owner_name = owner.to_s + return "(top-level)" if owner_name == file_owner_name + + owner_name.sub(/\A#{Regexp.escape(file_owner_name)}::/, "") + end + def generic_predicate_body(node) body = generic_function_body_node(node) return nil unless body @@ -834,6 +842,8 @@ def push_owner_context(document, stack, node) return stack unless owner parent_owner = current_owner_from_stack(stack) + parent_owner ||= current_file_owner_from_stack(stack) \ + if current_language(stack) == :python && current_function_entry?(stack) full_owner = if parent_owner && parent_owner != owner && !owner.include?("::") "#{parent_owner}::#{owner}" else @@ -856,6 +866,15 @@ def current_owner_from_stack(stack) entry && entry[:owner] end + def current_file_owner_from_stack(stack) + entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:file_owner] } + entry && entry[:file_owner] + end + + def current_function_entry?(stack) + 
stack.reverse.any? { |item| item.is_a?(Hash) && item[:function] } + end + def current_language(stack) entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:language] } entry && entry[:language] diff --git a/gems/decomplex/lib/decomplex/syntax/effects.rb b/gems/decomplex/lib/decomplex/syntax/effects.rb index 80e36df57..e4766b1f9 100644 --- a/gems/decomplex/lib/decomplex/syntax/effects.rb +++ b/gems/decomplex/lib/decomplex/syntax/effects.rb @@ -31,7 +31,23 @@ def effect_lexicon def semantic_effect_sites_from_calls(document) return [] unless effect_lexicon - document.call_sites.filter_map { |call| semantic_effect_site_for_call(call) } + by_operation = {} + document.call_sites.each do |call| + site = semantic_effect_site_for_call(call) + next unless site + + key = [site.kind, site.detail, site.file, site.function, site.owner, + site.line, call.receiver, call.message, call.arguments] + current = by_operation[key] + if current.nil? || span_width(site.span) > span_width(current.span) + by_operation[key] = site + end + end + by_operation.values + end + + def span_width(span_value) + ((span_value[2] - span_value[0]) * 100_000) + (span_value[3] - span_value[1]) end def semantic_effect_site_for_call(call) @@ -79,7 +95,8 @@ def bare_effect_site_for_call(call, message) return nil unless call.receiver.to_s == "self" lexicon = effect_lexicon - return semantic_effect_site_from_call(call, :hidden_io, message) if lexicon.io_bare.include?(message) + return semantic_effect_site_from_call(call, :hidden_io, message) \ + if lexicon.io_bare.include?(message) || GENERIC_SYSTEM_IO_BARE.include?(message) return semantic_effect_site_from_call(call, :context_dependency, message) if lexicon.context_bare.include?(message) nil @@ -150,12 +167,16 @@ def semantic_effect_sites(document) end end + GENERIC_SYSTEM_IO_BARE = %w[print println eprintln printf puts panic].freeze + COMMON_CALLBACK_SET = %w[transaction synchronize lock with_lock unlock + mutex atomic subscribe callback hook].freeze + 
GENERIC_SYSTEM_EFFECT_LEXICON = EffectLexicon.new( dispatch_mids: [].freeze, meta_mids: [].freeze, method_obj_mids: [].freeze, io_consts: [].freeze, - io_bare: %w[print println eprintln printf puts panic].freeze, + io_bare: GENERIC_SYSTEM_IO_BARE, dir_context: [].freeze, context_pairs: {}.freeze, context_bare: [].freeze, @@ -163,6 +184,218 @@ def semantic_effect_sites(document) core_consts: [].freeze ).freeze + PYTHON_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[getattr setattr hasattr __getattr__ __setattr__ import_module].freeze, + meta_mids: %w[eval exec compile type globals locals vars setattr delattr].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[Path pathlib os sys subprocess socket shutil].freeze, + io_bare: %w[print input open exec eval].freeze, + dir_context: %w[getcwd home].freeze, + context_pairs: { + "time" => %w[time monotonic perf_counter], + "datetime" => %w[now today utcnow], + "random" => %w[random randint randrange choice] + }.freeze, + context_bare: %w[random randint randrange].freeze, + callback_set: COMMON_CALLBACK_SET, + core_consts: [].freeze + ).freeze + + JAVASCRIPT_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[eval Function call apply bind].freeze, + meta_mids: %w[eval Function defineProperty defineProperties setPrototypeOf].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[console Console fs process Deno Bun].freeze, + io_bare: %w[setTimeout setInterval fetch require import].freeze, + dir_context: [].freeze, + context_pairs: { + "Date" => %w[now], + "Math" => %w[random], + "performance" => %w[now] + }.freeze, + context_bare: [].freeze, + callback_set: COMMON_CALLBACK_SET, + core_consts: [].freeze + ).freeze + + GO_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[Call CallSlice Method MethodByName ValueOf TypeOf].freeze, + meta_mids: %w[Call CallSlice MethodByName New MakeFunc].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[os io ioutil fs net http exec syscall].freeze, 
+ io_bare: %w[panic print println recover].freeze, + dir_context: %w[Getwd UserHomeDir].freeze, + context_pairs: { + "time" => %w[Now Since Until], + "rand" => %w[Int Intn Float64 Read] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[Lock Unlock RLock RUnlock Do Go Add Done Wait]).freeze, + core_consts: [].freeze + ).freeze + + RUST_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[downcast downcast_ref downcast_mut call call_mut call_once].freeze, + meta_mids: %w[transmute from_raw_parts from_raw_parts_mut].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[std tokio fs env process net io].freeze, + io_bare: %w[panic todo unimplemented unreachable].freeze, + dir_context: %w[current_dir home_dir].freeze, + context_pairs: { + "SystemTime" => %w[now], + "Instant" => %w[now] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[read write spawn await]).freeze, + core_consts: [].freeze + ).freeze + + ZIG_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[field fieldParentPtr ptrCast alignCast call].freeze, + meta_mids: %w[typeInfo TypeOf ptrCast intFromPtr ptrFromInt eval].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[std os fs process net Thread Mutex Atomic].freeze, + io_bare: %w[panic unreachable].freeze, + dir_context: [].freeze, + context_pairs: { + "time" => %w[timestamp nanoTimestamp milliTimestamp] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[spawn wait signal]).freeze, + core_consts: [].freeze + ).freeze + + LUA_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[load loadfile dofile require rawget rawset].freeze, + meta_mids: %w[setmetatable getmetatable debug eval load loadfile].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[io os debug package].freeze, + io_bare: %w[print error assert require collectgarbage].freeze, + dir_context: [].freeze, + context_pairs: { + "os" => %w[time clock date getenv], + "math" 
=> %w[random] + }.freeze, + context_bare: [].freeze, + callback_set: COMMON_CALLBACK_SET, + core_consts: [].freeze + ).freeze + + C_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[dlsym dlopen GetProcAddress].freeze, + meta_mids: %w[setjmp longjmp va_start va_arg].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[FILE DIR pthread mutex atomic].freeze, + io_bare: %w[printf fprintf fopen open read write close system exec abort exit assert].freeze, + dir_context: %w[getcwd getenv].freeze, + context_pairs: {}.freeze, + context_bare: %w[rand time clock].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[pthread_mutex_lock pthread_mutex_unlock]).freeze, + core_consts: [].freeze + ).freeze + + CPP_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[dynamic_cast typeid any_cast get_if visit invoke].freeze, + meta_mids: %w[reinterpret_cast const_cast dlsym dlopen].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[std filesystem fstream iostream thread mutex atomic].freeze, + io_bare: %w[throw abort exit assert system].freeze, + dir_context: %w[current_path].freeze, + context_pairs: { + "chrono" => %w[now], + "random_device" => %w[operator()] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[try_lock wait notify_one notify_all]).freeze, + core_consts: [].freeze + ).freeze + + CSHARP_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[Invoke GetMethod GetProperty GetField Activator CreateInstance].freeze, + meta_mids: %w[Invoke GetType Reflection Emit DynamicMethod].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[Console File Directory Path Process Socket HttpClient Environment].freeze, + io_bare: %w[throw].freeze, + dir_context: %w[CurrentDirectory GetEnvironmentVariable].freeze, + context_pairs: { + "DateTime" => %w[Now UtcNow Today], + "Guid" => %w[NewGuid], + "Random" => %w[Next NextDouble] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[Lock Monitor Enter 
Exit Wait Pulse]).freeze, + core_consts: [].freeze + ).freeze + + JAVA_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[invoke getMethod getDeclaredMethod getField getDeclaredField forName].freeze, + meta_mids: %w[invoke setAccessible newInstance Proxy].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[System File Files Paths ProcessBuilder Socket HttpClient Thread Lock AtomicReference].freeze, + io_bare: %w[throw].freeze, + dir_context: %w[getProperty getenv].freeze, + context_pairs: { + "System" => %w[currentTimeMillis nanoTime getenv getProperty], + "Instant" => %w[now], + "UUID" => %w[randomUUID], + "Math" => %w[random] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[wait notify notifyAll submit execute]).freeze, + core_consts: [].freeze + ).freeze + + SWIFT_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[perform value setValue selector NSClassFromString].freeze, + meta_mids: %w[Mirror unsafeBitCast withUnsafePointer withUnsafeBytes].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[FileManager Process URLSession DispatchQueue Thread Lock NSLock].freeze, + io_bare: %w[print fatalError preconditionFailure assertionFailure].freeze, + dir_context: %w[currentDirectoryPath homeDirectoryForCurrentUser].freeze, + context_pairs: { + "Date" => %w[now], + "UUID" => %w[init] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[async sync]).freeze, + core_consts: [].freeze + ).freeze + + KOTLIN_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[invoke call callBy memberProperties declaredMemberFunctions].freeze, + meta_mids: %w[reflection javaClass Class forName setAccessible].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[System File Files Paths ProcessBuilder Socket HttpClient Thread Mutex AtomicReference].freeze, + io_bare: %w[println print error check require TODO].freeze, + dir_context: %w[getProperty getenv].freeze, + context_pairs: { + "System" => 
%w[currentTimeMillis nanoTime getenv getProperty], + "Instant" => %w[now], + "UUID" => %w[randomUUID], + "Random" => %w[nextInt nextLong nextDouble] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[synchronized launch async await]).freeze, + core_consts: [].freeze + ).freeze + + PHP_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[call_user_func call_user_func_array __call __callStatic].freeze, + meta_mids: %w[eval ReflectionClass ReflectionMethod ReflectionFunction class_alias].freeze, + method_obj_mids: %w[Closure fromCallable].freeze, + io_consts: %w[FilesystemIterator DirectoryIterator PDO mysqli].freeze, + io_bare: %w[print printf fopen file_get_contents file_put_contents exec shell_exec system passthru die exit trigger_error].freeze, + dir_context: %w[getcwd getenv].freeze, + context_pairs: { + "DateTime" => %w[createFromFormat], + "DateTimeImmutable" => %w[createFromFormat], + "random_int" => %w[call] + }.freeze, + context_bare: %w[time microtime random_int rand mt_rand].freeze, + callback_set: COMMON_CALLBACK_SET, + core_consts: [].freeze + ).freeze + class TreeSitterLanguageAdapter private @@ -175,7 +408,7 @@ class RustSyntaxAdapter private def effect_lexicon - GENERIC_SYSTEM_EFFECT_LEXICON + RUST_EFFECT_LEXICON end end @@ -183,7 +416,103 @@ class ZigSyntaxAdapter private def effect_lexicon - GENERIC_SYSTEM_EFFECT_LEXICON + ZIG_EFFECT_LEXICON + end + end + + class PythonSyntaxAdapter + private + + def effect_lexicon + PYTHON_EFFECT_LEXICON + end + end + + class JavaScriptSyntaxAdapter + private + + def effect_lexicon + JAVASCRIPT_EFFECT_LEXICON + end + end + + class TypeScriptSyntaxAdapter + private + + def effect_lexicon + JAVASCRIPT_EFFECT_LEXICON + end + end + + class GoSyntaxAdapter + private + + def effect_lexicon + GO_EFFECT_LEXICON + end + end + + class LuaSyntaxAdapter + private + + def effect_lexicon + LUA_EFFECT_LEXICON + end + end + + class CSyntaxAdapter + private + + def effect_lexicon + C_EFFECT_LEXICON 
+ end + end + + class CppSyntaxAdapter + private + + def effect_lexicon + CPP_EFFECT_LEXICON + end + end + + class CSharpSyntaxAdapter + private + + def effect_lexicon + CSHARP_EFFECT_LEXICON + end + end + + class JavaSyntaxAdapter + private + + def effect_lexicon + JAVA_EFFECT_LEXICON + end + end + + class SwiftSyntaxAdapter + private + + def effect_lexicon + SWIFT_EFFECT_LEXICON + end + end + + class KotlinSyntaxAdapter + private + + def effect_lexicon + KOTLIN_EFFECT_LEXICON + end + end + + class PhpSyntaxAdapter + private + + def effect_lexicon + PHP_EFFECT_LEXICON end end end diff --git a/gems/decomplex/lib/decomplex/syntax/python.rb b/gems/decomplex/lib/decomplex/syntax/python.rb index 23e2d2343..dabd09324 100644 --- a/gems/decomplex/lib/decomplex/syntax/python.rb +++ b/gems/decomplex/lib/decomplex/syntax/python.rb @@ -47,7 +47,8 @@ def named_children SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block].freeze COMPARISON_NODE_KINDS = %w[comparison_operator binary_operator boolean_operator].freeze BRANCH_NODE_KINDS = %w[if_statement for_statement match_statement].freeze - LOOP_NODE_KINDS = %w[for_statement].freeze + LOOP_NODE_KINDS = %w[for_statement while_statement].freeze + TEXT_LOOP_NODE_KINDS = %w[block].freeze BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS CASE_NODE_KINDS = %w[match_statement].freeze HIDDEN_CASE_WRAPPER_NODE_KINDS = %w[block].freeze @@ -165,7 +166,7 @@ def local_methods(document) local_statements = statements.each_with_index.map do |statement, index| generic_local_statement(statement, index, local_names) end - owner = function_def.owner.to_s == file_owner(document.file) ? 
"(top-level)" : function_def.owner + owner = local_method_owner(document, function_def.owner) LocalMethod.new( id: "#{owner}##{function_def.name}", @@ -216,7 +217,8 @@ def python_function_body_statements(node, document) end def python_adjacent_call_target(node) - return nil unless %w[identifier].include?(node.kind) + return python_adjacent_member_call_target(node) if node.kind == "attribute" + return nil unless node.kind == "identifier" args = next_sibling(node) return nil unless args&.kind == "argument_list" @@ -224,12 +226,33 @@ def python_adjacent_call_target(node) { receiver: "self", message: node.text, - arguments: args.named_children.map { |child| normalize_text(child.text) } + arguments: args.named_children.map { |child| normalize_text(child.text) }, + source_node: python_adjacent_call_source_node(node, args) } rescue StandardError nil end + def python_adjacent_member_call_target(node) + args = next_sibling(node) + return nil unless args&.kind == "argument_list" + + target_from_callee(node).merge( + arguments: args.named_children.map { |child| normalize_text(child.text) }, + source_node: python_adjacent_call_source_node(node, args) + ) + rescue StandardError + nil + end + + def python_adjacent_call_source_node(node, args) + parent = parent_node(node) + return node unless parent + + call_text = "#{node.text}#{args.text}" + parent.text.to_s.include?(call_text) ? 
parent : node + end + def assignment_lhs?(node) return false if parent_node(node)&.kind == "keyword_argument" diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs index 7778f6d34..e1995e5c6 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -40,6 +40,7 @@ pub struct RawNode { pub text: String, pub span: Span, pub named: bool, + pub field_name: Option, pub children: Vec, } @@ -48,7 +49,12 @@ impl RawNode { let mut cursor = node.walk(); let mut children: Vec = node .children(&mut cursor) - .map(|child| Self::from_tree_sitter(child, source)) + .enumerate() + .map(|(index, child)| { + let mut raw = Self::from_tree_sitter(child, source); + raw.field_name = node.field_name_for_child(index as u32).map(str::to_string); + raw + }) .collect(); if node.kind() == "argument_list" @@ -161,6 +167,7 @@ impl RawNode { text: node_text(node, source).to_string(), span: span(node), named: node.is_named(), + field_name: None, children, } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs index c245906ff..bd3748a2b 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs @@ -1,4 +1,4 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::ast::Span; use crate::decomplex::syntax::adapters::false_simplicity_lexicon::{ false_simplicity_lexicon, FalseSimplicityLexicon, }; @@ -51,18 +51,60 @@ pub fn scan_documents(documents: &[Document]) -> Vec { let mut classrecs = Vec::new(); for document in documents { hits.extend(hits_for_document(document)); - let mut detector = FalseSimplicity::new( - document.file.clone(), - document.lines.clone(), - document.language, - ); - detector.walk(&document.normalized_root, &[], &[]); - hits.extend(detector.hits); - classrecs.extend(detector.classrecs); + let 
(doc_recs, doc_hits) = class_records_for_document(document); + classrecs.extend(doc_recs); + hits.extend(doc_hits); } Report::new(hits, classrecs).findings() } +fn class_records_for_document(document: &Document) -> (Vec, Vec) { + let function_owners = document + .function_defs + .iter() + .map(|function| function.owner.clone()) + .filter(|owner| !owner.is_empty()) + .collect::>(); + let lexicon = false_simplicity_lexicon(document.language); + let mut recs = Vec::new(); + let mut hits = Vec::new(); + + for owner in &document.owner_defs { + let canonical = owner.name.trim_start_matches("::").to_string(); + if canonical.is_empty() { + continue; + } + if !function_owners.contains(&owner.name) && !function_owners.contains(&canonical) { + continue; + } + let simple = canonical + .split("::") + .last() + .unwrap_or(canonical.as_str()) + .to_string(); + let core = !canonical.contains("::") && lexicon.core_consts.contains(&simple.as_str()); + recs.push(ClassRec { + name: canonical.clone(), + file: owner.file.clone(), + line: owner.line, + core, + span: owner.span, + }); + if core { + hits.push(Hit { + kind: "monkeypatch".to_string(), + detail: simple.clone(), + file: owner.file.clone(), + defn: simple, + line: owner.line, + span: owner.span, + }); + } + } + + (recs, hits) +} + fn hits_for_document(document: &Document) -> Vec { let lexicon = false_simplicity_lexicon(document.language); document @@ -88,8 +130,8 @@ fn semantic_effect_hit_for_call(call: &CallSite, lexicon: &FalseSimplicityLexico } else { return None; } - } else if let Some(detail) = const_effect_detail(call, message, lexicon) { - ("hidden_io", detail) + } else if let Some((kind, detail)) = const_effect_kind_detail(call, message, lexicon) { + (kind, detail) } else if call.receiver == "self" && (lexicon.io_bare.contains(&message) || GENERIC_SYSTEM_IO_BARE.contains(&message)) { @@ -112,11 +154,11 @@ fn semantic_effect_hit_for_call(call: &CallSite, lexicon: &FalseSimplicityLexico }) } -fn const_effect_detail( +fn 
const_effect_kind_detail( call: &CallSite, message: &str, lexicon: &FalseSimplicityLexicon, -) -> Option { +) -> Option<(&'static str, String)> { let receiver = call.receiver.as_str(); if receiver.is_empty() || receiver == "self" { return None; @@ -127,20 +169,23 @@ fn const_effect_detail( .next() .unwrap_or(""); if base == "Dir" && lexicon.dir_context.contains(&message) { - return Some(format!("Dir.{message}")); + return Some(("context_dependency", format!("Dir.{message}"))); } if lexicon.io_consts.contains(&base) || receiver.starts_with("Net::") { - return Some(format!("{}.{}", receiver.trim_start_matches("::"), message)); + return Some(( + "hidden_io", + format!("{}.{}", receiver.trim_start_matches("::"), message), + )); } if receiver == "ENV" { - return Some("ENV".to_string()); + return Some(("context_dependency", "ENV".to_string())); } if lexicon .context_pairs .iter() .any(|(name, mids)| *name == base && mids.contains(&message)) { - return Some(format!("{base}.{message}")); + return Some(("context_dependency", format!("{base}.{message}"))); } None } @@ -174,403 +219,6 @@ fn variable_receiver(receiver: &str) -> bool { && chars.all(|ch| ch == '_' || ch == '!' || ch == '?' 
|| ch.is_ascii_alphanumeric()) } -struct FalseSimplicity { - file: String, - lines: Vec, - language: Language, - lexicon: FalseSimplicityLexicon, - hits: Vec, - classrecs: Vec, -} - -impl FalseSimplicity { - fn new(file: String, lines: Vec, language: Language) -> Self { - Self { - file, - lines, - language, - lexicon: false_simplicity_lexicon(language), - hits: Vec::new(), - classrecs: Vec::new(), - } - } - - fn walk(&mut self, node: &Node, defs: &[String], cls: &[String]) { - match node.r#type.as_str() { - "CLASS" | "MODULE" => { - self.walk_class(node, defs, cls); - return; - } - "SCLASS" => { - if self.language == Language::Ruby { - if let Some(recv) = node.children.first().and_then(ast::node) { - if recv.r#type != "SELF" { - self.emit( - "metaprogramming", - &format!("class << {}", ast::slice(recv, &self.lines)), - self.defn_name(defs), - node, - ); - } - } - } - } - "DEFN" | "DEFS" => { - let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; - let name = ast::child_to_string(node.children.get(name_index)); - if self.language == Language::Ruby { - if let Some(name) = name.as_deref() { - if matches!(name, "method_missing" | "respond_to_missing?") { - self.emit( - "metaprogramming", - &format!("def {name}"), - self.defn_name(defs), - node, - ); - } - } - } - let mut next_defs = defs.to_vec(); - if let Some(name) = name { - next_defs.push(name); - } - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, &next_defs, cls); - } - return; - } - "CALL" | "FCALL" | "VCALL" | "OPCALL" => self.classify_call(node, defs), - "ATTRASGN" => { - if let Some(mid) = ast::child_to_string(node.children.get(1)) { - self.emit("hidden_mutation", &mid, self.defn_name(defs), node); - } - } - "OP_ASGN1" | "OP_ASGN2" => { - self.emit("hidden_mutation", "op-assign", self.defn_name(defs), node); - } - "GVAR" | "GASGN" => { - if self.language == Language::Ruby { - if let Some(name) = ast::child_to_string(node.children.first()) { - self.emit("context_dependency", 
&name, self.defn_name(defs), node); - } - } - } - "XSTR" | "DXSTR" => { - if self.language == Language::Ruby { - self.emit("hidden_io", "backtick", self.defn_name(defs), node); - } - } - "YIELD" => { - if self.language == Language::Ruby { - self.emit("dynamic_dispatch", "yield", self.defn_name(defs), node); - } - } - "ITER" => { - if let Some(call) = node.children.first().and_then(ast::node) { - if let Some(mid) = self.callee_mid(call) { - if self.callback(&mid) && !self.lexicon.meta_mids.contains(&mid.as_str()) { - self.emit("callback_inversion", &mid, self.defn_name(defs), node); - } - } - } - } - _ => {} - } - - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, defs, cls); - } - } - - fn walk_class(&mut self, node: &Node, defs: &[String], cls: &[String]) { - let Some(cpath) = node.children.first().and_then(ast::node) else { - return; - }; - let body = if node.r#type == "CLASS" { - node.children.get(2).and_then(ast::node) - } else { - node.children.get(1).and_then(ast::node) - }; - let simple = self.const_simple(cpath); - let based = cpath.r#type == "COLON2" - && !matches!(cpath.children.first(), None | Some(Child::Nil)) - && !cpath.text.starts_with("::"); - let mut name_parts = cls.to_vec(); - name_parts.push(self.const_text(cpath)); - let fqn = name_parts.join("::"); - if body.is_some_and(|body| self.has_def(body)) { - let core = - cls.is_empty() && !based && self.lexicon.core_consts.contains(&simple.as_str()); - self.classrecs.push(ClassRec { - name: fqn.clone(), - file: self.file.clone(), - line: node.first_lineno, - core, - span: self.span(node), - }); - if core { - self.emit("monkeypatch", &simple, &simple, node); - } - } - let mut next_cls = cls.to_vec(); - next_cls.push(self.const_text(cpath)); - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, defs, &next_cls); - } - } - - fn classify_call(&mut self, call: &Node, defs: &[String]) { - let (recv, mid) = match call.r#type.as_str() { - "CALL" | "OPCALL" 
=> ( - call.children.first().and_then(ast::node), - ast::child_to_string(call.children.get(1)), - ), - _ => (None, ast::child_to_string(call.children.first())), - }; - let Some(mid) = mid else { - return; - }; - - if self.block_pass(call) - && self.callback(&mid) - && !self.lexicon.meta_mids.contains(&mid.as_str()) - { - self.emit("callback_inversion", &mid, self.defn_name(defs), call); - return; - } - if self.lexicon.meta_mids.contains(&mid.as_str()) { - self.emit("metaprogramming", &mid, self.defn_name(defs), call); - return; - } - if self.lexicon.dispatch_mids.contains(&mid.as_str()) { - self.emit("dynamic_dispatch", &mid, self.defn_name(defs), call); - return; - } - - if mid == "call" { - if let Some(recv) = recv { - if self.method_obj(recv) { - self.emit( - "dynamic_dispatch", - "method(...).call", - self.defn_name(defs), - call, - ); - return; - } - if self.var_recv(recv) { - self.emit( - "dynamic_dispatch", - &format!("{}.call", ast::slice(recv, &self.lines)), - self.defn_name(defs), - call, - ); - return; - } - } - } - - if let Some(cp) = recv.and_then(|recv| self.const_recv(recv)) { - let base = cp - .trim_start_matches("::") - .split("::") - .next() - .unwrap_or("") - .to_string(); - if base == "Dir" && self.lexicon.dir_context.contains(&mid.as_str()) { - self.emit( - "context_dependency", - &format!("Dir.{mid}"), - self.defn_name(defs), - call, - ); - return; - } - if self.lexicon.io_consts.contains(&base.as_str()) - || (self.language == Language::Ruby && cp.starts_with("Net::")) - { - self.emit( - "hidden_io", - &format!("{cp}.{mid}"), - self.defn_name(defs), - call, - ); - return; - } - if self.language == Language::Ruby { - if base == "URI" && mid == "open" { - self.emit("hidden_io", "URI.open", self.defn_name(defs), call); - return; - } - if cp == "ENV" { - self.emit("context_dependency", "ENV", self.defn_name(defs), call); - return; - } - } - if self.context_pair(&base, &mid) { - self.emit( - "context_dependency", - &format!("{base}.{mid}"), - 
self.defn_name(defs), - call, - ); - return; - } - } - - if recv.is_none() { - if self.lexicon.io_bare.contains(&mid.as_str()) { - self.emit("hidden_io", &mid, self.defn_name(defs), call); - return; - } - if self.lexicon.context_bare.contains(&mid.as_str()) { - self.emit("context_dependency", &mid, self.defn_name(defs), call); - return; - } - } - - if mid.len() > 1 && mid.ends_with('!') && !matches!(mid.as_str(), "!=" | "!~") { - self.emit("hidden_mutation", &mid, self.defn_name(defs), call); - return; - } - if call.r#type == "OPCALL" && mid == "<<" { - self.emit("hidden_mutation", "<<", self.defn_name(defs), call); - } - } - - fn emit(&mut self, kind: &str, detail: &str, defn: &str, node: &Node) { - self.hits.push(Hit { - kind: kind.to_string(), - detail: detail.to_string(), - file: self.file.clone(), - defn: defn.to_string(), - line: node.first_lineno, - span: self.span(node), - }); - } - - fn defn_name<'a>(&self, defs: &'a [String]) -> &'a str { - defs.last().map(String::as_str).unwrap_or("(top-level)") - } - - fn span(&self, node: &Node) -> Span { - [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ] - } - - fn callback(&self, mid: &str) -> bool { - self.lexicon.callback_set.contains(&mid) - || ["with_", "around_", "on_", "before_", "after_"] - .iter() - .any(|prefix| mid.starts_with(prefix)) - || mid.ends_with("_hook") - } - - fn callee_mid(&self, call: &Node) -> Option { - match call.r#type.as_str() { - "CALL" | "OPCALL" => ast::child_to_string(call.children.get(1)), - "FCALL" | "VCALL" => ast::child_to_string(call.children.first()), - _ => None, - } - } - - fn block_pass(&self, call: &Node) -> bool { - let args = match call.r#type.as_str() { - "CALL" | "OPCALL" => call.children.get(2), - "FCALL" => call.children.get(1), - _ => None, - }; - let Some(args) = args.and_then(ast::node) else { - return false; - }; - args.r#type == "BLOCK_PASS" - || (args.r#type == "LIST" - && args - .children - .iter() - .filter_map(ast::node) - 
.any(|child| child.r#type == "BLOCK_PASS")) - } - - fn method_obj(&self, recv: &Node) -> bool { - let mid = match recv.r#type.as_str() { - "CALL" => ast::child_to_string(recv.children.get(1)), - "FCALL" => ast::child_to_string(recv.children.first()), - _ => None, - }; - mid.is_some_and(|mid| self.lexicon.method_obj_mids.contains(&mid.as_str())) - } - - fn var_recv(&self, recv: &Node) -> bool { - matches!( - recv.r#type.as_str(), - "VCALL" | "LVAR" | "DVAR" | "IVAR" | "CVAR" | "GVAR" - ) - } - - fn const_recv(&self, recv: &Node) -> Option { - if matches!(recv.r#type.as_str(), "CONST" | "COLON2" | "COLON3") { - Some(self.const_text(recv)) - } else { - None - } - } - - fn const_text(&self, node: &Node) -> String { - match node.r#type.as_str() { - "CONST" => ast::child_to_string(node.children.first()).unwrap_or_default(), - "COLON3" => format!( - "::{}", - ast::child_to_string(node.children.first()).unwrap_or_default() - ), - "COLON2" => { - let name = ast::child_to_string(node.children.get(1)).unwrap_or_default(); - if node.text.starts_with("::") { - format!("::{name}") - } else if let Some(base) = node.children.first().and_then(ast::node) { - format!("{}::{name}", self.const_text(base)) - } else { - name - } - } - _ => ast::slice(node, &self.lines), - } - } - - fn const_simple(&self, node: &Node) -> String { - match node.r#type.as_str() { - "CONST" | "COLON3" => ast::child_to_string(node.children.first()).unwrap_or_default(), - "COLON2" => ast::child_to_string(node.children.get(1)).unwrap_or_default(), - _ => self.const_text(node), - } - } - - fn has_def(&self, node: &Node) -> bool { - let _ = self.language; - if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - return true; - } - if matches!(node.r#type.as_str(), "CLASS" | "MODULE") { - return false; - } - node.children - .iter() - .filter_map(ast::node) - .any(|child| self.has_def(child)) - } - - fn context_pair(&self, base: &str, mid: &str) -> bool { - self.lexicon - .context_pairs - .iter() - .any(|(key, mids)| 
*key == base && mids.contains(&mid)) - } -} - struct Report { hits: Vec, } diff --git a/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs b/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs index 3f8b83231..d003e3ba3 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs @@ -201,7 +201,7 @@ impl FunctionLcom { raw_components: Vec>, statements: &[local_flow::Statement], ) -> Vec { - raw_components + let mut components = raw_components .into_iter() .filter_map(|vars| { let touched = statements @@ -223,7 +223,10 @@ impl FunctionLcom { statements: touched, }) }) - .collect() + .collect::>(); + components + .sort_by_key(|component| component.statements.first().copied().unwrap_or(usize::MAX)); + components } fn components(&self, statements: &[local_flow::Statement]) -> Vec> { diff --git a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs index f0591756e..deeab38f4 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs @@ -1,4 +1,5 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::ast::{self, normalize_text, Child, Node, RawNode, Span}; +use crate::decomplex::syntax::adapters::{language_profile, LanguageProfile}; use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; @@ -47,12 +48,12 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result PathConditionReport { - let mined_sites = documents + let raw_sites = documents .iter() - .flat_map(sites_from_mined_facts) + .flat_map(sites_from_raw_facts) .collect::>(); - if !mined_sites.is_empty() { - return Report::new(mined_sites).findings(); + if !raw_sites.is_empty() { + return Report::new(raw_sites).findings(); } let mut sites = Vec::new(); @@ -64,6 +65,372 @@ pub fn 
scan_documents(documents: &[Document]) -> PathConditionReport { Report::new(sites).findings() } +fn sites_from_raw_facts(document: &Document) -> Vec { + let profile = language_profile(document.language); + let mut sites = Vec::new(); + for function in &document.function_defs { + for statement in raw_function_body_statements(profile, &function.body) { + raw_path_walk( + document, + profile, + statement, + &function.name, + &[], + &mut sites, + ); + } + } + sites +} + +fn raw_function_body_node<'a>( + profile: &dyn LanguageProfile, + node: &'a RawNode, +) -> Option<&'a RawNode> { + if let Some(body) = raw_child_by_field(node, "body") { + return Some(body); + } + raw_named_children(node).into_iter().rev().find(|child| { + profile + .function_body_node_kinds() + .contains(&child.kind.as_str()) + }) +} + +fn raw_function_body_statements<'a>( + profile: &dyn LanguageProfile, + node: &'a RawNode, +) -> Vec<&'a RawNode> { + let Some(body) = raw_function_body_node(profile, node) else { + return Vec::new(); + }; + + let mut named = raw_named_children(body) + .into_iter() + .filter(|child| !raw_comment_node(child)) + .collect::>(); + if named.len() == 1 + && profile + .nested_statement_wrapper_node_kinds() + .contains(&named[0].kind.as_str()) + { + if raw_branch_node(profile, named[0]) { + return vec![named[0]]; + } + named = raw_named_children(named[0]) + .into_iter() + .filter(|child| !raw_comment_node(child)) + .collect(); + } + if named.is_empty() && body.text.trim().is_empty() { + return Vec::new(); + } + if raw_branch_node(profile, body) || raw_assignment_statement(profile, body) || named.is_empty() + { + return vec![body]; + } + named +} + +fn raw_path_walk( + document: &Document, + profile: &dyn LanguageProfile, + node: &RawNode, + function: &str, + guards: &[String], + out: &mut Vec, +) { + if raw_nested_local_scope(profile, node) { + return; + } + + if raw_branch_node(profile, node) { + let condition = raw_branch_condition(node); + let atoms = 
raw_path_condition_atoms(profile, condition); + for child in raw_branch_body_nodes(profile, node) { + let mut next_guards = guards.to_vec(); + next_guards.extend(atoms.clone()); + raw_path_walk(document, profile, child, function, &next_guards, out); + } + return; + } + + if guards.len() >= 2 && raw_path_action_node(profile, node) { + let mut unique = guards.to_vec(); + unique.sort(); + unique.dedup(); + out.push(Site { + guards: unique, + action: profile.normalize_source_text(&node.text), + file: document.file.clone(), + defn: function.to_string(), + line: node.span[0], + span: node.span, + }); + return; + } + + for child in raw_named_children(node) { + raw_path_walk(document, profile, child, function, guards, out); + } +} + +fn raw_path_condition_atoms( + profile: &dyn LanguageProfile, + condition: Option<&RawNode>, +) -> Vec { + let Some(condition) = condition else { + return Vec::new(); + }; + if raw_boolean_container(profile, condition) && raw_boolean_and(profile, condition) { + let mut atoms = raw_flatten_boolean_and(profile, condition) + .into_iter() + .map(|child| raw_decision_member_text(profile, &child.text)) + .collect::>(); + atoms.sort(); + atoms.dedup(); + atoms + } else { + vec![raw_decision_member_text(profile, &condition.text)] + } +} + +fn raw_branch_condition(node: &RawNode) -> Option<&RawNode> { + raw_child_by_field(node, "condition") + .or_else(|| raw_child_by_field(node, "value")) + .or_else(|| raw_child_by_field(node, "subject")) + .or_else(|| raw_named_children(node).into_iter().next()) +} + +fn raw_branch_body_nodes<'a>(profile: &dyn LanguageProfile, node: &'a RawNode) -> Vec<&'a RawNode> { + let mut bodies = ["consequence", "body", "alternative"] + .into_iter() + .filter_map(|field| raw_child_by_field(node, field)) + .collect::>(); + if bodies.is_empty() { + bodies = raw_named_children(node).into_iter().skip(1).collect(); + } + bodies + .into_iter() + .flat_map(|body| { + if raw_simple_action_wrapper(profile, body) { + return vec![body]; + 
} + let body_children = raw_named_children(body); + let children = if profile + .path_transparent_branch_body_node_kinds() + .contains(&body.kind.as_str()) + { + body_children.into_iter().skip(1).collect::>() + } else { + body_children + }; + let children = children + .into_iter() + .flat_map(|child| { + if profile + .path_transparent_branch_body_node_kinds() + .contains(&child.kind.as_str()) + { + raw_named_children(child) + .into_iter() + .skip(1) + .collect::>() + } else { + vec![child] + } + }) + .filter(|child| !raw_comment_node(child)) + .collect::>(); + if children.is_empty() { + vec![body] + } else { + children + } + }) + .collect() +} + +fn raw_path_action_node(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + if raw_branch_node(profile, node) { + return false; + } + raw_simple_action_wrapper(profile, node) + || raw_assignment_statement(profile, node) + || profile + .path_action_node_kinds() + .contains(&node.kind.as_str()) +} + +fn raw_simple_action_wrapper(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + if !profile + .simple_action_wrapper_node_kinds() + .contains(&node.kind.as_str()) + { + return false; + } + let text = normalize_text(&node.text); + if text.contains('{') || text.contains('}') { + return false; + } + let text = text.strip_suffix(';').unwrap_or(&text).trim(); + let Some(open) = text.find('(') else { + return false; + }; + text.ends_with(')') + && text[..open] + .chars() + .all(|ch| ch == '_' || ch == '.' 
|| ch.is_ascii_alphanumeric()) +} + +fn raw_assignment_statement(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + profile + .assignment_node_kinds() + .contains(&node.kind.as_str()) + || node.children.iter().any(|child| { + !child.named + && profile + .assignment_operator_tokens() + .contains(&child.text.as_str()) + }) +} + +fn raw_branch_node(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + profile.branch_node_kinds().contains(&node.kind.as_str()) +} + +fn raw_nested_local_scope(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + profile.function_node_kinds().contains(&node.kind.as_str()) + || profile + .class_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .module_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .generic_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .struct_owner_node_kinds() + .contains(&node.kind.as_str()) +} + +fn raw_boolean_container(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + if profile + .boolean_container_node_kinds() + .contains(&node.kind.as_str()) + { + return true; + } + if raw_parenthesized_wrapper(profile, node) { + return raw_named_children(node) + .into_iter() + .next() + .map(|child| raw_boolean_container(profile, child)) + .unwrap_or(false); + } + false +} + +fn raw_boolean_and(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + if raw_parenthesized_wrapper(profile, node) { + return raw_named_children(node) + .into_iter() + .next() + .map(|child| raw_boolean_and(profile, child)) + .unwrap_or(false); + } + raw_direct_operator(node) + .map(|operator| profile.boolean_and_operators().contains(&operator.as_str())) + .unwrap_or(false) +} + +fn raw_flatten_boolean_and<'a>( + profile: &dyn LanguageProfile, + node: &'a RawNode, +) -> Vec<&'a RawNode> { + if !(raw_boolean_container(profile, node) && raw_boolean_and(profile, node)) { + return vec![node]; + } + if raw_parenthesized_wrapper(profile, node) { + return 
raw_named_children(node) + .into_iter() + .next() + .map(|child| raw_flatten_boolean_and(profile, child)) + .unwrap_or_else(|| vec![node]); + } + raw_named_children(node) + .into_iter() + .flat_map(|child| raw_flatten_boolean_and(profile, child)) + .collect() +} + +fn raw_parenthesized_wrapper(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + profile + .parenthesized_wrapper_node_kinds() + .contains(&node.kind.as_str()) + && raw_named_children(node).len() == 1 +} + +fn raw_decision_member_text(profile: &dyn LanguageProfile, text: &str) -> String { + profile.normalize_source_text(&strip_enclosing_parentheses(text)) +} + +fn strip_enclosing_parentheses(text: &str) -> String { + let mut value = text.trim().to_string(); + loop { + if !(value.starts_with('(') && value.ends_with(')')) { + break value; + } + if !enclosing_parentheses_wrap_all(&value) { + break value; + } + value = value[1..value.len() - 1].trim().to_string(); + } +} + +fn enclosing_parentheses_wrap_all(text: &str) -> bool { + let mut depth = 0isize; + for (index, ch) in text.chars().enumerate() { + if ch == '(' { + depth += 1; + } else if ch == ')' { + depth -= 1; + } + if depth == 0 && index < text.len() - 1 { + return false; + } + if depth < 0 { + return false; + } + } + depth == 0 +} + +fn raw_direct_operator(node: &RawNode) -> Option { + node.children + .iter() + .find(|child| { + let text = child.text.trim(); + !child.named && !matches!(text, "(" | ")") + }) + .map(|child| normalize_text(&child.text)) +} + +fn raw_named_children(node: &RawNode) -> Vec<&RawNode> { + node.children.iter().filter(|child| child.named).collect() +} + +fn raw_child_by_field<'a>(node: &'a RawNode, field: &str) -> Option<&'a RawNode> { + node.children + .iter() + .find(|child| child.field_name.as_deref() == Some(field)) +} + +fn raw_comment_node(node: &RawNode) -> bool { + node.kind.contains("comment") +} + fn sites_from_mined_facts(document: &Document) -> Vec { let mut sites = Vec::new(); for decision in 
&document.decision_sites { diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs index 77a6d4ecd..0c35bccef 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs @@ -94,65 +94,58 @@ pub fn scan_documents(documents: &[Document]) -> Vec { } fn decisions_from_mined_facts(document: &Document) -> Vec { - let state_fields = document - .state_writes - .iter() - .map(|write| normalized_state_field(&write.field)) - .collect::>(); - - document - .decision_sites - .iter() - .filter_map(|decision| { - let refs = document - .state_reads - .iter() - .filter(|read| { - read.function == decision.function && span_inside(read.span, decision.span) - }) - .filter_map(|read| mined_state_ref(read, &state_fields)) - .collect::>() - .into_iter() - .collect::>(); - if refs.is_empty() { - return None; - } - Some(Decision { + filter_wrapper_decisions( + document + .branch_decisions + .iter() + .map(|decision| Decision { file: decision.file.clone(), defn: decision.function.clone(), line: decision.line, span: decision.span, predicate: decision.predicate.clone(), - state_refs: refs, + state_refs: decision.state_refs.clone(), }) + .collect(), + ) +} + +fn filter_wrapper_decisions(decisions: Vec) -> Vec { + decisions + .iter() + .filter(|decision| { + !(wrapper_predicate(&decision.predicate) && nested_state_decision(decision, &decisions)) }) + .cloned() .collect() } -fn mined_state_ref(read: &syntax::StateRead, state_fields: &BTreeSet) -> Option { - let field = normalized_state_field(&read.field); - if !state_fields.is_empty() && !state_fields.contains(&field) { - return None; - } - let receiver = read.receiver.trim_start_matches('$'); - if receiver.is_empty() || matches!(receiver, "self" | "this") { - Some(field) - } else { - Some(format!("{}.{}", receiver, field)) - } +fn wrapper_predicate(predicate: 
&str) -> bool { + ["if", "unless", "while", "until"].iter().any(|prefix| { + predicate == *prefix + || predicate + .strip_prefix(prefix) + .map(|rest| rest.starts_with(char::is_whitespace)) + .unwrap_or(false) + }) } -fn normalized_state_field(field: &str) -> String { - field - .trim_start_matches('@') - .trim_start_matches('$') - .to_string() +fn nested_state_decision(decision: &Decision, decisions: &[Decision]) -> bool { + decisions.iter().any(|candidate| { + !std::ptr::eq(candidate, decision) + && candidate.defn == decision.defn + && span_encloses(decision.span, candidate.span) + && candidate + .state_refs + .iter() + .all(|state_ref| decision.state_refs.contains(state_ref)) + }) } -fn span_inside(inner: Span, outer: Span) -> bool { - let starts_after_or_at = inner[0] > outer[0] || (inner[0] == outer[0] && inner[1] >= outer[1]); - let ends_before_or_at = inner[2] < outer[2] || (inner[2] == outer[2] && inner[3] <= outer[3]); - starts_after_or_at && ends_before_or_at +fn span_encloses(outer: Span, inner: Span) -> bool { + let starts_before_or_at = outer[0] < inner[0] || (outer[0] == inner[0] && outer[1] <= inner[1]); + let ends_after_or_at = outer[2] > inner[2] || (outer[2] == inner[2] && outer[3] >= inner[3]); + starts_before_or_at && ends_after_or_at } struct StateBranchDensity { diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index 2c4643791..7978303e0 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -100,10 +100,12 @@ pub struct Document { pub root: RawNode, pub normalized_root: NormalizedNode, pub function_defs: Vec, + pub owner_defs: Vec, pub call_sites: Vec, pub state_reads: Vec, pub state_writes: Vec, pub decision_sites: Vec, + pub branch_decisions: Vec, pub dispatch_sites: Vec, pub predicate_aliases: Vec, pub comparison_uses: Vec, @@ -121,6 +123,15 @@ pub struct FunctionDef { pub params: Vec, } +#[derive(Clone, Debug)] +pub struct OwnerDef { 
+ pub file: String, + pub name: String, + pub kind: String, + pub line: usize, + pub span: Span, +} + #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct CallSite { pub receiver: String, @@ -181,6 +192,16 @@ pub struct DecisionSite { pub enclosing_span: Span, } +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct BranchDecision { + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, + pub predicate: String, + pub state_refs: Vec, +} + #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct DispatchSite { pub variant_set: Vec, diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs index e7f4962a7..699bd41cd 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs @@ -318,6 +318,18 @@ pub(crate) trait LanguageProfile { EMPTY_NODE_KINDS } + fn path_action_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn path_transparent_branch_body_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + fn expression_body_operator_tokens(&self) -> &[&str] { DEFAULT_EXPRESSION_BODY_OPERATOR_TOKENS } @@ -375,6 +387,20 @@ pub(crate) trait LanguageProfile { self.default_owner_name_from_declaration(node, source) } + fn owner_kind(&self, node: Node<'_>) -> String { + if self.class_owner_node_kinds().contains(&node.kind()) { + "class".to_string() + } else if self.module_owner_node_kinds().contains(&node.kind()) { + "module".to_string() + } else if self.impl_owner_node_kinds().contains(&node.kind()) { + "impl".to_string() + } else if self.struct_owner_node_kinds().contains(&node.kind()) { + "struct".to_string() + } else { + "owner".to_string() + } + } + fn default_owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { if self.class_owner_node_kinds().contains(&node.kind()) || 
self.module_owner_node_kinds().contains(&node.kind()) diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs index 187b48a0e..d18862f9a 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs @@ -58,6 +58,18 @@ impl LanguageProfile for CProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "call_expression", + "expression_statement", + "return_statement", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["compound_statement"] + } + fn local_declaration_node_kinds(&self) -> &[&str] { &["declaration", "init_declarator"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs index c2f4430a5..ee996e330 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs @@ -62,6 +62,18 @@ impl LanguageProfile for CppProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "call_expression", + "expression_statement", + "return_statement", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["compound_statement"] + } + fn local_declaration_node_kinds(&self) -> &[&str] { &["declaration", "init_declarator"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs index ec2463bd6..51e1ae869 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs @@ -49,6 +49,18 @@ impl LanguageProfile for CSharpProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "invocation_expression", + "expression_statement", + "return_statement", + ] + } + + fn 
simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { &["argument"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs index 7cc2a78f0..a9536016c 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs @@ -85,6 +85,18 @@ impl LanguageProfile for GoProfile { &["=", ":=", "+=", "-=", "*=", "/=", "%="] } + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "call_expression", + "expression_statement", + "return_statement", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["block", "statement_list"] + } + fn local_declaration_node_kinds(&self) -> &[&str] { &["short_var_declaration", "variable_declaration"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs index 3f73af2fc..7ec8910f6 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs @@ -52,6 +52,18 @@ impl LanguageProfile for JavaProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "method_invocation", + "expression_statement", + "return_statement", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + fn local_declaration_node_kinds(&self) -> &[&str] { &["local_variable_declaration", "variable_declarator"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs index 2968a2e39..64458ed62 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs @@ -53,6 +53,18 @@ impl LanguageProfile for JavaScriptProfile { &["=", "+=", "-=", "*=", "/=", "%=", "&&=", 
"||="] } + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "call_expression", + "expression_statement", + "return_statement", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["statement_block"] + } + fn local_declaration_node_kinds(&self) -> &[&str] { &["lexical_declaration", "variable_declarator"] } @@ -65,6 +77,10 @@ impl LanguageProfile for JavaScriptProfile { &["binary_expression"] } + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "for_in_statement", "switch_statement"] + } + fn case_node_kinds(&self) -> &[&str] { &["switch_statement"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs index ea1ae3c8a..51dc785db 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs @@ -90,6 +90,14 @@ impl LanguageProfile for KotlinProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn path_action_node_kinds(&self) -> &[&str] { + &["call_expression", "jump_expression"] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["statements", "control_structure_body", "function_body"] + } + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { &["directly_assignable_expression", "value_argument"] } @@ -113,6 +121,10 @@ impl LanguageProfile for KotlinProfile { ] } + fn branch_node_kinds(&self) -> &[&str] { + &["if_expression", "for_statement", "when_expression"] + } + fn case_node_kinds(&self) -> &[&str] { &["when_expression"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs index 007b6bd5a..d03469f3f 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs @@ -60,6 +60,14 @@ impl LanguageProfile for LuaProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn path_action_node_kinds(&self) -> &[&str] { + 
&["function_call", "expression_list", "return_statement"] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + fn local_declaration_node_kinds(&self) -> &[&str] { &["variable_declaration"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs index 0948afecb..f47fe569d 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs @@ -56,6 +56,21 @@ impl LanguageProfile for PhpProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "function_call_expression", + "member_call_expression", + "scoped_call_expression", + "expression_statement", + "return_statement", + "print_intrinsic", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["compound_statement", "declaration_list"] + } + fn comparison_node_kinds(&self) -> &[&str] { &["binary_expression"] } @@ -100,6 +115,10 @@ impl LanguageProfile for PhpProfile { &["parenthesized_expression"] } + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "foreach_statement", "switch_statement"] + } + fn field_like_node_kinds(&self) -> &[&str] { &[ "member_access_expression", @@ -144,6 +163,13 @@ impl LanguageProfile for PhpProfile { if !self.call_node_kinds().contains(&node.kind()) { return None; } + if node.kind() == "print_intrinsic" { + return Some(CallTarget::new( + "self".to_string(), + "print".to_string(), + self.call_argument_texts(node, source), + )); + } let mut target = self.default_call_target(node, source)?; target.receiver = php_normalize_receiver(&target.receiver); Some(target) diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs index e1cf90cf8..5f75db6c7 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs +++ 
b/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs @@ -1,6 +1,7 @@ +use super::super::tree_sitter_adapter::{AssignmentTarget, Target}; use super::super::Language; use super::base::LanguageProfile; -use tree_sitter::Language as TreeSitterLanguage; +use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct PythonProfile; @@ -17,6 +18,15 @@ impl LanguageProfile for PythonProfile { &["function_definition"] } + fn function_visibility(&self, node: tree_sitter::Node<'_>, source: &str) -> Option { + let name = self.function_name(node, source)?; + if name.starts_with('_') && !name.starts_with("__") { + Some("private".to_string()) + } else { + Some("public".to_string()) + } + } + fn class_owner_node_kinds(&self) -> &[&str] { &["class_definition"] } @@ -57,6 +67,10 @@ impl LanguageProfile for PythonProfile { &["comparison_operator", "binary_operator", "boolean_operator"] } + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "for_statement", "match_statement"] + } + fn case_node_kinds(&self) -> &[&str] { &["match_statement"] } @@ -100,4 +114,63 @@ impl LanguageProfile for PythonProfile { fn field_like_node_kinds(&self) -> &[&str] { &["attribute"] } + + fn path_action_node_kinds(&self) -> &[&str] { + &["call", "expression_statement", "return_statement"] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn path_transparent_branch_body_node_kinds(&self) -> &[&str] { + &["if_statement"] + } + + fn state_read_target(&self, node: Node<'_>, source: &str) -> Option { + if python_type_annotation_expression(node) { + return None; + } + let target = self.default_state_read_target(node, source)?; + if python_with_context_expression(node) && python_lock_context_field(&target.field) { + return None; + } + Some(target) + } + + fn state_write_source_node<'tree>( + &self, + _node: Node<'tree>, + assignment: &AssignmentTarget<'tree>, + ) -> Node<'tree> { + assignment.lhs + } +} + +fn python_with_context_expression(node: 
Node<'_>) -> bool { + let mut current = node.parent(); + while let Some(parent) = current { + match parent.kind() { + "with_clause" | "with_item" => return true, + "block" | "function_definition" | "class_definition" | "module" => return false, + _ => current = parent.parent(), + } + } + false +} + +fn python_type_annotation_expression(node: Node<'_>) -> bool { + let mut current = Some(node); + while let Some(item) = current { + match item.kind() { + "type" | "type_parameter" => return true, + "block" | "function_definition" | "class_definition" | "module" => return false, + _ => current = item.parent(), + } + } + false +} + +fn python_lock_context_field(field: &str) -> bool { + field == "_lock" || field.ends_with("_lock") } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs index bfa7f82d1..694c09499 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs @@ -64,10 +64,31 @@ impl LanguageProfile for RubyProfile { &["=", "+=", "-=", "*=", "/=", "%=", "&&=", "||="] } + fn path_action_node_kinds(&self) -> &[&str] { + &["call", "return"] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["body_statement"] + } + fn comparison_node_kinds(&self) -> &[&str] { &["binary"] } + fn branch_node_kinds(&self) -> &[&str] { + &[ + "if", + "unless", + "if_modifier", + "unless_modifier", + "case", + "while", + "until", + "for", + ] + } + fn case_node_kinds(&self) -> &[&str] { &["case"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs index decf3600b..09f66e7ec 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs @@ -57,6 +57,18 @@ impl LanguageProfile for RustProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn path_action_node_kinds(&self) -> 
&[&str] { + &[ + "call_expression", + "expression_statement", + "return_expression", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { &["pattern"] } @@ -73,6 +85,10 @@ impl LanguageProfile for RustProfile { &["binary_expression"] } + fn branch_node_kinds(&self) -> &[&str] { + &["if_expression", "match_expression", "for_expression"] + } + fn case_node_kinds(&self) -> &[&str] { &["match_expression"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs index 73dd2914c..a755ecb39 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs @@ -62,6 +62,14 @@ impl LanguageProfile for SwiftProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn path_action_node_kinds(&self) -> &[&str] { + &["call_expression", "control_transfer_statement"] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["statements", "control_structure_body", "function_body"] + } + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { &[ "directly_assignable_expression", diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs index 263fc39b9..35ed6b80a 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs @@ -53,6 +53,18 @@ impl LanguageProfile for TypeScriptProfile { &["=", "+=", "-=", "*=", "/=", "%=", "&&=", "||="] } + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "call_expression", + "expression_statement", + "return_statement", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["statement_block"] + } + fn local_declaration_node_kinds(&self) -> &[&str] { &["lexical_declaration", "variable_declarator"] } @@ -65,6 +77,10 @@ 
impl LanguageProfile for TypeScriptProfile { &["binary_expression"] } + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "for_in_statement", "switch_statement"] + } + fn case_node_kinds(&self) -> &[&str] { &["switch_statement"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs index 07b72d9d8..09ceb42a9 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs @@ -62,6 +62,18 @@ impl LanguageProfile for ZigProfile { &["=", "+=", "-=", "*=", "/=", "%="] } + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "call_expression", + "expression_statement", + "return_expression", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + fn local_declaration_node_kinds(&self) -> &[&str] { &["variable_declaration"] } @@ -70,6 +82,15 @@ impl LanguageProfile for ZigProfile { &["binary_expression"] } + fn branch_node_kinds(&self) -> &[&str] { + &[ + "if_statement", + "switch_expression", + "for_statement", + "labeled_statement", + ] + } + fn case_node_kinds(&self) -> &[&str] { &["switch_expression"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 7e5df2d20..d3f24680c 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -1,7 +1,7 @@ use super::{ adapters::{language_profile, LanguageProfile}, - CallSite, ComparisonUse, DecisionSite, DispatchSite, Document, FunctionDef, Language, - PredicateAlias, StateRead, StateWrite, + BranchDecision, CallSite, ComparisonUse, DecisionSite, DispatchSite, Document, FunctionDef, + Language, OwnerDef, PredicateAlias, StateRead, StateWrite, }; use crate::decomplex::ast::{line, node_text, normalize_text, normalize_tree, span, RawNode}; use anyhow::{Context, 
Result}; @@ -13,10 +13,12 @@ use tree_sitter::{Node, Parser}; pub fn parse_file(file: PathBuf, language: Language) -> Result { let parsed = ParsedDocument::parse(file, language)?; let mut function_defs = Vec::new(); + let mut owner_defs = Vec::new(); let mut call_sites = Vec::new(); let mut state_reads = Vec::new(); let mut state_writes = Vec::new(); let mut decision_sites = Vec::new(); + let mut branch_decisions = Vec::new(); let mut dispatch_sites = Vec::new(); let mut predicate_aliases = Vec::new(); let mut comparison_uses = Vec::new(); @@ -33,10 +35,12 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { language, &context, &mut function_defs, + &mut owner_defs, &mut call_sites, &mut state_reads, &mut state_writes, &mut decision_sites, + &mut branch_decisions, &mut predicate_aliases, &mut comparison_uses, &mut seen_writes, @@ -64,10 +68,12 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { root: RawNode::from_tree_sitter(parsed.tree.root_node(), &parsed.source), normalized_root: normalize_tree(parsed.tree.root_node(), &parsed.source, language), function_defs, + owner_defs, call_sites, state_reads, state_writes, decision_sites, + branch_decisions, dispatch_sites, predicate_aliases, comparison_uses, @@ -102,6 +108,7 @@ struct ContextState { function: Option, function_line: Option, pub receiver: Option, + locals: BTreeSet, controls: Vec, } @@ -113,6 +120,7 @@ impl ContextState { function: None, function_line: None, receiver: None, + locals: BTreeSet::new(), controls: Vec::new(), } } @@ -150,10 +158,12 @@ fn collect_facts( language: Language, context: &ContextState, function_defs: &mut Vec, + owner_defs: &mut Vec, call_sites: &mut Vec, state_reads: &mut Vec, state_writes: &mut Vec, decision_sites: &mut Vec, + branch_decisions: &mut Vec, predicate_aliases: &mut Vec, comparison_uses: &mut Vec, seen_writes: &mut HashSet, @@ -173,6 +183,7 @@ fn collect_facts( language, ); record_function_def(node, source, file, language, &next_context, 
function_defs); + record_owner_def(node, source, file, language, &next_context, owner_defs); record_call_site( node, source, @@ -209,6 +220,14 @@ fn collect_facts( decision_sites, seen_decisions, ); + record_branch_decision( + node, + source, + file, + language, + &next_context, + branch_decisions, + ); record_predicate_alias(node, source, file, language, predicate_aliases); record_comparison_use(node, source, file, language, &next_context, comparison_uses); @@ -221,10 +240,12 @@ fn collect_facts( language, &next_context, function_defs, + owner_defs, call_sites, state_reads, state_writes, decision_sites, + branch_decisions, predicate_aliases, comparison_uses, seen_writes, @@ -482,6 +503,38 @@ fn record_function_def( out.push(function); } +fn record_owner_def( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, +) { + let profile = language_profile(language); + if profile.owner_name_from_declaration(node, source).is_none() { + return; + } + let owner = OwnerDef { + file: file.to_string_lossy().to_string(), + name: context.current_owner(), + kind: profile.owner_kind(node), + line: line(node), + span: span(node), + }; + let key = (owner.file.clone(), owner.name.clone(), owner.kind.clone()); + if out.iter().any(|existing| { + ( + existing.file.clone(), + existing.name.clone(), + existing.kind.clone(), + ) == key + }) { + return; + } + out.push(owner); +} + fn record_predicate_alias( node: Node<'_>, source: &str, @@ -639,6 +692,116 @@ fn record_decision_site( } } +fn record_branch_decision( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, +) { + let profile = language_profile(language); + if !branch_decision_node(profile, node, source) { + return; + } + if branch_decision_wrapper_for_real_branch(profile, node, source) { + return; + } + let Some(condition) = branch_condition_node(profile, node) else { + return; + }; + let mut refs = 
BTreeSet::new(); + collect_branch_state_refs(profile, condition, source, context, &mut refs); + if refs.is_empty() { + return; + } + out.push(BranchDecision { + file: file.to_string_lossy().to_string(), + function: context.current_function(), + line: line(node), + span: span(node), + predicate: profile.normalize_source_text(node_text(condition, source)), + state_refs: refs.into_iter().collect(), + }); +} + +fn branch_decision_node(profile: &dyn LanguageProfile, node: Node<'_>, source: &str) -> bool { + profile.branch_node_kinds().contains(&node.kind()) + || profile.hidden_case(node) + || profile.control_context(node, source).as_deref() == Some("conditional") +} + +fn branch_decision_wrapper_for_real_branch( + profile: &dyn LanguageProfile, + node: Node<'_>, + source: &str, +) -> bool { + if profile.branch_node_kinds().contains(&node.kind()) || profile.hidden_case(node) { + return false; + } + if profile.control_context(node, source).as_deref() != Some("conditional") { + return false; + } + first_named_child(node) + .map(|child| branch_decision_node(profile, child, source)) + .unwrap_or(false) +} + +fn branch_condition_node<'tree>( + _profile: &dyn LanguageProfile, + node: Node<'tree>, +) -> Option> { + node.child_by_field_name("condition") + .or_else(|| node.child_by_field_name("value")) + .or_else(|| node.child_by_field_name("subject")) + .or_else(|| first_named_child(node)) +} + +fn collect_branch_state_refs( + profile: &dyn LanguageProfile, + node: Node<'_>, + source: &str, + context: &ContextState, + out: &mut BTreeSet, +) { + if let Some(target) = profile.state_read_target(node, source) { + let field = normalized_state_ref_field(&target.field); + let receiver = target.receiver.trim_start_matches('$'); + if constant_like_state_ref(receiver, &field) { + // Constants and type namespaces are not mutable object state. 
+ } else if (receiver.is_empty() || matches!(receiver, "self" | "this")) + && context.locals.contains(&field) + { + // Function-local bindings are not object state, even when a + // language permits bare predicate-style method calls. + } else if receiver.is_empty() || matches!(receiver, "self" | "this") { + out.insert(field); + } else { + out.insert(format!("{receiver}.{field}")); + } + } + + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + collect_branch_state_refs(profile, child, source, context, out); + } +} + +fn normalized_state_ref_field(field: &str) -> String { + field + .trim_start_matches('@') + .trim_start_matches('$') + .to_string() +} + +fn constant_like_state_ref(receiver: &str, field: &str) -> bool { + starts_uppercase(receiver) || (receiver.is_empty() && starts_uppercase(field)) +} + +fn starts_uppercase(value: &str) -> bool { + matches!(value.chars().next(), Some(ch) if ch.is_ascii_uppercase()) +} + fn record_conjunction_decision( profile: &dyn LanguageProfile, mut node: Node<'_>, @@ -761,6 +924,10 @@ fn push_function_context( context.function_line = Some(line(node)); context.owner = Some(owner); context.receiver = profile.function_receiver_name(node, source); + context.locals = profile.function_params(node, source).into_iter().collect(); + if let Some(receiver) = &context.receiver { + context.locals.insert(receiver.clone()); + } context } @@ -854,7 +1021,9 @@ fn record_state_read( return; }; let target = normalize_target_receiver(target, context); - if namespace_receiver(&target.receiver) { + if namespace_receiver(&target.receiver) + || constant_like_state_ref(&target.receiver, &target.field) + { return; } From 336b68295a1131382264fa3ceecd3416ff413b8f Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 07:08:41 +0000 Subject: [PATCH 38/52] Align decomplex Rust detector parity --- gems/decomplex/lib/decomplex/syntax.rb | 21 ++++- .../src/decomplex/detectors/derived_state.rs | 56 +++++++++++-- 
.../detectors/inconsistent_rename_clone.rs | 29 ++++--- .../detectors/oversized_predicate.rs | 83 +++++-------------- .../decomplex/syntax/tree_sitter_adapter.rs | 16 +++- 5 files changed, 120 insertions(+), 85 deletions(-) diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index abab448a7..1f88dd95d 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -1201,7 +1201,8 @@ def record_branch_decision(document, node, stack, out, immutable_readers:, immut immutable_readers: immutable_readers, immutable_reader_types: immutable_reader_types, type_aliases: type_aliases, - method_param_types: method_param_types + method_param_types: method_param_types, + params: current_params(stack) ) refs.uniq! refs.sort! @@ -1560,12 +1561,13 @@ def first_token_kind(node) end def collect_state_refs(node, refs, defn:, immutable_readers:, immutable_reader_types:, type_aliases:, - method_param_types:) + method_param_types:, params:) if (ref = direct_state_ref(node)) refs << ref elsif (target = state_read_target(node)) unless namespace_receiver?(target[:receiver]) - unless immutable_state_read?(target, defn, immutable_readers, immutable_reader_types, type_aliases, method_param_types) + unless branch_local_param_ref?(node, target, params) || + immutable_state_read?(target, defn, immutable_readers, immutable_reader_types, type_aliases, method_param_types) refs << (target[:receiver] == "self" ? 
target[:field] : "#{target[:receiver]}.#{target[:field]}") end end @@ -1578,11 +1580,22 @@ def collect_state_refs(node, refs, defn:, immutable_readers:, immutable_reader_t immutable_readers: immutable_readers, immutable_reader_types: immutable_reader_types, type_aliases: type_aliases, - method_param_types: method_param_types + method_param_types: method_param_types, + params: params ) if ts_node?(child) end end + def branch_local_param_ref?(node, target, params) + field = target[:field].to_s + return false unless params.include?(field) + + receiver = target[:receiver].to_s + return false unless receiver.empty? || receiver == "self" + + normalize_text(node.text) == field + end + def immutable_state_read?(target, defn, immutable_readers, immutable_reader_types, type_aliases, method_param_types) receiver = target[:receiver].to_s field = target[:field].to_sym diff --git a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs index 274e6d30b..2b7027905 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs @@ -51,12 +51,17 @@ fn assignments(method: &MethodSummary) -> Vec { .statements .iter() .flat_map(|statement| { - statement - .writes - .iter() + let mut writes = statement.writes.iter().cloned().collect::>(); + writes.sort_by(|a, b| { + write_position(&statement.source, a) + .cmp(&write_position(&statement.source, b)) + .then_with(|| a.cmp(b)) + }); + writes + .into_iter() .map(|name| Asgn { - name: name.clone(), - deps: dependencies_for(statement, name), + deps: dependencies_for(statement, &name), + name, line: statement.line, span: statement.span, statement_index: statement.index, @@ -66,6 +71,47 @@ fn assignments(method: &MethodSummary) -> Vec { .collect() } +fn write_position(source: &str, name: &str) -> usize { + identifier_positions(source) + .into_iter() + .find_map(|(identifier, position)| (identifier == 
name).then_some(position)) + .unwrap_or(usize::MAX) +} + +fn identifier_positions(source: &str) -> Vec<(String, usize)> { + let mut out = Vec::new(); + let mut current = String::new(); + let mut start = 0usize; + for (index, ch) in source.char_indices() { + if ch == '_' || ch.is_ascii_alphanumeric() { + if current.is_empty() { + start = index; + } + current.push(ch); + } else if !current.is_empty() { + if current + .chars() + .next() + .map(|first| first == '_' || first.is_ascii_alphabetic()) + .unwrap_or(false) + { + out.push((current.clone(), start)); + } + current.clear(); + } + } + if !current.is_empty() + && current + .chars() + .next() + .map(|first| first == '_' || first.is_ascii_alphabetic()) + .unwrap_or(false) + { + out.push((current, start)); + } + out +} + fn dependencies_for(statement: &Statement, name: &str) -> Vec { let mut deps: Vec<_> = statement .dependencies diff --git a/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs b/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs index 837fbf46b..5e2b604d8 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs @@ -187,7 +187,6 @@ impl Report { continue; } out.extend(self.inconsistent_pairs(ref_block, candidate)); - out.extend(self.inconsistent_pairs(candidate, ref_block)); } } out @@ -200,32 +199,38 @@ impl Report { ) -> Vec { let mut out = Vec::new(); for (ref_name, positions) in self.ref_classes(ref_block) { - let mut spellings = BTreeSet::new(); + let mut spellings = Vec::new(); for pos in positions { if let Some(name) = candidate.names.get(pos) { - spellings.insert(name.clone()); + if !spellings.contains(name) { + spellings.push(name.clone()); + } } } if spellings.len() < 2 { continue; } - out.push(self.finding( - ref_block, - candidate, - &ref_name, - spellings.into_iter().collect(), - )); + out.push(self.finding(ref_block, candidate, &ref_name, 
spellings)); } out } - fn ref_classes(&self, ref_block: &Block) -> BTreeMap> { + fn ref_classes(&self, ref_block: &Block) -> Vec<(String, Vec)> { + let mut order = Vec::new(); let mut classes: BTreeMap> = BTreeMap::new(); for (index, name) in ref_block.names.iter().enumerate() { + if !classes.contains_key(name) { + order.push(name.clone()); + } classes.entry(name.clone()).or_default().push(index); } - classes.retain(|_, v| v.len() >= 2); - classes + order + .into_iter() + .filter_map(|name| { + let positions = classes.remove(&name)?; + (positions.len() >= 2).then_some((name, positions)) + }) + .collect() } fn same_unit(&self, left: &Block, right: &Block) -> bool { diff --git a/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs b/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs index 032554bb1..b8a3a66d6 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs @@ -1,4 +1,4 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::ast::Span; use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; @@ -20,8 +20,6 @@ pub struct ResultReport { } const LIMIT: usize = 3; -const PREDICATE_NODES: &[&str] = &["IF", "UNLESS", "WHILE", "UNTIL"]; - pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let documents = syntax::parse_files(files, language)?; Ok(scan_documents(&documents)) @@ -30,85 +28,48 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result pub fn scan_documents(documents: &[Document]) -> ResultReport { let mut findings = Vec::new(); for document in documents { - let mut scanner = - OversizedPredicate::new(document.file.clone(), document.lines.clone(), LIMIT); - scanner.walk(&document.normalized_root, &Vec::new()); - findings.extend(scanner.findings); + let scanner = OversizedPredicate::new(LIMIT); + for site in &document.decision_sites { + 
if let Some(finding) = scanner.finding_for_site(site) { + findings.push(finding); + } + } } ResultReport { findings } } struct OversizedPredicate { - file: String, - lines: Vec, limit: usize, - findings: Vec, } impl OversizedPredicate { - fn new(file: String, lines: Vec, limit: usize) -> Self { - Self { - file, - lines, - limit, - findings: Vec::new(), - } + fn new(limit: usize) -> Self { + Self { limit } } - fn walk(&mut self, node: &Node, defstack: &[String]) { - let mut next_defstack = defstack.to_vec(); - if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; - if let Some(Child::Symbol(name)) = node.children.get(name_index) { - next_defstack.push(name.clone()); - } - } - - self.record_predicate(node, &next_defstack); - - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, &next_defstack); + fn finding_for_site( + &self, + site: &crate::decomplex::syntax::DecisionSite, + ) -> Option { + if self.predicate_helper(&site.function) { + return None; } - } - - fn record_predicate(&mut self, node: &Node, defstack: &[String]) { - if !PREDICATE_NODES.contains(&node.r#type.as_str()) { - return; - } - - let defn = defstack.last().map(|s| s.as_str()).unwrap_or(""); - if self.predicate_helper(defn) { - return; - } - - let cond = node.children.get(0).and_then(ast::node); - let Some(cond) = cond else { return }; - - let predicate = ast::slice(cond, &self.lines); - let atoms_text = self.condition_atoms(&predicate); + let atoms_text = self.condition_atoms(&site.predicate); if atoms_text.len() <= self.limit { - return; + return None; } - let at = format!("{}:{}:{}", self.file, defn, node.first_lineno); + let at = format!("{}:{}:{}", site.file, site.function, site.line); let mut spans = BTreeMap::new(); - spans.insert( - at.clone(), - [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - ); + spans.insert(at.clone(), site.enclosing_span); - 
self.findings.push(OversizedPredicateRow { + Some(OversizedPredicateRow { at, count: atoms_text.len(), - predicate, + predicate: site.predicate.clone(), atoms: atoms_text, spans, - }); + }) } fn condition_atoms(&self, predicate: &str) -> Vec { diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index d3f24680c..566da6545 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -769,9 +769,7 @@ fn collect_branch_state_refs( let receiver = target.receiver.trim_start_matches('$'); if constant_like_state_ref(receiver, &field) { // Constants and type namespaces are not mutable object state. - } else if (receiver.is_empty() || matches!(receiver, "self" | "this")) - && context.locals.contains(&field) - { + } else if branch_local_ref(node, source, receiver, &field, context) { // Function-local bindings are not object state, even when a // language permits bare predicate-style method calls. 
} else if receiver.is_empty() || matches!(receiver, "self" | "this") { @@ -787,6 +785,18 @@ fn collect_branch_state_refs( } } +fn branch_local_ref( + node: Node<'_>, + source: &str, + receiver: &str, + field: &str, + context: &ContextState, +) -> bool { + (receiver.is_empty() || matches!(receiver, "self" | "this")) + && context.locals.contains(field) + && normalize_text(node_text(node, source)) == field +} + fn normalized_state_ref_field(field: &str) -> String { field .trim_start_matches('@') From 3c4a1bfe9e72183e410513b2556acaa84cff867b Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 08:49:53 +0000 Subject: [PATCH 39/52] Fix decomplex Python parity regressions --- gems/decomplex/lib/decomplex/locality_drag.rb | 15 +- .../decomplex/detectors/flay_similarity.rs | 17 +- .../src/decomplex/detectors/locality_drag.rs | 55 ++++- .../weighted_inlined_cognitive_complexity.rs | 121 +++++++--- .../src/decomplex/syntax/adapters/base.rs | 131 +++++++---- .../src/decomplex/syntax/adapters/python.rs | 213 +++++++++++++++++- 6 files changed, 460 insertions(+), 92 deletions(-) diff --git a/gems/decomplex/lib/decomplex/locality_drag.rb b/gems/decomplex/lib/decomplex/locality_drag.rb index 4054b4728..384253976 100644 --- a/gems/decomplex/lib/decomplex/locality_drag.rb +++ b/gems/decomplex/lib/decomplex/locality_drag.rb @@ -31,7 +31,10 @@ def self.scan( summaries = LocalFlow.scan(files) complexity_scores = Array(files).each_with_object({}) do |file, scores| document = Syntax.parse(file, parser: "tree_sitter") - scores.merge!(document.local_complexity_scores) + document.local_methods.each do |method| + scores[complexity_key(method)] = + document.local_complexity_scores.fetch(method.id, { score: 0.0 }) + end end new( summaries, @@ -75,7 +78,7 @@ def findings def findings_for(summary) return [] if summary.statements.size < @min_unrelated_statements + 2 - local_complexity = @complexity_scores.fetch(summary.id, { score: 0.0 })[:score].to_f + local_complexity = 
@complexity_scores.fetch(complexity_key(summary), { score: 0.0 })[:score].to_f return [] if local_complexity < @min_local_complexity findings = summary.statements.each_with_index.flat_map do |statement, index| @@ -279,5 +282,13 @@ def reason_for(variable, unrelated, gap_lines, boundaries, local_complexity) def round(value) (value * 10).round / 10.0 end + + def self.complexity_key(method) + [method.file, method.line, method.name] + end + + def complexity_key(method) + self.class.complexity_key(method) + end end end diff --git a/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs b/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs index 146621f68..c56bc2c6b 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs @@ -135,8 +135,7 @@ impl Scanner { } } - let mut seen = HashSet::new(); - let mut out = Vec::new(); + let mut best_by_key: BTreeMap = BTreeMap::new(); for rows in groups.values() { let cluster = uniq_sites( rows.iter() @@ -165,17 +164,21 @@ impl Scanner { .collect::>(); key.sort(); let key = key.join("\0"); - if !seen.insert(key) { - continue; - } let mass = rows .iter() .map(|(_, signature_mass)| *signature_mass) .max() .unwrap_or(0); - out.push(self.finding_for(&cluster, "type3", mass)); + let finding = self.finding_for(&cluster, "type3", mass); + if best_by_key + .get(&key) + .map(|existing| existing.mass < finding.mass) + .unwrap_or(true) + { + best_by_key.insert(key, finding); + } } - out + best_by_key.into_values().collect() } fn finding_for( diff --git a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs index 3f401ba52..57bb76c80 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs @@ -1,6 +1,6 @@ use crate::decomplex::ast::Span; use crate::decomplex::detectors::{local_flow, 
weighted_inlined_cognitive_complexity}; -use crate::decomplex::syntax::{Document, Language}; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -46,16 +46,29 @@ pub struct BoundaryInfo { } pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { - let summaries = local_flow::scan_files(files, language)?; - Ok(scan_summaries(summaries)) + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) } pub fn scan_documents(documents: &[Document]) -> Vec { - scan_summaries(local_flow::scan_documents(documents)) + let summaries = local_flow::scan_documents(documents); + let complexity_scores = weighted_inlined_cognitive_complexity::raw_complexity_scores(documents); + scan_summaries_with_scores(summaries, complexity_scores) } pub fn scan_summaries(summaries: Vec) -> Vec { - let mut detector = LocalityDrag::new(summaries); + let mut detector = LocalityDrag::new(summaries, BTreeMap::new()); + detector.findings() +} + +fn scan_summaries_with_scores( + summaries: Vec, + complexity_scores: BTreeMap< + (String, usize, String), + weighted_inlined_cognitive_complexity::ScoreResult, + >, +) -> Vec { + let mut detector = LocalityDrag::new(summaries, complexity_scores); detector.findings() } @@ -66,10 +79,18 @@ struct LocalityDrag { min_local_complexity: f64, min_score: isize, max_findings_per_method: usize, + complexity_scores: + BTreeMap<(String, usize, String), weighted_inlined_cognitive_complexity::ScoreResult>, } impl LocalityDrag { - fn new(summaries: Vec) -> Self { + fn new( + summaries: Vec, + complexity_scores: BTreeMap< + (String, usize, String), + weighted_inlined_cognitive_complexity::ScoreResult, + >, + ) -> Self { Self { summaries, min_unrelated_statements: 4, @@ -77,6 +98,7 @@ impl LocalityDrag { min_local_complexity: 12.0, min_score: 60, max_findings_per_method: 3, + complexity_scores, } } @@ -102,12 +124,7 @@ impl LocalityDrag { 
return Vec::new(); } - let scorer = weighted_inlined_cognitive_complexity::LocalScorer::new(); - let local_complexity = summary - .raw_node - .as_ref() - .map(|node| scorer.score_raw(node).score) - .unwrap_or_else(|| scorer.score(&summary.node).score); + let local_complexity = self.local_complexity(summary); if local_complexity < self.min_local_complexity { return Vec::new(); } @@ -135,6 +152,20 @@ impl LocalityDrag { .collect() } + fn local_complexity(&self, summary: &local_flow::MethodSummary) -> f64 { + self.complexity_scores + .get(&(summary.file.clone(), summary.line, summary.name.clone())) + .map(|score| score.score) + .unwrap_or_else(|| { + let scorer = weighted_inlined_cognitive_complexity::LocalScorer::new(); + summary + .raw_node + .as_ref() + .map(|node| scorer.score_raw(node).score) + .unwrap_or_else(|| scorer.score(&summary.node).score) + }) + } + fn finding_for_write( &self, summary: &local_flow::MethodSummary, diff --git a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs index 7d2b61a5d..2621089d4 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs @@ -66,7 +66,9 @@ pub fn scan_documents(documents: &[Document]) -> Vec BTreeMap<(String, usize, String), ScoreResult> { +pub(crate) fn raw_complexity_scores( + documents: &[Document], +) -> BTreeMap<(String, usize, String), ScoreResult> { let mut out = BTreeMap::new(); for document in documents { for function in &document.function_defs { @@ -218,17 +220,13 @@ impl LocalScorer { nesting: usize, signals: &mut BTreeMap, ) -> f64 { - node.children - .iter() - .filter_map(ast::node) - .map(|child| { - if child.r#type == "WHEN" { - self.score_when(child, nesting, signals) - } else { - self.score_node(child, nesting, signals) - } - }) - .sum() + 
compensated_sum(node.children.iter().filter_map(ast::node).map(|child| { + if child.r#type == "WHEN" { + self.score_when(child, nesting, signals) + } else { + self.score_node(child, nesting, signals) + } + })) } fn score_when( @@ -287,11 +285,12 @@ impl LocalScorer { nesting: usize, signals: &mut BTreeMap, ) -> f64 { - node.children - .iter() - .filter_map(ast::node) - .map(|child| self.score_node(child, nesting, signals)) - .sum() + compensated_sum( + node.children + .iter() + .filter_map(ast::node) + .map(|child| self.score_node(child, nesting, signals)), + ) } fn predicate_cost(&self, node: Option<&Node>, signals: &mut BTreeMap) -> f64 { @@ -368,7 +367,12 @@ impl LocalScorer { } else { 0.0 }; - return exit_cost + self.score_raw_children(node, nesting, signals); + let child_cost = if raw_bare_early_exit_wrapper(node) { + 0.0 + } else { + self.score_raw_children(node, nesting, signals) + }; + return exit_cost + child_cost; } if raw_boolean_node(node) { @@ -385,10 +389,17 @@ impl LocalScorer { nesting: usize, signals: &mut BTreeMap, ) -> f64 { - node.children - .iter() - .map(|child| self.score_raw_node(child, nesting, signals)) - .sum() + compensated_sum(node.children.iter().map(|child| { + if raw_transparent_single_line_suite_statement(node, child) { + if raw_bare_early_exit_wrapper(child) { + 0.0 + } else { + self.score_raw_children(child, nesting, signals) + } + } else { + self.score_raw_node(child, nesting, signals) + } + })) } fn raw_predicate_cost( @@ -506,6 +517,44 @@ fn raw_early_exit(node: &RawNode) -> bool { ) } +fn raw_transparent_single_line_suite_statement(parent: &RawNode, child: &RawNode) -> bool { + parent.kind == "block" + && parent.children.len() == 1 + && parent.text == child.text + && matches!( + child.kind.as_str(), + "return_statement" | "break_statement" | "continue_statement" + ) +} + +fn raw_bare_early_exit_wrapper(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "return_statement" | "break_statement" | "continue_statement" + 
) && node.children.len() == 1 + && !node.children[0].named + && node.children[0].text == node.text +} + +fn compensated_sum(values: impl IntoIterator) -> f64 { + let mut sum = 0.0f64; + let mut compensation = 0.0f64; + for value in values { + let next = sum + value; + if sum.abs() >= value.abs() { + compensation += (sum - next) + value; + } else { + compensation += (value - next) + sum; + } + sum = next; + } + sum + compensation +} + +fn format_one_decimal(value: f64) -> String { + format!("{value:.1}") +} + fn raw_boolean_node(node: &RawNode) -> bool { matches!( node.kind.as_str(), @@ -676,10 +725,15 @@ impl Analyzer { .map(|(_, edges)| { edges .into_iter() - .max_by(|a, b| { - self.edge_weight(&a.r#type) - .partial_cmp(&self.edge_weight(&b.r#type)) - .unwrap() + .fold(None, |best: Option, edge| { + let Some(current) = best else { + return Some(edge); + }; + if self.edge_weight(&edge.r#type) > self.edge_weight(¤t.r#type) { + Some(edge) + } else { + Some(current) + } }) .unwrap() }) @@ -734,7 +788,16 @@ impl Analyzer { fn strongest_chain(&self, score: &LocalScore, contributions: &[Contribution]) -> Vec { let chain = contributions .iter() - .max_by(|a, b| a.score.partial_cmp(&b.score).unwrap()) + .fold(None, |best: Option<&Contribution>, contribution| { + let Some(current) = best else { + return Some(contribution); + }; + if contribution.score > current.score { + Some(contribution) + } else { + Some(current) + } + }) .map(|c| c.chain.clone()) .unwrap_or_default(); let mut out = vec![score.name.clone()]; @@ -746,13 +809,13 @@ impl Analyzer { if single_caller_callees.is_empty() { format!( "same-owner call chain adds {} weighted cognitive points", - hidden + format_one_decimal(hidden) ) } else { format!( "{} single-caller helper(s) add {} weighted cognitive points", single_caller_callees.len(), - hidden + format_one_decimal(hidden) ) } } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs 
index 699bd41cd..4630a8fce 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs @@ -1024,38 +1024,51 @@ pub(crate) trait LanguageProfile { } fn clone_candidates(&self, document: &Document) -> Vec { - default_clone_candidates(document) + let mut out = Vec::new(); + let mut seen = HashSet::new(); + + for function in &document.function_defs { + let candidate = clone_candidate_for( + self, + document, + &function.body, + Some("defn"), + Some(function.name.as_str()), + ); + clone_add_candidate(&mut out, &mut seen, candidate); + } + + let mut nodes = Vec::new(); + document.root.walk(&mut nodes); + for node in nodes { + if self.clone_candidate_node(node) { + let candidate = clone_candidate_for(self, document, node, None, None); + clone_add_candidate(&mut out, &mut seen, candidate); + } + } + + out } fn clone_fingerprint(&self, node: &RawNode) -> (String, usize) { - clone_fingerprint(node, &mut HashSet::new()) + clone_fingerprint_for_profile(self, node, &mut HashSet::new()) } -} -fn default_clone_candidates(document: &Document) -> Vec { - let mut out = Vec::new(); - let mut seen = HashSet::new(); - - for function in &document.function_defs { - let candidate = clone_candidate_for( - document, - &function.body, - Some("defn"), - Some(function.name.as_str()), - ); - clone_add_candidate(&mut out, &mut seen, candidate); - } - - let mut nodes = Vec::new(); - document.root.walk(&mut nodes); - for node in nodes { - if clone_candidate_node(node) { - let candidate = clone_candidate_for(document, node, None, None); - clone_add_candidate(&mut out, &mut seen, candidate); - } + fn clone_candidate_node(&self, node: &RawNode) -> bool { + default_clone_candidate_node(node) } - out + fn clone_fingerprint_children<'a>(&self, node: &'a RawNode) -> Vec<&'a RawNode> { + node.children.iter().collect() + } + + fn clone_child_fingerprint( + &self, + _parent: &RawNode, + _child: &RawNode, + ) -> Option<(String, 
usize)> { + None + } } fn clone_add_candidate( @@ -1076,24 +1089,25 @@ fn clone_add_candidate( } } -fn clone_candidate_for( +fn clone_candidate_for( + profile: &P, document: &Document, node: &RawNode, node_name: Option<&str>, function_name: Option<&str>, ) -> Option { - let (fingerprint, mass) = clone_fingerprint(node, &mut HashSet::new()); + let (fingerprint, mass) = profile.clone_fingerprint(node); if fingerprint.is_empty() { return None; } let line = node.line(); let method = clone_method_span_for(document, line); - let children = clone_fuzzy_children_for(node); + let children = clone_fuzzy_children_for(profile, node); let mut child_fingerprints = Vec::new(); let mut child_masses = Vec::new(); for child in children { - let (child_fp, child_mass) = clone_fingerprint(child, &mut HashSet::new()); + let (child_fp, child_mass) = profile.clone_fingerprint(child); if !child_fp.is_empty() && child_mass > 0 { child_fingerprints.push(child_fp); child_masses.push(child_mass); @@ -1119,7 +1133,7 @@ fn clone_candidate_for( }) } -fn clone_candidate_node(node: &RawNode) -> bool { +pub(super) fn default_clone_candidate_node(node: &RawNode) -> bool { node.named && !CLONE_SKIP_KINDS.contains(&node.kind.as_str()) && CLONE_CANDIDATE_KINDS.contains(&node.kind.as_str()) @@ -1127,11 +1141,22 @@ fn clone_candidate_node(node: &RawNode) -> bool { && !node.named_children().is_empty() } -fn clone_fuzzy_children_for(node: &RawNode) -> Vec<&RawNode> { - let source = clone_body_node(node).unwrap_or(node); - let mut children = source.named_children(); +fn clone_fuzzy_children_for<'a, P: LanguageProfile + ?Sized>( + profile: &P, + node: &'a RawNode, +) -> Vec<&'a RawNode> { + let source = clone_body_node_for(profile, node).unwrap_or(node); + let mut children = profile + .clone_fingerprint_children(source) + .into_iter() + .filter(|child| child.named) + .collect::>(); if children.is_empty() { - children = node.named_children(); + children = profile + .clone_fingerprint_children(node) + 
.into_iter() + .filter(|child| child.named) + .collect(); } children .into_iter() @@ -1142,13 +1167,29 @@ fn clone_fuzzy_children_for(node: &RawNode) -> Vec<&RawNode> { .collect() } +fn clone_body_node_for<'a, P: LanguageProfile + ?Sized>( + profile: &P, + node: &'a RawNode, +) -> Option<&'a RawNode> { + clone_body_node(node).or_else(|| { + profile + .clone_fingerprint_children(node) + .into_iter() + .find(|child| CLONE_BODY_KINDS.contains(&child.kind.as_str())) + }) +} + fn clone_body_node(node: &RawNode) -> Option<&RawNode> { node.children .iter() .find(|child| CLONE_BODY_KINDS.contains(&child.kind.as_str())) } -fn clone_fingerprint(node: &RawNode, active: &mut HashSet) -> (String, usize) { +fn clone_fingerprint_for_profile( + profile: &P, + node: &RawNode, + active: &mut HashSet, +) -> (String, usize) { let key = clone_node_key(node); if active.contains(&key) || node.kind == "comment" { return (String::new(), 0); @@ -1156,7 +1197,7 @@ fn clone_fingerprint(node: &RawNode, active: &mut HashSet) -> (String, u active.insert(key.clone()); let out = if CLONE_CALL_KINDS.contains(&node.kind.as_str()) && clone_call_message(node).is_some() { - clone_fingerprint_call(node, active) + clone_fingerprint_call(profile, node, active) } else if node.children.is_empty() { let token = clone_terminal_token(node); if token.is_empty() { @@ -1167,8 +1208,10 @@ fn clone_fingerprint(node: &RawNode, active: &mut HashSet) -> (String, u } else { let mut child_parts = Vec::new(); let mut mass = 1; - for child in &node.children { - let (child_fp, child_mass) = clone_fingerprint(child, active); + for child in profile.clone_fingerprint_children(node) { + let (child_fp, child_mass) = profile + .clone_child_fingerprint(node, child) + .unwrap_or_else(|| clone_fingerprint_for_profile(profile, child, active)); if child_fp.is_empty() { continue; } @@ -1185,12 +1228,18 @@ fn clone_fingerprint(node: &RawNode, active: &mut HashSet) -> (String, u out } -fn clone_fingerprint_call(node: &RawNode, active: 
&mut HashSet) -> (String, usize) { +fn clone_fingerprint_call( + profile: &P, + node: &RawNode, + active: &mut HashSet, +) -> (String, usize) { let message = clone_call_message(node).unwrap_or_default(); let mut child_parts = Vec::new(); let mut mass = 1; - for child in &node.children { - let (child_fp, child_mass) = clone_fingerprint(child, active); + for child in profile.clone_fingerprint_children(node) { + let (child_fp, child_mass) = profile + .clone_child_fingerprint(node, child) + .unwrap_or_else(|| clone_fingerprint_for_profile(profile, child, active)); if child_fp.is_empty() { continue; } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs index 5f75db6c7..e865225d4 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs @@ -1,6 +1,7 @@ use super::super::tree_sitter_adapter::{AssignmentTarget, Target}; use super::super::Language; -use super::base::LanguageProfile; +use super::base::{default_clone_candidate_node, LanguageProfile}; +use crate::decomplex::ast::RawNode; use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct PythonProfile; @@ -145,6 +146,79 @@ impl LanguageProfile for PythonProfile { ) -> Node<'tree> { assignment.lhs } + + fn clone_candidate_node(&self, node: &RawNode) -> bool { + if python_assignment_wrapper_node(node) { + return false; + } + default_clone_candidate_node(node) + } + + fn clone_fingerprint_children<'a>(&self, node: &'a RawNode) -> Vec<&'a RawNode> { + if node.kind == "type" + && node.children.len() == 1 + && matches!(node.children[0].kind.as_str(), "generic_type" | "string") + { + return node.children[0].children.iter().collect(); + } + if python_terminal_wrapper_node(node) { + return Vec::new(); + } + if node.kind == "expression_statement" && node.children.len() == 1 { + let child = &node.children[0]; + if python_expression_wrapper_child(child) 
{ + return child.children.iter().collect(); + } + } + if python_call_expression_statement(node) { + return node.children.iter().collect(); + } + if node.kind == "block" && node.children.len() == 1 { + let child = &node.children[0]; + if python_single_statement_block_child(child) { + return child.children.iter().collect(); + } + if child.kind == "expression_statement" { + return python_expression_statement_clone_children(child); + } + } + if node.kind == "if_statement" { + return node + .children + .iter() + .flat_map(python_if_statement_clone_children) + .collect(); + } + if matches!(node.kind.as_str(), "else_clause" | "except_clause") { + return node + .children + .iter() + .flat_map(python_clause_clone_children) + .collect(); + } + if node.kind == "with_clause" { + if python_simple_with_clause(node) { + return Vec::new(); + } + return node + .children + .iter() + .flat_map(python_with_clause_clone_children) + .collect(); + } + node.children.iter().collect() + } + + fn clone_child_fingerprint( + &self, + _parent: &RawNode, + child: &RawNode, + ) -> Option<(String, usize)> { + if python_escape_only_string_content(child) { + return Some(("lit".to_string(), 1)); + } + None + } } fn python_with_context_expression(node: Node<'_>) -> bool { @@ -174,3 +248,140 @@ fn python_type_annotation_expression(node: Node<'_>) -> bool { fn python_lock_context_field(field: &str) -> bool { field == "_lock" || field.ends_with("_lock") } + +fn python_assignment_wrapper_node(node: &RawNode) -> bool { + matches!(node.kind.as_str(), "assignment" | "augmented_assignment") +} + +fn python_expression_wrapper_child(node: &RawNode) -> bool { + python_assignment_wrapper_node(node) + || matches!(node.kind.as_str(), "call" | "string" | "yield") +} + +fn python_expression_statement_clone_children(node: &RawNode) -> Vec<&RawNode> { + if node.kind == "expression_statement" && node.children.len() == 1 { + let child = &node.children[0]; + if python_expression_wrapper_child(child) { + return 
child.children.iter().collect(); + } + } + node.children.iter().collect() +} + +fn python_call_expression_statement(node: &RawNode) -> bool { + if node.kind != "expression_statement" { + return false; + } + if node.children.len() == 1 && node.children[0].kind == "call" { + return true; + } + node.children + .iter() + .any(|child| matches!(child.kind.as_str(), "argument_list" | "arguments")) + && node + .children + .iter() + .all(|child| !python_assignment_wrapper_node(child) && !python_assignment_token(child)) +} + +fn python_assignment_token(node: &RawNode) -> bool { + matches!(node.text.as_str(), "=" | "+=" | "-=" | "*=" | "/=" | "%=") +} + +fn python_terminal_wrapper_node(node: &RawNode) -> bool { + if matches!(node.kind.as_str(), "break_statement" | "continue_statement") { + return node.children.len() == 1 && node.children[0].text == node.text; + } + if node.kind == "as_pattern_target" { + return node.children.len() == 1 && node.children[0].kind == "identifier"; + } + if node.kind == "dotted_name" { + return node.children.len() == 1 && node.children[0].kind == "identifier"; + } + if node.kind == "keyword_separator" { + return node.children.len() == 1 && node.children[0].text == node.text; + } + python_simple_type_wrapper_node(node) +} + +fn python_simple_type_wrapper_node(node: &RawNode) -> bool { + if node.kind != "type" || node.children.len() != 1 { + return false; + } + let child = &node.children[0]; + child.children.is_empty() + && matches!( + child.kind.as_str(), + "identifier" | "none" | "true" | "false" | "integer" | "float" | "string" + ) + && child.text == node.text +} + +fn python_escape_only_string_content(node: &RawNode) -> bool { + node.kind == "string_content" + && node.children.len() == 1 + && node.children[0].kind == "escape_sequence" + && node.children[0].text == node.text +} + +fn python_single_statement_block_child(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "assert_statement" + | "break_statement" + | "continue_statement" + 
| "for_statement" + | "function_definition" + | "if_statement" + | "raise_statement" + | "try_statement" + | "with_statement" + | "while_statement" + ) +} + +fn python_if_statement_clone_children(node: &RawNode) -> Vec<&RawNode> { + if node.kind == "block" + && node.children.len() == 1 + && matches!( + node.children[0].kind.as_str(), + "break_statement" | "continue_statement" + ) + { + return node.children[0].children.iter().collect(); + } + vec![node] +} + +fn python_clause_clone_children(node: &RawNode) -> Vec<&RawNode> { + if node.kind == "block" + && node.children.len() == 1 + && matches!( + node.children[0].kind.as_str(), + "break_statement" | "continue_statement" + ) + { + return node.children[0].children.iter().collect(); + } + vec![node] +} + +fn python_simple_with_clause(node: &RawNode) -> bool { + if node.children.len() != 1 || node.children[0].kind != "with_item" { + return false; + } + let with_item = &node.children[0]; + with_item.text == node.text + && with_item.children.len() == 1 + && with_item.children[0].kind == "identifier" +} + +fn python_with_clause_clone_children(node: &RawNode) -> Vec<&RawNode> { + if node.kind == "with_item" { + if node.children.len() == 1 && node.children[0].kind == "as_pattern" { + return node.children[0].children.iter().collect(); + } + return node.children.iter().collect(); + } + vec![node] +} From 6b365d0cd034c139385cd91e319db0e4505d84e6 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 10:34:58 +0000 Subject: [PATCH 40/52] Improve decomplex Go local-flow parity --- gems/decomplex/lib/decomplex/syntax.rb | 275 +++++++++++-- gems/decomplex/lib/decomplex/syntax/go.rb | 92 ++++- gems/decomplex/lib/decomplex/syntax/php.rb | 4 +- gems/decomplex/lib/decomplex/syntax/python.rb | 4 +- .../src/decomplex/detectors/local_flow.rs | 369 +++++++++++++++--- .../src/decomplex/syntax/adapters/base.rs | 32 ++ .../rust/src/decomplex/syntax/adapters/go.rs | 49 ++- .../rust/src/decomplex/syntax/adapters/php.rs | 4 + 8 files 
changed, 734 insertions(+), 95 deletions(-) diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 1f88dd95d..b5f57540e 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -87,6 +87,7 @@ class TreeSitterLanguageAdapter nested_statement_wrapper_node_kinds: :NESTED_STATEMENT_WRAPPER_NODE_KINDS, identifier_node_kinds: :IDENTIFIER_NODE_KINDS, local_identifier_wrapper_node_kinds: :LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS, + indexed_lhs_node_kinds: :INDEXED_LHS_NODE_KINDS, assignment_node_kinds: :ASSIGNMENT_NODE_KINDS, assignment_operator_tokens: :ASSIGNMENT_OPERATOR_TOKENS, local_declaration_node_kinds: :LOCAL_DECLARATION_NODE_KINDS, @@ -514,6 +515,10 @@ def generic_function_body_statements(node) def generic_local_names(function_def, statements) names = Set.new(function_def.params.to_a.map(&:to_s)) + if method_receiver_node_kinds.include?(function_def.body.kind) && + (receiver = function_receiver_name(function_def.body, [])) + names.add(receiver) + end statements.each do |statement| names.merge(generic_local_writes(statement)) end @@ -521,8 +526,20 @@ def generic_local_names(function_def, statements) end def generic_local_statement(node, index, local_names) - reads = generic_local_reads(node, local_names).uniq - writes = generic_local_writes(node).uniq + declaration_target_keys = generic_declaration_target_keys(node) + assignment_lhs_read_target_keys = generic_assignment_lhs_read_target_keys(node) + assignment_lhs_target_keys = generic_assignment_lhs_target_keys(node) + reads = generic_local_reads( + node, + local_names, + declaration_target_keys: declaration_target_keys, + assignment_lhs_target_keys: assignment_lhs_read_target_keys + ).uniq + writes = generic_local_writes( + node, + declaration_target_keys: declaration_target_keys, + assignment_lhs_target_keys: assignment_lhs_target_keys + ).uniq LocalStatement.new( index: index, line: line(node), @@ -531,18 +548,28 @@ def 
generic_local_statement(node, index, local_names) source: normalize_text(node.text), reads: reads.to_set, writes: writes.to_set, - dependencies: generic_assignment_dependencies(node, local_names), + dependencies: generic_assignment_dependencies( + node, + local_names, + declaration_target_keys: declaration_target_keys, + assignment_lhs_read_target_keys: assignment_lhs_read_target_keys, + assignment_lhs_target_keys: assignment_lhs_target_keys + ), co_uses: reads.combination(2).map { |left, right| [left, right] } ) end - def generic_local_reads(node, local_names) + def generic_local_reads(node, local_names, declaration_target_keys: nil, assignment_lhs_target_keys: nil) + declaration_target_keys ||= generic_declaration_target_keys(node) + assignment_lhs_target_keys ||= generic_assignment_lhs_read_target_keys(node) reads = [] generic_walk_local(node) do |child| name = generic_local_identifier_text(child) next unless name next unless local_names.include?(name) next if generic_local_write_node?(child) + next if assignment_lhs_target_keys.include?(node_key(child)) + next if declaration_target_keys.include?(node_key(child)) next if generic_declaration_name?(child) next if generic_member_name?(child) next if skip_local_read_identifier?(child) @@ -552,27 +579,48 @@ def generic_local_reads(node, local_names) reads end - def generic_local_writes(node) + def generic_local_writes(node, declaration_target_keys: nil, assignment_lhs_target_keys: nil) + declaration_target_keys ||= generic_declaration_target_keys(node) + assignment_lhs_target_keys ||= generic_assignment_lhs_target_keys(node) writes = [] - if (name = generic_local_declaration_name(node)) - writes << name - end + writes.concat(generic_local_declaration_names(node)) writes.concat(generic_assignment_lhs_names(node)) generic_walk_local(node) do |child| - next unless generic_identifier?(child) - next unless generic_local_write_node?(child) + name = generic_local_identifier_text(child) + next unless name + next unless 
generic_local_write_node?(child) || + declaration_target_keys.include?(node_key(child)) || + assignment_lhs_target_keys.include?(node_key(child)) - writes << child.text.to_s + writes << name end writes end - def generic_assignment_dependencies(node, local_names) - lhs_names = generic_local_writes(node) + def generic_assignment_dependencies( + node, + local_names, + declaration_target_keys: nil, + assignment_lhs_read_target_keys: nil, + assignment_lhs_target_keys: nil + ) + declaration_target_keys ||= generic_declaration_target_keys(node) + assignment_lhs_read_target_keys ||= generic_assignment_lhs_read_target_keys(node) + assignment_lhs_target_keys ||= generic_assignment_lhs_target_keys(node) + lhs_names = generic_local_writes( + node, + declaration_target_keys: declaration_target_keys, + assignment_lhs_target_keys: assignment_lhs_target_keys + ) return [] if lhs_names.empty? - reads = generic_local_reads(node, local_names) - lhs_names + reads = generic_local_reads( + node, + local_names, + declaration_target_keys: declaration_target_keys, + assignment_lhs_target_keys: assignment_lhs_read_target_keys + ) - lhs_names lhs_names.product(reads).reject { |left, right| left == right }.uniq end @@ -663,42 +711,69 @@ def generic_declaration_name?(node) parent = parent_node(node) return false unless parent - generic_local_declaration_name_node(parent) == node + generic_local_declaration_name_nodes(parent).any? { |candidate| candidate == node } + end + + def generic_declaration_name_in_tree?(root, target) + generic_local_declaration_name_nodes(root).any? { |candidate| candidate == target } || + root.named_children.any? 
{ |child| generic_declaration_name_in_tree?(child, target) } + end + + def generic_declaration_target_keys(root) + keys = Set.new + generic_walk_local(root) do |node| + generic_local_declaration_name_nodes(node).each { |target| keys << node_key(target) } + end + keys end def generic_local_declaration_name(node) - generic_local_declaration_name_node(node)&.text + generic_local_declaration_name_nodes(node).filter_map { |child| generic_local_declaration_text(child) }.first + end + + def generic_local_declaration_names(node) + generic_local_declaration_name_nodes(node).filter_map { |child| generic_local_declaration_text(child) } + end + + def generic_local_declaration_text(node) + generic_local_identifier_text(node) || (simple_identifier_text?(node&.text) ? node.text.to_s : nil) end def generic_local_declaration_name_node(node) - return nil unless ts_node?(node) - return nil unless local_declaration_node_kinds.include?(node.kind) + generic_local_declaration_name_nodes(node).first + end + + def generic_local_declaration_name_nodes(node) + return [] unless ts_node?(node) + return [] unless local_declaration_node_kinds.include?(node.kind) if short_variable_declaration_node_kinds.include?(node.kind) left = node.named_children.find { |child| variable_declaration_node_kinds.include?(child.kind) } if left - identifier = left.named_children.find { |child| generic_identifier?(child) } - return identifier if identifier + identifiers = left.named_children.select { |child| generic_local_identifier_text(child) } + return identifiers unless identifiers.empty? 
+ return [left] if simple_identifier_text?(left.text) end - return left if simple_identifier_text?(left&.text) + return [] end variable = node.named_children.find { |child| variable_declaration_node_kinds.include?(child.kind) } - return variable if simple_identifier_text?(variable&.text) + return [variable] if simple_identifier_text?(variable&.text) declaration_assignment = node.named_children.find { |child| declaration_assignment_node_kinds.include?(child.kind) } if declaration_assignment lhs = declaration_assignment.named_children.first identifier = lhs&.named_children&.find { |child| generic_identifier?(child) } - return identifier if identifier - return lhs if simple_identifier_text?(lhs&.text) + return [identifier] if identifier + return [lhs] if simple_identifier_text?(lhs&.text) end - named_field(node, "pattern") || + candidate = named_field(node, "pattern") || named_field(node, "name") || node.named_children.find { |child| local_identifier_wrapper_node_kinds.include?(child.kind) } || node.named_children.find { |child| variable_declaration_node_kinds.include?(child.kind) }&.named_children&.find { |child| generic_identifier?(child) } || node.named_children.find { |child| generic_identifier?(child) } + candidate ? [candidate] : [] end def generic_assignment_lhs_names(node) @@ -706,13 +781,161 @@ def generic_assignment_lhs_names(node) return [] unless assignment_node_kinds.include?(node.kind) lhs = named_field(node, "left") || node.named_children.first + collect_generic_assignment_lhs_names(lhs) + end + + def collect_generic_assignment_lhs_names(lhs) return [] unless ts_node?(lhs) - return [lhs.text] if generic_identifier?(lhs) + + if indexed_lhs_node?(lhs) + object = lhs.named_children.first + return collect_generic_assignment_lhs_names(object) + end + + if expression_list_node_kinds.include?(lhs.kind) + return [lhs.text] if lhs.named_children.empty? 
&& generic_local_identifier_text(lhs) + + return lhs.named_children.flat_map { |child| collect_generic_assignment_lhs_names(child) } + end + + if indexed_lhs_node_kinds.include?(lhs.kind) + object = lhs.named_children.first + return collect_generic_assignment_lhs_names(object) + end + + if (name = generic_local_identifier_text(lhs)) + return [name] + end + return [] if generic_identifier?(lhs) + return [] if generic_member_name?(lhs) return [lhs.text] if simple_identifier_text?(lhs.text) lhs.named_children.filter_map { |child| child.text if generic_identifier?(child) } end + def generic_assignment_lhs_in_tree?(root, target) + return false unless ts_node?(root) + + if assignment_node_kinds.include?(root.kind) + lhs = named_field(root, "left") || root.named_children.first + return generic_assignment_lhs_read_target?(lhs, target) + end + + root.named_children.any? { |child| generic_assignment_lhs_in_tree?(child, target) } + end + + def generic_assignment_lhs_read_target_keys(root) + keys = Set.new + generic_walk_local(root) do |node| + next unless assignment_node_kinds.include?(node.kind) + + lhs = named_field(node, "left") || node.named_children.first + collect_generic_assignment_lhs_read_target_keys(lhs, keys) + end + keys + end + + def generic_assignment_lhs_target_keys(root) + keys = Set.new + generic_walk_local(root) do |node| + next unless assignment_node_kinds.include?(node.kind) + + lhs = named_field(node, "left") || node.named_children.first + collect_generic_assignment_lhs_target_keys(lhs, keys) + end + keys + end + + def collect_generic_assignment_lhs_read_target_keys(lhs, keys) + return unless ts_node?(lhs) + + if indexed_lhs_node?(lhs) + lhs.named_children.each { |child| collect_generic_assignment_lhs_read_target_keys(child, keys) } + return + end + + if expression_list_node_kinds.include?(lhs.kind) + if lhs.named_children.empty? 
&& generic_local_identifier_text(lhs) + keys << node_key(lhs) + return + end + + lhs.named_children.each { |child| collect_generic_assignment_lhs_read_target_keys(child, keys) } + return + end + + return if field_like_node?(lhs) + + if generic_identifier?(lhs) || generic_local_identifier_text(lhs) + keys << node_key(lhs) + return + end + + lhs.named_children.each { |child| collect_generic_assignment_lhs_read_target_keys(child, keys) } + end + + def collect_generic_assignment_lhs_target_keys(lhs, keys) + return unless ts_node?(lhs) + + if indexed_lhs_node?(lhs) + object = lhs.named_children.first + collect_generic_assignment_lhs_target_keys(object, keys) + return + end + + if expression_list_node_kinds.include?(lhs.kind) + if lhs.named_children.empty? && generic_local_identifier_text(lhs) + keys << node_key(lhs) + return + end + + lhs.named_children.each { |child| collect_generic_assignment_lhs_target_keys(child, keys) } + return + end + + return if field_like_node?(lhs) + + if generic_identifier?(lhs) || generic_local_identifier_text(lhs) + keys << node_key(lhs) + return + end + + lhs.named_children.each { |child| collect_generic_assignment_lhs_target_keys(child, keys) } + end + + def generic_assignment_lhs_target?(lhs, target) + generic_assignment_lhs_read_target?(lhs, target) + end + + def generic_assignment_lhs_read_target?(lhs, target) + return false unless ts_node?(lhs) + + return ts_node_contains?(lhs, target) if indexed_lhs_node?(lhs) + + if expression_list_node_kinds.include?(lhs.kind) + return lhs == target if lhs.named_children.empty? && generic_local_identifier_text(lhs) + + return lhs.named_children.any? 
{ |child| generic_assignment_lhs_read_target?(child, target) } + end + + return false if field_like_node?(lhs) + + return lhs == target if generic_identifier?(lhs) + + ts_node_contains?(lhs, target) + end + + def indexed_lhs_node?(node) + ts_node?(node) && indexed_lhs_node_kinds.include?(node.kind) + end + + def ts_node_contains?(root, target) + return false unless ts_node?(root) + return true if root == target + + root.named_children.any? { |child| ts_node_contains?(child, target) } + end + def simple_identifier_text?(text) text.to_s.match?(/\A[A-Za-z_]\w*\z/) end diff --git a/gems/decomplex/lib/decomplex/syntax/go.rb b/gems/decomplex/lib/decomplex/syntax/go.rb index 5904153cf..e4cdf783f 100644 --- a/gems/decomplex/lib/decomplex/syntax/go.rb +++ b/gems/decomplex/lib/decomplex/syntax/go.rb @@ -32,10 +32,11 @@ class GoSyntaxAdapter < TreeSitterLanguageAdapter IDENTIFIER_NODE_KINDS = %w[identifier].freeze FIELD_IDENTIFIER_NODE_KINDS = %w[field_identifier].freeze PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier field_identifier].freeze - LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = %w[expression_list].freeze - LOCAL_DECLARATION_NODE_KINDS = %w[short_var_declaration variable_declaration].freeze - SHORT_VARIABLE_DECLARATION_NODE_KINDS = %w[short_var_declaration].freeze - VARIABLE_DECLARATION_NODE_KINDS = %w[expression_list variable_declaration].freeze + LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = %w[expression_list literal_element].freeze + INDEXED_LHS_NODE_KINDS = %w[index_expression slice_expression].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[short_var_declaration range_clause var_declaration variable_declaration].freeze + SHORT_VARIABLE_DECLARATION_NODE_KINDS = %w[short_var_declaration range_clause].freeze + VARIABLE_DECLARATION_NODE_KINDS = %w[expression_list var_spec variable_declaration].freeze LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze FIELD_DECLARATION_NODE_KINDS = %w[field_declaration].freeze DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter_declaration 
function_declaration method_declaration type_spec].freeze @@ -81,6 +82,55 @@ def visibility(_document, node) exported_name_visibility(function_name(node)) end + def function_params(node) + lists = node.named_children.select { |child| child.kind == "parameter_list" } + params = node.kind == "method_declaration" ? lists[1] : lists.first + return super unless params + + params.named_children.filter_map { |param| parameter_name(param) }.uniq + end + + def generic_function_body_statements(node) + body = generic_function_body_node(node) + return super unless body + + named = body.named_children.reject { |child| comment_node?(child) } + if named.size == 1 && named.first.kind == "statement_list" && go_adjacent_call_statement?(named.first) + return [named.first] + end + + super + end + + def generic_local_identifier_text(node) + name = super + name == "_" ? nil : name + end + + def generic_local_declaration_text(node) + return nil if node.text == "_" + + super + end + + def generic_local_write_node?(node) + go_update_statement_target?(node) || super + end + + def skip_local_read_identifier?(node) + go_keyed_element_key?(node) || super + end + + def generic_local_declaration_name_nodes(node) + return go_var_spec_name_nodes(node) if node.kind == "var_declaration" + + super + end + + def indexed_lhs_node?(node) + super || (node.kind == "expression_list" && node.children.any? { |child| !child.named? 
&& child.text == "[" }) + end + private def boolean_container?(node) @@ -88,6 +138,40 @@ def boolean_container?(node) super end + + def go_update_statement_target?(node) + parent = parent_node(node) + return false unless parent && %w[inc_statement dec_statement].include?(parent.kind) + + parent.named_children.first == node + end + + def go_adjacent_call_statement?(node) + named = node.named_children.reject { |child| comment_node?(child) } + named.size == 2 && + adjacent_call_node_kinds.include?(named.first.kind) && + argument_list_node_kinds.include?(named.last.kind) + end + + def go_keyed_element_key?(node) + parent = parent_node(node) + return false unless parent&.kind == "keyed_element" + + parent.named_children.first == node + end + + def go_var_spec_name_nodes(node) + go_var_spec_nodes(node).flat_map do |spec| + names = spec.named_children.take_while { |child| child.kind == "identifier" } + names.empty? ? [] : names + end + end + + def go_var_spec_nodes(node) + return [node] if node.kind == "var_spec" + + node.named_children.flat_map { |child| go_var_spec_nodes(child) } + end end end end diff --git a/gems/decomplex/lib/decomplex/syntax/php.rb b/gems/decomplex/lib/decomplex/syntax/php.rb index a6be7e5c8..6805ab832 100644 --- a/gems/decomplex/lib/decomplex/syntax/php.rb +++ b/gems/decomplex/lib/decomplex/syntax/php.rb @@ -225,8 +225,8 @@ def generic_member_name?(node) super end - def generic_local_writes(node) - (super + php_local_write_names(node)).map { |name| php_identifier_text_value(name) }.uniq + def generic_local_writes(node, **kwargs) + (super(node, **kwargs) + php_local_write_names(node)).map { |name| php_identifier_text_value(name) }.uniq end def generic_local_write_node?(node) diff --git a/gems/decomplex/lib/decomplex/syntax/python.rb b/gems/decomplex/lib/decomplex/syntax/python.rb index dabd09324..1b5c256cd 100644 --- a/gems/decomplex/lib/decomplex/syntax/python.rb +++ b/gems/decomplex/lib/decomplex/syntax/python.rb @@ -263,8 +263,8 @@ def 
generic_local_write_node?(node) super || python_annotation_lhs?(node) || python_loop_target?(node) end - def generic_local_writes(node) - (super + python_with_alias_names(node)).uniq + def generic_local_writes(node, **kwargs) + (super(node, **kwargs) + python_with_alias_names(node)).uniq end def skip_local_read_identifier?(node) diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index ce65fa8d1..2c1ff9296 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -73,29 +73,14 @@ pub fn scan_documents(documents: &[Document]) -> Vec { for document in documents { let normalized = normalized_local_methods(document); if document.language != Language::Ruby { - if document.language == Language::Python { - let raw = raw_local_methods(document); - let raw_keys: BTreeSet<_> = raw.iter().map(method_summary_key).collect(); - out.extend(raw); - out.extend( - normalized - .into_iter() - .filter(|summary| !raw_keys.contains(&method_summary_key(summary))), - ); - } else { - let mut normalized_by_key: BTreeMap<_, _> = normalized + let raw = raw_local_methods(document); + let raw_keys: BTreeSet<_> = raw.iter().map(method_summary_key).collect(); + out.extend(raw); + out.extend( + normalized .into_iter() - .map(|summary| (method_summary_key(&summary), summary)) - .collect(); - for raw in raw_local_methods(document) { - out.push( - normalized_by_key - .remove(&method_summary_key(&raw)) - .unwrap_or(raw), - ); - } - out.extend(normalized_by_key.into_values()); - } + .filter(|summary| !raw_keys.contains(&method_summary_key(summary))), + ); continue; } @@ -217,12 +202,42 @@ fn raw_local_names( profile: &dyn LanguageProfile, ) -> BTreeSet { let mut names: BTreeSet = function.params.iter().cloned().collect(); + if let Some(receiver) = raw_function_receiver_name(&function.body, profile) { + names.insert(receiver); + } for statement in 
statements { names.extend(raw_local_writes(statement, profile)); } names } +fn raw_function_receiver_name(node: &RawNode, profile: &dyn LanguageProfile) -> Option { + if !profile + .method_receiver_node_kinds() + .contains(&node.kind.as_str()) + { + return None; + } + let receiver_params = raw_named_children(node).into_iter().find(|child| { + profile + .parameter_list_node_kinds() + .contains(&child.kind.as_str()) + })?; + let receiver = raw_named_children(receiver_params) + .into_iter() + .find(|child| { + profile + .receiver_parameter_node_kinds() + .contains(&child.kind.as_str()) + })?; + let name = raw_named_children(receiver).into_iter().find(|child| { + profile + .first_argument_receiver_name_node_kinds() + .contains(&child.kind.as_str()) + })?; + raw_local_identifier_text(name, profile) +} + fn raw_statement_summary( node: &RawNode, index: usize, @@ -236,7 +251,7 @@ fn raw_statement_summary( line: node.span[0], end_line: node.span[2], span: node.span, - source: ast::normalize_text(&node.text), + source: profile.normalize_source_text(&node.text), dependencies: raw_assignment_dependencies(node, local_names, profile), co_uses: co_use_pairs(&reads), reads, @@ -260,6 +275,7 @@ fn raw_local_reads( }; if local_names.contains(&name) && !raw_local_write_node(child, parent, profile) + && !raw_assignment_lhs_read_in_tree(node, child, profile) && !raw_python_import_name(parent, profile) && !raw_python_with_alias_read(child, parent, profile) && !raw_declaration_name_in_tree(node, child, profile) @@ -277,8 +293,14 @@ fn raw_local_writes(node: &RawNode, profile: &dyn LanguageProfile) -> BTreeSet BTreeSet O .identifier_node_kinds() .contains(&node.kind.as_str()) { - return Some(node.text.clone()); + let text = profile.normalize_local_identifier_text(&node.text); + return (!text.is_empty()).then_some(text); } if profile .local_identifier_wrapper_node_kinds() @@ -437,7 +467,8 @@ fn raw_local_identifier_text(node: &RawNode, profile: &dyn LanguageProfile) -> O && 
raw_named_children(node).is_empty() && simple_identifier(&node.text) { - return Some(node.text.clone()); + let text = profile.normalize_local_identifier_text(&node.text); + return (!text.is_empty()).then_some(text); } None } @@ -460,6 +491,16 @@ fn raw_local_write_node( let Some(parent) = parent else { return false; }; + if profile + .update_statement_node_kinds() + .contains(&parent.kind.as_str()) + && raw_named_children(parent) + .first() + .map(|target| std::ptr::eq(*target, node)) + .unwrap_or(false) + { + return true; + } if profile .assignment_node_kinds() .contains(&parent.kind.as_str()) @@ -612,8 +653,11 @@ fn raw_declaration_name( profile: &dyn LanguageProfile, ) -> bool { parent - .and_then(|parent| raw_local_declaration_name_node(parent, profile)) - .map(|name| std::ptr::eq(name, node) || raw_contains_node(name, node)) + .map(|parent| { + raw_local_declaration_name_nodes(parent, profile) + .into_iter() + .any(|name| std::ptr::eq(name, node) || raw_contains_node(name, node)) + }) .unwrap_or(false) } @@ -622,9 +666,9 @@ fn raw_declaration_name_in_tree( target: &RawNode, profile: &dyn LanguageProfile, ) -> bool { - raw_local_declaration_name_node(root, profile) - .map(|name| std::ptr::eq(name, target) || raw_contains_node(name, target)) - .unwrap_or(false) + raw_local_declaration_name_nodes(root, profile) + .into_iter() + .any(|name| std::ptr::eq(name, target) || raw_contains_node(name, target)) || root .children .iter() @@ -635,14 +679,20 @@ fn raw_local_declaration_name_node<'a>( node: &'a RawNode, profile: &dyn LanguageProfile, ) -> Option<&'a RawNode> { + raw_local_declaration_name_nodes(node, profile) + .into_iter() + .next() +} + +fn raw_local_declaration_name_nodes<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Vec<&'a RawNode> { if !profile .local_declaration_node_kinds() .contains(&node.kind.as_str()) - && !profile - .variable_declaration_node_kinds() - .contains(&node.kind.as_str()) { - return None; + return Vec::new(); } if 
profile @@ -654,20 +704,28 @@ fn raw_local_declaration_name_node<'a>( .variable_declaration_node_kinds() .contains(&child.kind.as_str()) }) { - return raw_first_identifier(left, profile).or(Some(left)); + let identifiers = raw_named_children(left) + .into_iter() + .filter(|child| raw_local_identifier_text(child, profile).is_some()) + .collect::>(); + if !identifiers.is_empty() { + return identifiers; + } + if simple_identifier(&left.text) { + return vec![left]; + } } + return Vec::new(); } - if let Some(variable) = raw_named_children(node).into_iter().find(|child| { - profile - .variable_declaration_node_kinds() - .contains(&child.kind.as_str()) - }) { - if simple_identifier(&variable.text) { - return Some(variable); - } - if let Some(identifier) = raw_first_identifier(variable, profile) { - return Some(identifier); + let variables = raw_variable_declaration_nodes(node, profile); + if !variables.is_empty() { + let names = variables + .into_iter() + .flat_map(|variable| raw_variable_declaration_name_nodes(variable, profile)) + .collect::>(); + if !names.is_empty() { + return names; } } @@ -677,7 +735,10 @@ fn raw_local_declaration_name_node<'a>( .contains(&child.kind.as_str()) }) { if let Some(lhs) = raw_named_children(declaration_assignment).first().copied() { - return raw_first_identifier(lhs, profile).or(Some(lhs)); + return raw_first_identifier(lhs, profile) + .or(Some(lhs)) + .into_iter() + .collect(); } } @@ -689,6 +750,60 @@ fn raw_local_declaration_name_node<'a>( .contains(&child.kind.as_str()) }) .or_else(|| raw_first_identifier(node, profile)) + .into_iter() + .collect() +} + +fn raw_variable_declaration_nodes<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Vec<&'a RawNode> { + let mut out = Vec::new(); + raw_collect_variable_declaration_nodes(node, profile, &mut out); + out +} + +fn raw_collect_variable_declaration_nodes<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, + out: &mut Vec<&'a RawNode>, +) { + if profile + 
.variable_declaration_node_kinds() + .contains(&node.kind.as_str()) + { + out.push(node); + return; + } + for child in raw_named_children(node) { + raw_collect_variable_declaration_nodes(child, profile, out); + } +} + +fn raw_variable_declaration_name_nodes<'a>( + variable: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Vec<&'a RawNode> { + if simple_identifier(&variable.text) { + return vec![variable]; + } + + if profile + .multi_name_variable_declaration_node_kinds() + .contains(&variable.kind.as_str()) + { + let names = raw_named_children(variable) + .into_iter() + .take_while(|child| raw_local_identifier_text(child, profile).is_some()) + .collect::>(); + if !names.is_empty() { + return names; + } + } + + raw_first_identifier(variable, profile) + .into_iter() + .collect() } fn raw_first_identifier<'a>( @@ -720,6 +835,152 @@ fn raw_assignment_lhs(node: &RawNode, parent: &RawNode, profile: &dyn LanguagePr .unwrap_or(false) } +fn raw_assignment_lhs_read_in_tree( + root: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + if profile + .assignment_node_kinds() + .contains(&root.kind.as_str()) + { + if let Some(lhs) = raw_named_children(root).first() { + if raw_assignment_lhs_read_target(lhs, target, profile) { + return true; + } + } + } + root.children + .iter() + .any(|child| raw_assignment_lhs_read_in_tree(child, target, profile)) +} + +fn raw_assignment_lhs_write_in_tree( + root: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + if profile + .assignment_node_kinds() + .contains(&root.kind.as_str()) + { + if let Some(lhs) = raw_named_children(root).first() { + if raw_assignment_lhs_write_target(lhs, target, profile) { + return true; + } + } + } + root.children + .iter() + .any(|child| raw_assignment_lhs_write_in_tree(child, target, profile)) +} + +fn raw_assignment_lhs_read_target( + lhs: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + if raw_indexed_lhs_node(lhs, profile) { + 
return raw_contains_node(lhs, target); + } + if raw_field_like_node(lhs, profile) { + return profile.suppress_field_receiver_lhs_reads() + && raw_member_receiver_target(lhs, target, profile); + } + if raw_local_identifier_text(lhs, profile).is_some() { + return std::ptr::eq(lhs, target); + } + if profile + .expression_list_node_kinds() + .contains(&lhs.kind.as_str()) + { + if raw_named_children(lhs).is_empty() && raw_local_identifier_text(lhs, profile).is_some() { + return std::ptr::eq(lhs, target); + } + return raw_named_children(lhs) + .into_iter() + .any(|child| raw_assignment_lhs_read_target(child, target, profile)); + } + raw_contains_node(lhs, target) +} + +fn raw_assignment_lhs_write_target( + lhs: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + if raw_indexed_lhs_node(lhs, profile) { + return raw_named_children(lhs) + .first() + .map(|object| { + !raw_field_like_node(object, profile) + && raw_assignment_lhs_write_target(object, target, profile) + }) + .unwrap_or(false); + } + if raw_field_like_node(lhs, profile) { + return raw_member_receiver_target(lhs, target, profile); + } + if raw_local_identifier_text(lhs, profile).is_some() { + return std::ptr::eq(lhs, target); + } + if profile + .expression_list_node_kinds() + .contains(&lhs.kind.as_str()) + { + if raw_named_children(lhs).is_empty() && raw_local_identifier_text(lhs, profile).is_some() { + return std::ptr::eq(lhs, target); + } + return raw_named_children(lhs) + .into_iter() + .any(|child| raw_assignment_lhs_write_target(child, target, profile)); + } + raw_contains_node(lhs, target) +} + +fn raw_indexed_lhs_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile + .indexed_lhs_node_kinds() + .contains(&node.kind.as_str()) + || (profile + .indexed_lhs_bracket_wrapper_node_kinds() + .contains(&node.kind.as_str()) + && node + .children + .iter() + .any(|child| !child.named && child.text == "[")) +} + +fn raw_field_like_node(node: &RawNode, profile: &dyn 
LanguageProfile) -> bool { + profile + .field_like_node_kinds() + .contains(&node.kind.as_str()) + || (profile + .field_like_dot_wrapper_node_kinds() + .contains(&node.kind.as_str()) + && node + .children + .iter() + .any(|child| !child.named && child.text == ".")) +} + +fn raw_member_receiver_target( + node: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + let Some(receiver) = raw_named_children(node).first().copied() else { + return false; + }; + if raw_local_identifier_text(receiver, profile).is_some() { + return std::ptr::eq(receiver, target); + } + if raw_field_like_node(receiver, profile) { + return raw_member_receiver_target(receiver, target, profile); + } + false +} + fn raw_member_name( node: &RawNode, parent: Option<&RawNode>, @@ -728,10 +989,7 @@ fn raw_member_name( let Some(parent) = parent else { return false; }; - if !profile - .field_like_node_kinds() - .contains(&parent.kind.as_str()) - { + if !raw_field_like_node(parent, profile) { return false; } raw_named_children(parent) @@ -744,10 +1002,7 @@ fn raw_call_name(node: &RawNode, parent: Option<&RawNode>, profile: &dyn Languag let Some(parent) = parent else { return false; }; - if profile - .field_like_node_kinds() - .contains(&parent.kind.as_str()) - { + if raw_field_like_node(parent, profile) { return false; } profile.call_node_kinds().contains(&parent.kind.as_str()) diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs index 4630a8fce..46e6935ad 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs @@ -181,6 +181,10 @@ pub(crate) trait LanguageProfile { EMPTY_NODE_KINDS } + fn normalize_local_identifier_text(&self, text: &str) -> String { + text.to_string() + } + fn field_identifier_node_kinds(&self) -> &[&str] { EMPTY_NODE_KINDS } @@ -201,6 +205,18 @@ pub(crate) trait LanguageProfile { EMPTY_NODE_KINDS } + fn 
indexed_lhs_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn indexed_lhs_bracket_wrapper_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn update_statement_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + fn local_declaration_node_kinds(&self) -> &[&str] { EMPTY_NODE_KINDS } @@ -213,6 +229,10 @@ pub(crate) trait LanguageProfile { EMPTY_NODE_KINDS } + fn multi_name_variable_declaration_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + fn declaration_assignment_node_kinds(&self) -> &[&str] { EMPTY_NODE_KINDS } @@ -221,6 +241,10 @@ pub(crate) trait LanguageProfile { EMPTY_NODE_KINDS } + fn method_receiver_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + fn receiver_parameter_node_kinds(&self) -> &[&str] { EMPTY_NODE_KINDS } @@ -318,6 +342,14 @@ pub(crate) trait LanguageProfile { EMPTY_NODE_KINDS } + fn field_like_dot_wrapper_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn suppress_field_receiver_lhs_reads(&self) -> bool { + false + } + fn path_action_node_kinds(&self) -> &[&str] { EMPTY_NODE_KINDS } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs index a9536016c..87074f337 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs @@ -58,15 +58,27 @@ impl LanguageProfile for GoProfile { } fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { + &["expression_list", "literal_element"] + } + + fn indexed_lhs_node_kinds(&self) -> &[&str] { + &["index_expression", "slice_expression"] + } + + fn indexed_lhs_bracket_wrapper_node_kinds(&self) -> &[&str] { &["expression_list"] } + fn update_statement_node_kinds(&self) -> &[&str] { + &["inc_statement", "dec_statement"] + } + fn call_node_kinds(&self) -> &[&str] { &["call_expression", "go_statement"] } fn identifier_node_kinds(&self) -> &[&str] { - &["identifier", "type_identifier"] + &["identifier"] } fn 
field_identifier_node_kinds(&self) -> &[&str] { @@ -98,21 +110,42 @@ impl LanguageProfile for GoProfile { } fn local_declaration_node_kinds(&self) -> &[&str] { - &["short_var_declaration", "variable_declaration"] + &[ + "short_var_declaration", + "range_clause", + "var_declaration", + "variable_declaration", + ] } fn short_variable_declaration_node_kinds(&self) -> &[&str] { - &["short_var_declaration"] + &["short_var_declaration", "range_clause"] } fn variable_declaration_node_kinds(&self) -> &[&str] { - &["expression_list", "variable_declaration"] + &["expression_list", "var_spec", "variable_declaration"] + } + + fn multi_name_variable_declaration_node_kinds(&self) -> &[&str] { + &["var_spec"] + } + + fn normalize_local_identifier_text(&self, text: &str) -> String { + if text == "_" { + String::new() + } else { + text.to_string() + } } fn receiver_type_node_kinds(&self) -> &[&str] { &["pointer_type", "type_identifier"] } + fn method_receiver_node_kinds(&self) -> &[&str] { + &["method_declaration"] + } + fn receiver_parameter_node_kinds(&self) -> &[&str] { &["parameter_declaration"] } @@ -177,6 +210,14 @@ impl LanguageProfile for GoProfile { &["selector_expression"] } + fn field_like_dot_wrapper_node_kinds(&self) -> &[&str] { + &["expression_list"] + } + + fn suppress_field_receiver_lhs_reads(&self) -> bool { + true + } + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { match node.kind() { "call_expression" => self.default_call_target(node, source), diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs index f47fe569d..70ac14759 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs @@ -135,6 +135,10 @@ impl LanguageProfile for PhpProfile { php_identifier_text_value(text) } + fn normalize_local_identifier_text(&self, text: &str) -> String { + php_identifier_text_value(text) + } + fn 
function_name(&self, node: Node<'_>, source: &str) -> Option { if self.function_node_kinds().contains(&node.kind()) { return node From b7569ac348494ad9dcb9474224810bbd753b9b76 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 11:47:09 +0000 Subject: [PATCH 41/52] Improve decomplex Rust detector fact architecture --- .../agents/fixture-coverage-gap-analysis.md | 88 +++ .../examples/oracles/local-flow.json | 107 ++- gems/decomplex/lib/decomplex/syntax.rb | 33 +- gems/decomplex/lib/decomplex/syntax/go.rb | 8 + .../decomplex/detectors/false_simplicity.rs | 130 +--- .../rust/src/decomplex/detectors/fat_union.rs | 224 +----- .../detectors/implicit_control_flow.rs | 661 +----------------- .../src/decomplex/detectors/local_flow.rs | 69 +- .../src/decomplex/detectors/locality_drag.rs | 38 +- .../detectors/state_branch_density.rs | 272 +------ .../src/decomplex/detectors/state_mesh.rs | 251 +------ .../detectors/temporal_ordering_pressure.rs | 266 +------ .../weighted_inlined_cognitive_complexity.rs | 515 +------------- gems/decomplex/rust/src/decomplex/syntax.rs | 19 + .../src/decomplex/syntax/adapters/base.rs | 8 + .../rust/src/decomplex/syntax/adapters/go.rs | 8 + .../rust/src/decomplex/syntax/complexity.rs | 314 +++++++++ .../decomplex/syntax/tree_sitter_adapter.rs | 135 +++- gems/decomplex/rust/src/lib.rs | 1 + gems/decomplex/rust/tests/examples_oracle.rs | 651 +++++++++++++++++ gems/decomplex/test/examples_oracle_test.rb | 7 +- 21 files changed, 1496 insertions(+), 2309 deletions(-) create mode 100644 gems/decomplex/docs/agents/fixture-coverage-gap-analysis.md create mode 100644 gems/decomplex/rust/src/decomplex/syntax/complexity.rs create mode 100644 gems/decomplex/rust/src/lib.rs create mode 100644 gems/decomplex/rust/tests/examples_oracle.rs diff --git a/gems/decomplex/docs/agents/fixture-coverage-gap-analysis.md b/gems/decomplex/docs/agents/fixture-coverage-gap-analysis.md new file mode 100644 index 000000000..0c1d47a1d --- /dev/null +++ 
b/gems/decomplex/docs/agents/fixture-coverage-gap-analysis.md @@ -0,0 +1,88 @@ +# Decomplex fixture and coverage gap analysis + +Date: 2026-06-20 + +## Current measured state + +The shared detector examples now run in both places: + +- Ruby: `gems/decomplex/test/examples_oracle_test.rb` +- Rust: `gems/decomplex/rust/tests/examples_oracle.rs` + +Current shared fixture grid: + +- 15 languages. +- 24 detectors. +- 360 detector/language fixture cells. +- 0 missing fixture cells. + +Current Rust coverage from `cargo llvm-cov`, with Rust test code excluded from the line counts: + +- Rust production: 68,796 / 84,602 executable lines, 81.32%. +- Rust detectors: 5,728 / 6,725 executable lines, 85.17%. + +The largest earlier false signal was stale Rust-only detector code. Several low-coverage detector paths were not missing fixture coverage; they were code paths Ruby no longer owns in detectors: + +- `state_branch_density`: removed the normalized-AST fallback scanner. Ruby consumes mined `branch_decisions`. +- `fat_union`: removed the normalized-root fallback scanner. Ruby consumes dispatch facts. +- `false_simplicity`: moved semantic-effect classification into syntax facts. The detector now consumes `semantic_effect_sites`. +- `state_mesh`: removed normalized-root read/write fallback behavior. The detector now consumes state facts. +- `temporal_ordering_pressure`: now discovers owners from both owner and function facts like Ruby. +- `weighted_inlined_cognitive_complexity` and `locality_drag`: moved local complexity scoring to `Document#local_complexity_scores`, matching Ruby's syntax fact boundary. + +## Detectors below 90% Rust LoC coverage + +These are the remaining detector implementation files below 90% after the architecture cleanup: + +| Detector | Coverage | Primary gap | +| --- | ---: | --- | +| `sequence_mine` | 62.07% | One fixture hits only the positive pair. 
It misses ignored/declarative calls, nested protocol events, confidence filters, denominator branches, and sort tie-breaks. | +| `derived_state` | 65.38% | Fixture hits one stale derived variable. It misses multi-write ordering, self-dependency exclusion, no-reassignment, and recomputed-derived negatives. | +| `redundant_nil_guard` | 69.57% | Fixture is too narrow for guard shapes. Needs safe navigation, explicit nil checks, chained guards, local reassignment, and negative useful guards. | +| `decision_pressure` | 79.01% | Fixture hits local contract assignment only. It misses essential dispatch, rescue-nil, receiver/index/local contract canonicalization, conditional assignment rejection, and ranking. | +| `state_branch_density` | 79.44% | Fixture hits one non-nested state predicate. It misses wrapper suppression for nested branches and multi-row ranking. | +| `false_simplicity` | 79.59% | Oracle asserts only `kind`. It misses detail/support/scatter, top-level effects, monkeypatch/core owner cases, reopen cases, and grouping/ranking. | +| `state_mesh` | 81.35% | Fixture has one field. It misses multi-field percentiles, semantic-alias re-derivations, custom fields, and graph details. | +| `path_condition` | 84.38% | Fixture hits one neglected condition. It misses action/guard extraction variants, support/confidence filters, span containment, and negative paths. | +| `weighted_inlined_cognitive_complexity` | 84.81% | Architecture is now correct; fixture still needs multi-finding ranking, shared public step weighting, cycle/visited guard, and missing-callee branches. | +| `structural_topology` | 84.85% | Fixture misses self-call exclusion, singleton/static scoped names, multi-line source spans, hidden Ruby owner wrappers, and enclosing-span helper branches. | +| `local_flow` | 86.93% | The oracle is stronger than before but still not broad enough for all syntax categories. Needs local-flow semantic cases by grammar feature. 
| +| `locality_drag` | 89.89% | Needs one more case for low-complexity/short-gap negatives, rewrite-before-use, related gap expansion, and ranking. | + +## Fixture strategy + +The plan is sound, with one correction: do not write fixtures to cover code that should not exist in detectors. First delete or move misplaced detector-owned syntax work, then expand fixtures around the remaining legitimate detector behavior. + +Use these fixture layers: + +1. Keep the existing `examples/<detector>/<language>.<ext>` files as smoke tests. +2. Add case fixtures where one file per detector is not enough. Preferred layout: + - `examples/<detector>/<case>/<language>.<ext>` + - `examples/oracles/<detector>/<case>.json` +3. Keep oracles shared across languages. Only scrub location/SARIF fields; do not collapse semantic fields to counts when the detector behavior depends on the omitted fields. +4. Run both engines against the same oracle projection: + - Ruby for cross-engine parity. + - Rust integration tests for Rust CI truth and Rust LCOV coverage. + +## Immediate fixture expansion order + +Highest leverage order: + +1. `sequence_mine`: add support/confidence negative cases and nested protocol events. +2. `derived_state`: add stale, recomputed, self-dependent, and multi-write cases. +3. `redundant_nil_guard`: add guard-shape matrix and useful-guard negatives. +4. `state_branch_density`: add nested wrapper suppression and multi-row ranking. +5. `decision_pressure`: add essential dispatch and rescue-nil cases. +6. `false_simplicity`: strengthen projection to include `kind`, `detail`, `support`, and `scatter`, then add effect and monkeypatch cases. +7. `state_mesh`: add multi-field/re-derivation/custom-field cases. +8. `local_flow`: add syntax-facts-style fixtures for reads/writes/dependencies/co-uses across declarations, destructuring, member/index writes, loops, closures, and cleanup blocks. + +## Root-cause/report/SARIF plan + +Ignoring reporting for the current detector pass is reasonable. The downstream plan should still be: + +1.
Create a shared facts JSON oracle containing detector outputs and syntax facts. +2. Feed that JSON into Ruby and Rust root-cause code and compare a stable projected output. +3. Reuse the same facts JSON for report and SARIF snapshot tests later. + +That gives coverage for root cause, convergence, report, and SARIF without multiplying language fixtures. The JSON should contain full facts, not a detector-specific subset, so later stages can share it. diff --git a/gems/decomplex/examples/oracles/local-flow.json b/gems/decomplex/examples/oracles/local-flow.json index 4b12452d2..3fb7a8586 100644 --- a/gems/decomplex/examples/oracles/local-flow.json +++ b/gems/decomplex/examples/oracles/local-flow.json @@ -4,8 +4,111 @@ }, "expected": [ { - "statement_count": 6, - "boundary_count": 1 + "method": "mixed", + "statements": [ + { + "reads": [ + "price", + "tax" + ], + "writes": [ + "subtotal" + ], + "dependencies": [ + [ + "subtotal", + "price" + ], + [ + "subtotal", + "tax" + ] + ], + "co_uses": [ + [ + "price", + "tax" + ] + ] + }, + { + "reads": [ + "subtotal" + ], + "writes": [ + "total" + ], + "dependencies": [ + [ + "total", + "subtotal" + ] + ], + "co_uses": [ + ] + }, + { + "reads": [ + ], + "writes": [ + "timestamp" + ], + "dependencies": [ + ], + "co_uses": [ + ] + }, + { + "reads": [ + ], + "writes": [ + "buffer" + ], + "dependencies": [ + ], + "co_uses": [ + ] + }, + { + "reads": [ + "buffer", + "timestamp" + ], + "writes": [ + ], + "dependencies": [ + ], + "co_uses": [ + [ + "buffer", + "timestamp" + ] + ] + }, + { + "reads": [ + "buffer", + "total" + ], + "writes": [ + ], + "dependencies": [ + ], + "co_uses": [ + [ + "buffer", + "total" + ] + ] + } + ], + "boundaries": [ + { + "before_index": 1, + "after_index": 2, + "kind": "blank" + } + ] } ] } diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index b5f57540e..13259dc3d 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb 
@@ -806,6 +806,10 @@ def collect_generic_assignment_lhs_names(lhs) if (name = generic_local_identifier_text(lhs)) return [name] end + if field_assignment_writes_receiver? && field_like_node?(lhs) + receiver = lhs.named_children.first + return collect_generic_assignment_lhs_names(receiver) + end return [] if generic_identifier?(lhs) return [] if generic_member_name?(lhs) return [lhs.text] if simple_identifier_text?(lhs.text) @@ -864,7 +868,13 @@ def collect_generic_assignment_lhs_read_target_keys(lhs, keys) return end - return if field_like_node?(lhs) + if field_like_node?(lhs) + if suppress_field_receiver_lhs_reads? + receiver = lhs.named_children.first + collect_generic_assignment_lhs_read_target_keys(receiver, keys) + end + return + end if generic_identifier?(lhs) || generic_local_identifier_text(lhs) keys << node_key(lhs) @@ -893,7 +903,13 @@ def collect_generic_assignment_lhs_target_keys(lhs, keys) return end - return if field_like_node?(lhs) + if field_like_node?(lhs) + return unless field_assignment_writes_receiver? + + receiver = lhs.named_children.first + collect_generic_assignment_lhs_target_keys(receiver, keys) + return + end if generic_identifier?(lhs) || generic_local_identifier_text(lhs) keys << node_key(lhs) @@ -918,7 +934,10 @@ def generic_assignment_lhs_read_target?(lhs, target) return lhs.named_children.any? { |child| generic_assignment_lhs_read_target?(child, target) } end - return false if field_like_node?(lhs) + if field_like_node?(lhs) + return suppress_field_receiver_lhs_reads? && + generic_assignment_lhs_read_target?(lhs.named_children.first, target) + end return lhs == target if generic_identifier?(lhs) @@ -929,6 +948,14 @@ def indexed_lhs_node?(node) ts_node?(node) && indexed_lhs_node_kinds.include?(node.kind) end + def suppress_field_receiver_lhs_reads? + false + end + + def field_assignment_writes_receiver? 
+ false + end + def ts_node_contains?(root, target) return false unless ts_node?(root) return true if root == target diff --git a/gems/decomplex/lib/decomplex/syntax/go.rb b/gems/decomplex/lib/decomplex/syntax/go.rb index e4cdf783f..7b2a3fbd7 100644 --- a/gems/decomplex/lib/decomplex/syntax/go.rb +++ b/gems/decomplex/lib/decomplex/syntax/go.rb @@ -131,6 +131,14 @@ def indexed_lhs_node?(node) super || (node.kind == "expression_list" && node.children.any? { |child| !child.named? && child.text == "[" }) end + def suppress_field_receiver_lhs_reads? + true + end + + def field_assignment_writes_receiver? + true + end + private def boolean_container?(node) diff --git a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs index bd3748a2b..4d8a12c20 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs @@ -1,8 +1,6 @@ use crate::decomplex::ast::Span; -use crate::decomplex::syntax::adapters::false_simplicity_lexicon::{ - false_simplicity_lexicon, FalseSimplicityLexicon, -}; -use crate::decomplex::syntax::{self, CallSite, Document, Language}; +use crate::decomplex::syntax::adapters::false_simplicity_lexicon::false_simplicity_lexicon; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -38,9 +36,6 @@ struct ClassRec { span: Span, } -const GENERIC_SYSTEM_IO_BARE: &[&str] = - &["print", "println", "eprintln", "printf", "puts", "panic"]; - pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let documents = syntax::parse_files(files, language)?; Ok(scan_documents(&documents)) @@ -106,119 +101,24 @@ fn class_records_for_document(document: &Document) -> (Vec, Vec) } fn hits_for_document(document: &Document) -> Vec { - let lexicon = false_simplicity_lexicon(document.language); document - .call_sites + 
.semantic_effect_sites .iter() - .filter_map(|call| semantic_effect_hit_for_call(call, &lexicon)) + .map(|site| Hit { + kind: site.kind.clone(), + detail: site.detail.clone(), + file: site.file.clone(), + defn: if site.function.is_empty() { + "(top-level)".to_string() + } else { + site.function.clone() + }, + line: site.line, + span: site.span, + }) .collect() } -fn semantic_effect_hit_for_call(call: &CallSite, lexicon: &FalseSimplicityLexicon) -> Option { - let message = call.message.as_str(); - let (kind, detail) = if effect_callback_call(call, message, lexicon) { - ("callback_inversion", message.to_string()) - } else if lexicon.meta_mids.contains(&message) { - ("metaprogramming", message.to_string()) - } else if lexicon.dispatch_mids.contains(&message) { - ("dynamic_dispatch", message.to_string()) - } else if message == "call" && !call.receiver.is_empty() { - if method_object_receiver(&call.receiver, lexicon) { - ("dynamic_dispatch", "method(...).call".to_string()) - } else if variable_receiver(&call.receiver) { - ("dynamic_dispatch", format!("{}.call", call.receiver)) - } else { - return None; - } - } else if let Some((kind, detail)) = const_effect_kind_detail(call, message, lexicon) { - (kind, detail) - } else if call.receiver == "self" - && (lexicon.io_bare.contains(&message) || GENERIC_SYSTEM_IO_BARE.contains(&message)) - { - ("hidden_io", message.to_string()) - } else if call.receiver == "self" && lexicon.context_bare.contains(&message) { - ("context_dependency", message.to_string()) - } else if message.len() > 1 && message.ends_with('!') && !matches!(message, "!=" | "!~") { - ("hidden_mutation", message.to_string()) - } else { - return None; - }; - - Some(Hit { - kind: kind.to_string(), - detail, - file: call.file.clone(), - defn: call.function.clone(), - line: call.line, - span: call.span, - }) -} - -fn const_effect_kind_detail( - call: &CallSite, - message: &str, - lexicon: &FalseSimplicityLexicon, -) -> Option<(&'static str, String)> { - let receiver = 
call.receiver.as_str(); - if receiver.is_empty() || receiver == "self" { - return None; - } - let base = receiver - .trim_start_matches("::") - .split("::") - .next() - .unwrap_or(""); - if base == "Dir" && lexicon.dir_context.contains(&message) { - return Some(("context_dependency", format!("Dir.{message}"))); - } - if lexicon.io_consts.contains(&base) || receiver.starts_with("Net::") { - return Some(( - "hidden_io", - format!("{}.{}", receiver.trim_start_matches("::"), message), - )); - } - if receiver == "ENV" { - return Some(("context_dependency", "ENV".to_string())); - } - if lexicon - .context_pairs - .iter() - .any(|(name, mids)| *name == base && mids.contains(&message)) - { - return Some(("context_dependency", format!("{base}.{message}"))); - } - None -} - -fn effect_callback_call(call: &CallSite, message: &str, lexicon: &FalseSimplicityLexicon) -> bool { - (call.block || call.arguments.iter().any(|arg| arg.starts_with('&'))) - && effect_callback_name(message, lexicon) - && !lexicon.meta_mids.contains(&message) -} - -fn effect_callback_name(message: &str, lexicon: &FalseSimplicityLexicon) -> bool { - lexicon.callback_set.contains(&message) - || message.starts_with("with_") - || message.starts_with("around_") - || message.starts_with("on_") - || message.starts_with("before_") - || message.starts_with("after_") - || message.ends_with("_hook") -} - -fn method_object_receiver(receiver: &str, lexicon: &FalseSimplicityLexicon) -> bool { - lexicon - .method_obj_mids - .iter() - .any(|name| receiver.contains(name)) -} - -fn variable_receiver(receiver: &str) -> bool { - let mut chars = receiver.chars(); - matches!(chars.next(), Some(first) if first == '@' || first == '$' || first == '_' || first.is_ascii_lowercase()) - && chars.all(|ch| ch == '_' || ch == '!' || ch == '?' 
|| ch.is_ascii_alphanumeric()) -} - struct Report { hits: Vec, } diff --git a/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs index b8da2ae56..c8aab01c9 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs @@ -1,4 +1,4 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::ast::Span; use crate::decomplex::syntax::{self, DispatchSite, Document, Language}; use anyhow::Result; use serde::Serialize; @@ -23,17 +23,6 @@ pub struct FatUnionRow { pub spans: BTreeMap, } -#[derive(Clone, Debug)] -struct Read { - name: String, - span: Span, -} - -#[derive(Clone, Debug)] -struct VariantReads { - reads: Vec, -} - pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let documents = syntax::parse_files(files, language)?; Ok(scan_documents(&documents)) @@ -154,214 +143,3 @@ fn fat_unions_from_sites( }); rows.into_iter().map(|(_, row)| row).collect() } - -#[allow(dead_code)] -fn scan_documents_from_normalized_roots(documents: &[Document]) -> FatUnionReport { - let mut out = Vec::new(); - for document in documents { - let mut detector = FatUnion::new(document.file.clone(), document.lines.clone()); - detector.walk(&document.normalized_root, &Vec::new()); - out.extend(detector.findings()); - } - out.sort_by(|a, b| { - b.common - .len() - .cmp(&a.common.len()) - .then_with(|| a.at.cmp(&b.at)) - }); - FatUnionReport { fat_unions: out } -} - -struct FatUnion { - file: String, - lines: Vec, - reports: Vec, -} - -impl FatUnion { - fn new(file: String, lines: Vec) -> Self { - Self { - file, - lines, - reports: Vec::new(), - } - } - - fn walk(&mut self, node: &Node, defstack: &[String]) { - let mut next_defstack = defstack.to_vec(); - if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; - if let Some(Child::Symbol(name)) = 
node.children.get(name_index) { - next_defstack.push(name.clone()); - } - } - - if matches!(node.r#type.as_str(), "CASE" | "CASE2") { - self.analyze_case(node, &next_defstack); - } - - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, &next_defstack); - } - } - - fn analyze_case(&mut self, node: &Node, defstack: &[String]) { - let (cond, first_when) = if node.r#type == "CASE2" { - (None, node.children.get(0).and_then(ast::node)) - } else { - ( - node.children.get(0).and_then(ast::node), - node.children.get(1).and_then(ast::node), - ) - }; - - let mut variants = BTreeMap::new(); - let mut current_when = first_when; - while let Some(when_node) = current_when { - if when_node.r#type != "WHEN" { - break; - } - if let Some(pat) = when_node.children.get(0).and_then(ast::node) { - if let Some(variant_name) = self.variant_name(pat) { - let reads = self.collect_reads( - when_node - .children - .get(1) - .and_then(ast::node) - .unwrap_or(when_node), - ); - variants.insert(variant_name, VariantReads { reads }); - } - } - current_when = when_node.children.get(2).and_then(ast::node); - } - - if variants.len() < 3 { - return; - } - - let mut common = None; - let mut member_counts: BTreeMap = BTreeMap::new(); - for v in variants.values() { - let names: BTreeSet<_> = v.reads.iter().map(|r| r.name.clone()).collect(); - for name in &names { - *member_counts.entry(name.clone()).or_insert(0) += 1; - } - match common { - None => common = Some(names), - Some(ref mut c) => { - *c = c.intersection(&names).cloned().collect(); - } - } - } - - let common = common.unwrap_or_default(); - if common.len() < 2 { - return; - } - let variant: BTreeSet<_> = member_counts - .iter() - .filter_map(|(name, count)| { - (*count == 1 && !common.contains(name)).then(|| name.clone()) - }) - .collect(); - let total = common.len() + variant.len(); - if total == 0 || (common.len() as f64 / total as f64) < 0.6 { - return; - } - - let subject_name = self.subject_name(cond); - let defn = 
defstack.last().map(|s| s.as_str()).unwrap_or(""); - let at = format!("{}:{}:{}", self.file, defn, node.first_lineno); - - let mut spans = BTreeMap::new(); - spans.insert( - at.clone(), - [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - ); - - let mut variant_set: Vec<_> = variants.keys().cloned().collect(); - variant_set.sort(); - let mut common_vec: Vec<_> = common.into_iter().collect(); - common_vec.sort(); - let mut variant_vec: Vec<_> = variant.into_iter().collect(); - variant_vec.sort(); - - self.reports.push(FatUnionRow { - name: subject_name, - common: common_vec, - variant: variant_vec.clone(), - degenerate: variant_vec.is_empty(), - support: 1, - scatter: 1, - variant_set, - at, - spans, - }); - } - - fn variant_name(&self, node: &Node) -> Option { - let n = if node.r#type == "LIST" { - node.children.iter().filter_map(ast::node).next()? - } else { - node - }; - match n.r#type.as_str() { - "CONST" | "CONSTANT" | "COLON2" | "COLON3" | "SCOPE_RESOLUTION" => { - Some(ast::slice(n, &self.lines)) - } - _ => None, - } - } - - fn collect_reads(&self, node: &Node) -> Vec { - let mut out = Vec::new(); - self.walk_reads(node, &mut out); - out - } - - fn walk_reads(&self, node: &Node, out: &mut Vec) { - if matches!(node.r#type.as_str(), "CALL" | "OPCALL") { - if let Some(Child::Symbol(mid)) = node.children.get(1) { - out.push(Read { - name: mid.clone(), - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - }); - } - } else if matches!(node.r#type.as_str(), "FCALL" | "VCALL") { - if let Some(Child::Symbol(mid)) = node.children.get(0) { - out.push(Read { - name: mid.clone(), - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - }); - } - } - for child in node.children.iter().filter_map(ast::node) { - self.walk_reads(child, out); - } - } - - fn subject_name(&self, cond: Option<&Node>) -> String { - cond.map(|c| ast::slice(c, &self.lines)) - 
.unwrap_or_else(|| "implicit".to_string()) - } - - fn findings(&self) -> Vec { - self.reports.clone() - } -} diff --git a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs index bc8b21d47..6a3da82f2 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs @@ -1,4 +1,4 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::ast::Span; use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; @@ -53,119 +53,9 @@ struct MethodSequence { calls: Vec, } -#[derive(Clone, Debug)] -struct Path { - calls: Vec, - terminal: bool, -} - -const PATH_LIMIT: usize = 64; - -const IGNORED_MIDS: &[&str] = &[ - "abstract!", - "alias_method", - "any", - "attr_accessor", - "attr_reader", - "attr_writer", - "bind", - "cast", - "checked", - "enum", - "extend", - "final", - "include", - "interface!", - "let", - "must", - "must_because", - "nilable", - "override", - "overridable", - "params", - "prepend", - "private", - "private_class_method", - "protected", - "public", - "require", - "require_relative", - "requires_ancestor", - "sealed!", - "sig", - "type_member", - "type_template", - "untyped", - "unsafe", - "void", - "a_kind_of", - "after", - "around", - "before", - "be", - "be_a", - "be_an", - "be_empty", - "be_falsey", - "be_nil", - "be_truthy", - "change", - "contain_exactly", - "context", - "describe", - "eq", - "eql", - "equal", - "expect", - "have_attributes", - "have_key", - "have_received", - "it", - "match", - "not_to", - "raise_error", - "receive", - "subject", - "to", -]; - const OPTIONAL_DIAGNOSTIC_MIDS: &[&str] = &["error!", "fixable!", "read_interpolated_string", "warn!"]; -const MUTATING_MIDS: &[&str] = &[ - "<<", - "[]=", - "add", - "append", - "clear", - "collect!", - "compact!", - "concat", - "declare", - "delete", - 
"delete_if", - "each_key=", - "fill", - "filter!", - "keep_if", - "mark", - "merge!", - "move", - "push", - "reject!", - "replace", - "resolve", - "shift", - "stamp", - "store", - "unshift", - "update", - "write", -]; - -const NON_MUTATING_OPERATOR_MIDS: &[&str] = &["!", "!=", "!~"]; -const MUTATING_SUFFIXES: &[&str] = &["!"]; - pub fn scan_files(files: &[PathBuf], language: Language) -> Result { let documents = syntax::parse_files(files, language)?; Ok(scan_documents(&documents)) @@ -247,311 +137,6 @@ fn normalize_protocol_state(name: &str) -> String { .to_string() } -struct ImplicitControlFlow<'a> { - file: String, - lines: Vec, - effect_index: &'a EffectIndex, - sequences: Vec, -} - -impl<'a> ImplicitControlFlow<'a> { - fn new(file: String, lines: Vec, effect_index: &'a EffectIndex) -> Self { - Self { - file, - lines, - effect_index, - sequences: Vec::new(), - } - } - - fn walk(&mut self, node: &Node, owners: &[String]) { - if matches!(node.r#type.as_str(), "CLASS" | "MODULE") { - let mut next_owners = owners.to_vec(); - next_owners.push(self.owner_name(node)); - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, &next_owners); - } - } else if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - self.record_method_paths(node, &owners.join("::")); - } else { - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, owners); - } - } - } - - fn record_method_paths(&mut self, node: &Node, owner: &str) { - let defn = self.method_name(node); - for path in self.method_paths(node) { - let calls: Vec<_> = path - .calls - .iter() - .map(|c| self.call_for(c, owner, &defn)) - .collect(); - if calls.iter().filter(|c| self.stateful_call(c)).count() < 2 { - continue; - } - - self.sequences.push(MethodSequence { - file: self.file.clone(), - owner: owner.to_string(), - defn: defn.clone(), - line: node.first_lineno, - calls, - }); - } - } - - fn method_paths(&self, node: &Node) -> Vec { - 
self.paths_for_statements(&ast::body_stmts(node), 0) - } - - fn paths_for_statements(&self, statements: &[&Node], depth: usize) -> Vec { - if depth > 10 { - return vec![self.empty_path()]; - } - let mut paths = vec![self.empty_path()]; - for stmt in statements { - if stmt.r#type == "BEGIN" { - continue; - } - let stmt_paths = self.paths_for(stmt, depth + 1); - paths = self.append_statement_paths(paths, stmt_paths); - } - paths - } - - fn append_statement_paths(&self, paths: Vec, stmt_paths: Vec) -> Vec { - self.combine_path_lists(paths, stmt_paths) - } - - fn combine_path_lists(&self, left_paths: Vec, right_paths: Vec) -> Vec { - let mut combined = Vec::new(); - for left in left_paths { - if left.terminal { - combined.push(left); - } else { - for right in &right_paths { - let mut calls = left.calls.clone(); - calls.extend(right.calls.clone()); - combined.push(Path { - calls, - terminal: right.terminal, - }); - } - } - } - combined.into_iter().take(PATH_LIMIT).collect() - } - - fn paths_for(&self, node: &Node, depth: usize) -> Vec { - if depth > 10 { - return vec![self.empty_path()]; - } - match node.r#type.as_str() { - "BLOCK" => self.paths_for_statements( - &node - .children - .iter() - .filter_map(ast::node) - .collect::>(), - depth, - ), - "SCOPE" => self.paths_for( - node.children.get(2).and_then(ast::node).unwrap_or(node), - depth, - ), - "IF" | "UNLESS" => self.branch_paths(node, depth), - "CASE" | "CASE2" => self.case_paths(node, depth), - "RETURN" | "BREAK" | "NEXT" | "REDO" | "RETRY" => self - .generic_paths(node, depth) - .into_iter() - .map(|mut p| { - p.terminal = true; - p - }) - .collect(), - _ => self.generic_paths(node, depth), - } - } - - fn branch_paths(&self, node: &Node, depth: usize) -> Vec { - if depth > 10 { - return vec![self.empty_path()]; - } - let cond = node.children.get(0).and_then(ast::node); - let pos = node.children.get(1).and_then(ast::node); - let neg = node.children.get(2).and_then(ast::node); - - let mut alts = 
self.paths_for(pos.unwrap_or(node), depth + 1); - if let Some(n) = neg { - alts.extend(self.paths_for(n, depth + 1)); - } else { - alts.push(self.empty_path()); - } - - self.combine_path_lists(self.paths_for(cond.unwrap_or(node), depth + 1), alts) - } - - fn case_paths(&self, node: &Node, depth: usize) -> Vec { - if depth > 10 { - return vec![self.empty_path()]; - } - let (cond, first_when) = if node.r#type == "CASE2" { - (None, node.children.get(0).and_then(ast::node)) - } else { - ( - node.children.get(0).and_then(ast::node), - node.children.get(1).and_then(ast::node), - ) - }; - self.combine_path_lists( - cond.map(|c| self.paths_for(c, depth + 1)) - .unwrap_or(vec![self.empty_path()]), - self.when_paths(first_when, depth + 1), - ) - } - - fn when_paths(&self, node: Option<&Node>, depth: usize) -> Vec { - if depth > 10 { - return vec![self.empty_path()]; - } - let Some(n) = node else { - return vec![self.empty_path()]; - }; - if n.r#type != "WHEN" { - return self.paths_for(n, depth + 1); - } - - let pat = n.children.get(0).and_then(ast::node); - let body = n.children.get(1).and_then(ast::node); - let next = n.children.get(2).and_then(ast::node); - - let current = self.combine_path_lists( - self.paths_for(pat.unwrap_or(n), depth + 1), - self.paths_for(body.unwrap_or(n), depth + 1), - ); - let mut out = current; - out.extend(self.when_paths(next, depth + 1)); - out.into_iter().take(PATH_LIMIT).collect() - } - - fn generic_paths(&self, node: &Node, depth: usize) -> Vec { - if depth > 10 { - return vec![self.empty_path()]; - } - if matches!( - node.r#type.as_str(), - "CLASS" | "MODULE" | "DEFN" | "DEFS" | "LAMBDA" - ) { - return vec![self.empty_path()]; - } - - let mut child_paths = vec![self.empty_path()]; - for child in node.children.iter().filter_map(ast::node) { - child_paths = self.combine_path_lists(child_paths, self.paths_for(child, depth + 1)); - } - - if let Some(mid) = self.internal_protocol_call(node) { - self.combine_path_lists( - vec![Path { - calls: 
vec![self.raw_call(&mid, node)], - terminal: false, - }], - child_paths, - ) - } else { - child_paths - } - } - - fn raw_call(&self, mid: &str, node: &Node) -> Call { - Call { - mid: mid.to_string(), - file: self.file.clone(), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - reads: Vec::new(), - writes: Vec::new(), - } - } - - fn call_for(&self, call: &Call, owner: &str, _defn: &str) -> Call { - let effect = self.effect_index.effect_for(owner, &call.mid); - Call { - mid: call.mid.clone(), - file: call.file.clone(), - line: call.line, - span: call.span, - reads: effect.map(|e| e.reads.clone()).unwrap_or_default(), - writes: effect.map(|e| e.writes.clone()).unwrap_or_default(), - } - } - - fn stateful_call(&self, call: &Call) -> bool { - !call.reads.is_empty() || !call.writes.is_empty() - } - - fn empty_path(&self) -> Path { - Path { - calls: Vec::new(), - terminal: false, - } - } - - fn owner_name(&self, node: &Node) -> String { - let text = ast::slice( - node.children.first().and_then(ast::node).unwrap_or(node), - &self.lines, - ); - if text.is_empty() { - "(anonymous)".to_string() - } else { - text - } - } - - fn method_name(&self, node: &Node) -> String { - if node.r#type == "DEFS" { - ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string()) - } else { - ast::child_to_string(node.children.get(0)).unwrap_or_else(|| "?".to_string()) - } - } - - fn internal_protocol_call(&self, node: &Node) -> Option { - let mid = self.call_mid(node)?; - if IGNORED_MIDS.contains(&mid.as_str()) { - return None; - } - if !self.internal_receiver(node) { - return None; - } - Some(mid) - } - - fn call_mid(&self, node: &Node) -> Option { - match node.r#type.as_str() { - "CALL" | "OPCALL" | "ATTRASGN" => ast::child_to_string(node.children.get(1)), - "FCALL" | "VCALL" => ast::child_to_string(node.children.get(0)), - _ => None, - } - } - - fn internal_receiver(&self, node: &Node) -> bool { - if 
matches!(node.r#type.as_str(), "FCALL" | "VCALL") { - return true; - } - let receiver = node.children.get(0).and_then(ast::node); - receiver.map(|r| r.r#type == "SELF").unwrap_or(false) - } -} - struct EffectIndex { by_owner_name: BTreeMap<(String, String), MethodEffect>, by_name: BTreeMap>, @@ -631,250 +216,6 @@ impl EffectIndex { } } -struct EffectCollector { - lines: Vec, -} - -impl EffectCollector { - fn new(_file: String, lines: Vec) -> Self { - Self { lines } - } - - fn scan(&self, root: &Node) -> Vec { - let mut out = Vec::new(); - self.walk(root, &Vec::new(), &mut out); - out - } - - fn walk(&self, node: &Node, owners: &[String], out: &mut Vec) { - if matches!(node.r#type.as_str(), "CLASS" | "MODULE") { - let mut next_owners = owners.to_vec(); - next_owners.push(self.owner_name(node)); - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, &next_owners, out); - } - } else if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - out.push(self.method_effect(node, &owners.join("::"))); - } else { - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, owners, out); - } - } - } - - fn method_effect(&self, node: &Node, owner: &str) -> MethodEffect { - let mut reads = BTreeSet::new(); - let mut writes = BTreeSet::new(); - self.collect_state_access(node, &mut reads, &mut writes); - MethodEffect { - owner: owner.to_string(), - name: self.method_name(node), - reads: { - let mut v: Vec<_> = reads.into_iter().collect(); - v.sort(); - v - }, - writes: { - let mut v: Vec<_> = writes.into_iter().collect(); - v.sort(); - v - }, - } - } - - fn collect_state_access( - &self, - node: &Node, - reads: &mut BTreeSet, - writes: &mut BTreeSet, - ) { - if matches!(node.r#type.as_str(), "CLASS" | "MODULE" | "LAMBDA") { - return; - } - - match node.r#type.as_str() { - "IASGN" => { - if let Some(s) = ast::child_to_string(node.children.get(0)) { - writes.insert(self.normalize_state(&s)); - } - } - "LASGN" => self.collect_index_write(node, 
writes), - "IVAR" => { - if let Some(s) = ast::child_to_string(node.children.get(0)) { - reads.insert(self.normalize_state(&s)); - } - } - "ATTRASGN" => self.collect_attr_write(node, writes), - "CALL" | "OPCALL" => { - self.collect_bare_reader_comparison(node, reads); - self.collect_receiver_mutation(node, writes); - self.collect_self_reader(node, reads); - } - "VCALL" | "FCALL" => self.collect_self_reader(node, reads), - _ => {} - } - - for child in node.children.iter().filter_map(ast::node) { - self.collect_state_access(child, reads, writes); - } - } - - fn collect_attr_write(&self, node: &Node, writes: &mut BTreeSet) { - let receiver = node.children.get(0).and_then(ast::node); - let mid = ast::child_to_string(node.children.get(1)); - let Some(mid) = mid else { return }; - let attr = mid.trim_end_matches('=').to_string(); - - if mid == "[]=" { - if let Some(t) = receiver.and_then(|r| self.state_receiver_token(r)) { - writes.insert(t); - } - } else if receiver.map(|r| self.self_receiver(r)).unwrap_or(false) { - writes.insert(self.normalize_state(&attr)); - } else if let Some(t) = receiver.and_then(|r| self.state_receiver_token(r)) { - writes.insert(format!("{}.{}", t, attr)); - } - } - - fn collect_index_write(&self, node: &Node, writes: &mut BTreeSet) { - let name = ast::child_to_string(node.children.get(0)).unwrap_or_default(); - if name.contains('[') { - writes.insert(self.normalize_state(name.split('[').next().unwrap())); - } - } - - fn collect_bare_reader_comparison(&self, node: &Node, reads: &mut BTreeSet) { - let receiver = node.children.get(0).and_then(ast::node); - let mid = ast::child_to_string(node.children.get(1)).unwrap_or_default(); - if matches!(mid.as_str(), "==" | "!=" | "===" | "<" | "<=" | ">" | ">=") { - if let Some(r) = receiver { - if r.r#type == "LVAR" { - if let Some(name) = ast::child_to_string(r.children.get(0)) { - reads.insert(self.normalize_state(&name)); - } - } - } - } - } - - fn collect_receiver_mutation(&self, node: &Node, writes: 
&mut BTreeSet) { - let receiver = node.children.get(0).and_then(ast::node); - let mid = ast::child_to_string(node.children.get(1)).unwrap_or_default(); - if self.mutating_mid(&mid) { - if let Some(r) = receiver { - if let Some(t) = self.state_receiver_token(r) { - writes.insert(t); - } - } - } - } - - fn collect_self_reader(&self, node: &Node, reads: &mut BTreeSet) { - let mid = self.call_mid(node); - let Some(mid) = mid else { return }; - if self.mutating_mid(&mid) { - return; - } - if IGNORED_MIDS.contains(&mid.as_str()) { - return; - } - if !self.no_args(node) { - return; - } - if node.r#type == "CALL" - && !node - .children - .get(0) - .and_then(ast::node) - .map(|receiver| self.self_receiver(receiver)) - .unwrap_or(false) - { - return; - } - reads.insert(self.normalize_state(&mid)); - } - - fn mutating_mid(&self, mid: &str) -> bool { - if NON_MUTATING_OPERATOR_MIDS.contains(&mid) { - return false; - } - MUTATING_MIDS.contains(&mid) || MUTATING_SUFFIXES.iter().any(|s| mid.ends_with(s)) - } - - fn no_args(&self, node: &Node) -> bool { - match node.r#type.as_str() { - "CALL" | "OPCALL" => node - .children - .get(2) - .map(|c| matches!(c, Child::Nil)) - .unwrap_or(true), - "VCALL" => true, - "FCALL" => node - .children - .get(1) - .map(|c| matches!(c, Child::Nil)) - .unwrap_or(true), - _ => false, - } - } - - fn state_receiver_token(&self, node: &Node) -> Option { - match node.r#type.as_str() { - "IVAR" => ast::child_to_string(node.children.get(0)).map(|s| self.normalize_state(&s)), - "SELF" => Some("self".to_string()), - "VCALL" | "FCALL" | "LVAR" => { - ast::child_to_string(node.children.get(0)).map(|s| self.normalize_state(&s)) - } - "CALL" => { - if self.no_args(node) { - ast::child_to_string(node.children.get(1)).map(|s| self.normalize_state(&s)) - } else { - None - } - } - _ => None, - } - } - - fn self_receiver(&self, node: &Node) -> bool { - node.r#type == "SELF" - } - - fn call_mid(&self, node: &Node) -> Option { - match node.r#type.as_str() { - "CALL" | 
"OPCALL" | "ATTRASGN" => ast::child_to_string(node.children.get(1)), - "FCALL" | "VCALL" => ast::child_to_string(node.children.get(0)), - _ => None, - } - } - - fn owner_name(&self, node: &Node) -> String { - let text = ast::slice( - node.children.first().and_then(ast::node).unwrap_or(node), - &self.lines, - ); - if text.is_empty() { - "(anonymous)".to_string() - } else { - text - } - } - - fn method_name(&self, node: &Node) -> String { - if node.r#type == "DEFS" { - ast::child_to_string(node.children.get(1)).unwrap_or_else(|| "?".to_string()) - } else { - ast::child_to_string(node.children.get(0)).unwrap_or_else(|| "?".to_string()) - } - } - - fn normalize_state(&self, name: &str) -> String { - name.trim_start_matches('@') - .trim_end_matches('=') - .to_string() - } -} - struct Report { sequences: Vec, site_call_sets: BTreeMap<(String, String, String, usize), BTreeMap>, diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index 2c1ff9296..c70dd4217 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -281,6 +281,7 @@ fn raw_local_reads( && !raw_declaration_name_in_tree(node, child, profile) && !raw_declaration_name(child, parent, profile) && !raw_member_name(child, parent, profile) + && !raw_keyed_element_key(child, parent, profile) { reads.push(name); } @@ -840,6 +841,12 @@ fn raw_assignment_lhs_read_in_tree( target: &RawNode, profile: &dyn LanguageProfile, ) -> bool { + if profile + .deferred_statement_node_kinds() + .contains(&root.kind.as_str()) + { + return false; + } if profile .assignment_node_kinds() .contains(&root.kind.as_str()) @@ -860,6 +867,12 @@ fn raw_assignment_lhs_write_in_tree( target: &RawNode, profile: &dyn LanguageProfile, ) -> bool { + if profile + .deferred_statement_node_kinds() + .contains(&root.kind.as_str()) + { + return false; + } if profile .assignment_node_kinds() 
.contains(&root.kind.as_str()) @@ -887,8 +900,12 @@ fn raw_assignment_lhs_read_target( return profile.suppress_field_receiver_lhs_reads() && raw_member_receiver_target(lhs, target, profile); } - if raw_local_identifier_text(lhs, profile).is_some() { - return std::ptr::eq(lhs, target); + if let Some(lhs_name) = raw_local_identifier_text(lhs, profile) { + return std::ptr::eq(lhs, target) + || (raw_contains_node(lhs, target) + && raw_local_identifier_text(target, profile) + .map(|target_name| target_name == lhs_name) + .unwrap_or(false)); } if profile .expression_list_node_kinds() @@ -912,17 +929,18 @@ fn raw_assignment_lhs_write_target( if raw_indexed_lhs_node(lhs, profile) { return raw_named_children(lhs) .first() - .map(|object| { - !raw_field_like_node(object, profile) - && raw_assignment_lhs_write_target(object, target, profile) - }) + .map(|object| raw_assignment_lhs_write_target(object, target, profile)) .unwrap_or(false); } if raw_field_like_node(lhs, profile) { return raw_member_receiver_target(lhs, target, profile); } - if raw_local_identifier_text(lhs, profile).is_some() { - return std::ptr::eq(lhs, target); + if let Some(lhs_name) = raw_local_identifier_text(lhs, profile) { + return std::ptr::eq(lhs, target) + || (raw_contains_node(lhs, target) + && raw_local_identifier_text(target, profile) + .map(|target_name| target_name == lhs_name) + .unwrap_or(false)); } if profile .expression_list_node_kinds() @@ -975,9 +993,21 @@ fn raw_member_receiver_target( if raw_local_identifier_text(receiver, profile).is_some() { return std::ptr::eq(receiver, target); } + if raw_indexed_lhs_node(receiver, profile) { + return raw_named_children(receiver) + .first() + .map(|object| raw_member_receiver_target(object, target, profile)) + .unwrap_or(false); + } if raw_field_like_node(receiver, profile) { return raw_member_receiver_target(receiver, target, profile); } + if raw_named_children(receiver) + .into_iter() + .any(|child| raw_member_receiver_target(child, target, profile)) 
+ { + return true; + } false } @@ -1012,6 +1042,29 @@ fn raw_call_name(node: &RawNode, parent: Option<&RawNode>, profile: &dyn Languag .unwrap_or(false) } +fn raw_keyed_element_key( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + let Some(parent) = parent else { + return false; + }; + if !profile + .keyed_element_node_kinds() + .contains(&parent.kind.as_str()) + { + return false; + } + raw_named_children(parent) + .first() + .map(|key| std::ptr::eq(*key, node)) + .unwrap_or(false) + || raw_next_sibling(node, parent) + .map(|sibling| !sibling.named && sibling.text == ":") + .unwrap_or(false) +} + fn raw_assignment_statement(node: &RawNode, profile: &dyn LanguageProfile) -> bool { profile .assignment_node_kinds() diff --git a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs index 57bb76c80..9148fbf7c 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs @@ -1,6 +1,6 @@ use crate::decomplex::ast::Span; -use crate::decomplex::detectors::{local_flow, weighted_inlined_cognitive_complexity}; -use crate::decomplex::syntax::{self, Document, Language}; +use crate::decomplex::detectors::local_flow; +use crate::decomplex::syntax::{self, Document, Language, LocalComplexityScore}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -52,7 +52,15 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result Vec { let summaries = local_flow::scan_documents(documents); - let complexity_scores = weighted_inlined_cognitive_complexity::raw_complexity_scores(documents); + let complexity_scores = documents + .iter() + .flat_map(|document| { + document + .local_complexity_scores + .iter() + .map(|(id, score)| ((document.file.clone(), id.clone()), score.clone())) + }) + .collect(); scan_summaries_with_scores(summaries, complexity_scores) } 
@@ -63,10 +71,7 @@ pub fn scan_summaries(summaries: Vec) -> Vec, - complexity_scores: BTreeMap< - (String, usize, String), - weighted_inlined_cognitive_complexity::ScoreResult, - >, + complexity_scores: BTreeMap<(String, String), LocalComplexityScore>, ) -> Vec { let mut detector = LocalityDrag::new(summaries, complexity_scores); detector.findings() @@ -79,17 +84,13 @@ struct LocalityDrag { min_local_complexity: f64, min_score: isize, max_findings_per_method: usize, - complexity_scores: - BTreeMap<(String, usize, String), weighted_inlined_cognitive_complexity::ScoreResult>, + complexity_scores: BTreeMap<(String, String), LocalComplexityScore>, } impl LocalityDrag { fn new( summaries: Vec, - complexity_scores: BTreeMap< - (String, usize, String), - weighted_inlined_cognitive_complexity::ScoreResult, - >, + complexity_scores: BTreeMap<(String, String), LocalComplexityScore>, ) -> Self { Self { summaries, @@ -154,16 +155,9 @@ impl LocalityDrag { fn local_complexity(&self, summary: &local_flow::MethodSummary) -> f64 { self.complexity_scores - .get(&(summary.file.clone(), summary.line, summary.name.clone())) + .get(&(summary.file.clone(), summary.id.clone())) .map(|score| score.score) - .unwrap_or_else(|| { - let scorer = weighted_inlined_cognitive_complexity::LocalScorer::new(); - summary - .raw_node - .as_ref() - .map(|node| scorer.score_raw(node).score) - .unwrap_or_else(|| scorer.score(&summary.node).score) - }) + .unwrap_or(0.0) } fn finding_for_write( diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs index 0c35bccef..cd37e51ba 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs @@ -1,6 +1,4 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::parallel; -use crate::decomplex::syntax::adapters::language_profile; +use 
crate::decomplex::ast::Span; use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; @@ -30,11 +28,6 @@ struct Decision { state_refs: Vec, } -const BRANCH_TYPES: &[&str] = &["IF", "UNLESS", "WHILE", "UNTIL"]; -const NOISE_MIDS: &[&str] = &[ - "!", "!=", "==", "===", "<", "<=", ">", ">=", "[]", "[]=", "to_s", "inspect", "class", -]; - pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let documents = syntax::parse_files(files, language)?; Ok(scan_documents(&documents)) @@ -45,51 +38,6 @@ pub fn scan_documents(documents: &[Document]) -> Vec { .iter() .flat_map(decisions_from_mined_facts) .collect::>(); - if !all_decisions.is_empty() { - return Report::new(all_decisions).findings(); - } - - let mut global_immutable_readers: BTreeMap> = BTreeMap::new(); - let mut global_immutable_reader_types: BTreeMap> = - BTreeMap::new(); - let mut global_type_aliases: BTreeMap = BTreeMap::new(); - - for document in documents { - let profile = language_profile(document.language); - - for (name, readers) in profile.immutable_struct_readers(&document.lines) { - global_immutable_readers - .entry(name) - .or_default() - .extend(readers); - } - for (name, reader_types) in profile.immutable_struct_reader_types(&document.lines) { - global_immutable_reader_types - .entry(name) - .or_default() - .extend(reader_types); - } - global_type_aliases.extend(profile.type_aliases(&document.lines)); - } - - let decision_chunks = parallel::map_ordered(documents, |document| { - let profile = language_profile(document.language); - let method_param_types = profile.method_param_types(&document.lines); - let mut scanner = StateBranchDensity::new( - Some(document.file.clone()), - document.lines.clone(), - Some(global_immutable_readers.clone()), - Some(global_immutable_reader_types.clone()), - Some(global_type_aliases.clone()), - Some(method_param_types), - ); - scanner.walk(&document.normalized_root, &Vec::new()); - Ok(scanner.decisions) - }) - 
.expect("state-branch-density document scan"); - - let all_decisions = decision_chunks.into_iter().flatten().collect(); - Report::new(all_decisions).findings() } @@ -148,224 +96,6 @@ fn span_encloses(outer: Span, inner: Span) -> bool { starts_before_or_at && ends_after_or_at } -struct StateBranchDensity { - file: String, - lines: Vec, - decisions: Vec, - immutable_readers: BTreeMap>, - immutable_reader_types: BTreeMap>, - type_aliases: BTreeMap, - method_param_types: BTreeMap>, -} - -impl StateBranchDensity { - fn new( - file: Option, - lines: Vec, - immutable_readers: Option>>, - immutable_reader_types: Option>>, - type_aliases: Option>, - method_param_types: Option>>, - ) -> Self { - let ir = immutable_readers.unwrap_or_else(BTreeMap::new); - let irt = immutable_reader_types.unwrap_or_else(|| BTreeMap::new()); - let ta = type_aliases.unwrap_or_else(|| BTreeMap::new()); - Self { - file: file.unwrap_or_default(), - lines: lines.clone(), - decisions: Vec::new(), - immutable_readers: ir, - immutable_reader_types: irt, - type_aliases: ta, - method_param_types: method_param_types.unwrap_or_else(BTreeMap::new), - } - } - - fn walk(&mut self, node: &Node, defstack: &[String]) { - let mut next_defstack = defstack.to_vec(); - if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; - if let Some(Child::Symbol(name)) = node.children.get(name_index) { - next_defstack.push(name.clone()); - } - } - - self.record_branch(node, &next_defstack); - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, &next_defstack); - } - } - - fn record_branch(&mut self, node: &Node, defstack: &[String]) { - let cond = match node.r#type.as_str() { - t if BRANCH_TYPES.contains(&t) => node.children.first().and_then(ast::node), - "CASE" => node.children.first().and_then(ast::node), - _ => None, - }; - let Some(cond) = cond else { return }; - - let defn = defstack.last().map(|s| s.as_str()).unwrap_or("(top-level)"); - 
let refs = self.state_refs(cond, defn); - if refs.is_empty() { - return; - } - - self.decisions.push(Decision { - file: self.file.clone(), - defn: defn.to_string(), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - predicate: ast::slice(cond, &self.lines), - state_refs: refs - .into_iter() - .collect::>() - .into_iter() - .collect(), - }); - } - - fn state_refs(&self, node: &Node, defn: &str) -> Vec { - let mut refs = Vec::new(); - self.collect_state_refs(node, &mut refs, defn); - refs - } - - fn collect_state_refs(&self, node: &Node, refs: &mut Vec, defn: &str) { - match node.r#type.as_str() { - "IVAR" | "GVAR" => { - if let Some(Child::String(name)) = node.children.first() { - refs.push(name.clone()); - } - } - "CALL" | "QCALL" | "OPCALL" => { - let recv = node.children.get(0).and_then(ast::node); - let mid = node.children.get(1).and_then(|c| match c { - Child::Symbol(s) => Some(s), - _ => None, - }); - let args = node.children.get(2); - if let (Some(recv), Some(mid)) = (recv, mid) { - if self.state_attr_read(recv, mid, args, defn) { - refs.push(format!("{}.{}", ast::slice(recv, &self.lines), mid)); - } - } - } - _ => {} - } - for child in node.children.iter().filter_map(ast::node) { - self.collect_state_refs(child, refs, defn); - } - } - - fn state_attr_read(&self, recv: &Node, mid: &str, args: Option<&Child>, defn: &str) -> bool { - if NOISE_MIDS.contains(&mid) { - return false; - } - if !self.empty_arg_list(args) { - return false; - } - if self.immutable_struct_const_read(recv, mid, defn) { - return false; - } - true - } - - fn immutable_struct_const_read(&self, recv: &Node, mid: &str, defn: &str) -> bool { - let Some(owner_type) = self.immutable_receiver_type(recv, defn) else { - return false; - }; - self.immutable_reader(&owner_type, mid) - } - - fn immutable_receiver_type(&self, recv: &Node, defn: &str) -> Option { - if matches!(recv.r#type.as_str(), "CALL" | "QCALL" | "OPCALL") { - 
let recv_recv = recv.children.get(0).and_then(ast::node)?; - let recv_mid = recv.children.get(1).and_then(|c| match c { - Child::Symbol(s) => Some(s), - _ => None, - })?; - let recv_args = recv.children.get(2); - return self.immutable_reader_result_type(recv_recv, recv_mid, recv_args, defn); - } - if recv.r#type == "LVAR" { - let name = match recv.children.first()? { - Child::String(s) => s, - _ => return None, - }; - return self.method_param_types.get(defn)?.get(name).cloned(); - } - None - } - - fn immutable_reader(&self, type_name: &str, mid: &str) -> bool { - let resolved = self.resolve_type_alias(type_name); - let readers = self.immutable_readers.get(&resolved).or_else(|| { - resolved - .split("::") - .last() - .and_then(|last| self.immutable_readers.get(last)) - }); - readers.map(|r| r.contains(mid)).unwrap_or(false) - } - - fn immutable_reader_result_type( - &self, - recv: &Node, - mid: &str, - args: Option<&Child>, - defn: &str, - ) -> Option { - if !self.empty_arg_list(args) { - return None; - } - let owner_type = self.immutable_receiver_type(recv, defn)?; - let resolved = self.resolve_type_alias(&owner_type); - let reader_types = self.immutable_reader_types.get(&resolved).or_else(|| { - resolved - .split("::") - .last() - .and_then(|last| self.immutable_reader_types.get(last)) - })?; - reader_types.get(mid).cloned() - } - - fn empty_arg_list(&self, args: Option<&Child>) -> bool { - match args { - None | Some(Child::Nil) => true, - Some(Child::Node(node)) if node.r#type == "LIST" => { - node.children.iter().all(|c| matches!(c, Child::Nil)) - } - _ => false, - } - } - - fn resolve_type_alias(&self, type_name: &str) -> String { - let mut seen = BTreeSet::new(); - let mut current = type_name.to_string(); - loop { - if seen.contains(¤t) { - return current; - } - seen.insert(current.clone()); - let target = self.type_aliases.get(¤t).or_else(|| { - current - .split("::") - .last() - .and_then(|last| self.type_aliases.get(last)) - }); - match target { - Some(t) 
=> current = t.clone(), - None => return current, - } - } - } -} - struct Report { decisions: Vec, } diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs index 0b0677c3e..5cf1465d5 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs @@ -1,4 +1,4 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::ast::Span; use crate::decomplex::detectors::semantic_alias; use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; @@ -168,22 +168,13 @@ pub fn scan_documents_with_semantic_aliases_and_min_writes( semantic_aliases: &semantic_alias::SemanticAliasReport, min_writes: usize, ) -> StateMeshReport { - let mut src_map = BTreeMap::new(); - for document in documents { - src_map.insert( - document.file.clone(), - (document.normalized_root.clone(), document.lines.clone()), - ); - } - - let mut sm = StateMesh::new(src_map, min_writes); + let mut sm = StateMesh::new(min_writes); sm.load_document_facts(documents); sm.find_re_derivations(semantic_aliases); sm.to_json_graph() } struct StateMesh { - src_map: BTreeMap)>, min_writes: usize, custom_fields: Option>, writes: Vec, @@ -192,9 +183,8 @@ struct StateMesh { } impl StateMesh { - fn new(src_map: BTreeMap)>, min_writes: usize) -> Self { + fn new(min_writes: usize) -> Self { Self { - src_map, min_writes, custom_fields: None, writes: Vec::new(), @@ -203,16 +193,6 @@ impl StateMesh { } } - fn run(&mut self, semantic_aliases: &semantic_alias::SemanticAliasReport) { - self.discover_fields(); - if self.known_field_norms().is_empty() { - return; - } - - self.find_reads(); - self.find_re_derivations(semantic_aliases); - } - fn load_document_facts(&mut self, documents: &[Document]) { for document in documents { for write in &document.state_writes { @@ -256,215 +236,6 @@ impl StateMesh { } } - fn discover_fields(&mut self) { - let files: 
Vec<_> = self.src_map.keys().cloned().collect(); - for file in files { - let (root, lines) = self.src_map.get(&file).unwrap(); - let mut writes = Vec::new(); - self.walk_writes(root, lines, &Vec::new(), &file, &mut writes); - self.writes.extend(writes); - } - } - - fn walk_writes( - &self, - node: &Node, - lines: &[String], - defstack: &[String], - file: &str, - out: &mut Vec, - ) { - let mut next_defstack = defstack.to_vec(); - match node.r#type.as_str() { - "CLASS" | "MODULE" | "DEFN" => { - if let Some(Child::Symbol(name)) = node.children.first() { - next_defstack.push(name.clone()); - } - } - "DEFS" => { - if let Some(Child::Symbol(name)) = node.children.get(1) { - next_defstack.push(name.clone()); - } - } - "ATTRASGN" => { - if let (Some(recv), Some(Child::Symbol(msg))) = ( - node.children.get(0).and_then(ast::node), - node.children.get(1), - ) { - if msg != "[]=" { - let attr = msg.trim_end_matches('=').to_string(); - let norm = self.normalize(&attr); - out.push(Write { - attr, - norm, - recv: self.recv_slice(Some(recv), lines), - file: file.to_string(), - defn: next_defstack - .last() - .cloned() - .unwrap_or_else(|| "(top-level)".to_string()), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - }); - } - } - } - "IASGN" => { - if let Some(Child::String(attr)) = node.children.first() { - let norm = self.normalize(attr); - out.push(Write { - attr: attr.clone(), - norm, - recv: "self".to_string(), - file: file.to_string(), - defn: next_defstack - .last() - .cloned() - .unwrap_or_else(|| "(top-level)".to_string()), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - }); - } - } - _ => {} - } - - for child in node.children.iter().filter_map(ast::node) { - self.walk_writes(child, lines, &next_defstack, file, out); - } - } - - fn find_reads(&mut self) { - let field_norms = self.known_field_norms(); - let files: Vec<_> 
= self.src_map.keys().cloned().collect(); - for file in files { - let (root, lines) = self.src_map.get(&file).unwrap(); - let mut reads = Vec::new(); - self.walk_reads(root, lines, &Vec::new(), &file, &field_norms, &mut reads); - self.reads.extend(reads); - } - } - - fn walk_reads( - &self, - node: &Node, - lines: &[String], - defstack: &[String], - file: &str, - field_norms: &BTreeSet, - out: &mut Vec, - ) { - let mut next_defstack = defstack.to_vec(); - match node.r#type.as_str() { - "CLASS" | "MODULE" | "DEFN" => { - if let Some(Child::Symbol(name)) = node.children.first() { - next_defstack.push(name.clone()); - } - } - "DEFS" => { - if let Some(Child::Symbol(name)) = node.children.get(1) { - next_defstack.push(name.clone()); - } - } - "CALL" | "OPCALL" | "FCALL" | "VCALL" => { - let recv = if node.r#type == "CALL" || node.r#type == "OPCALL" { - node.children.get(0).and_then(ast::node) - } else { - None - }; - let mid = if node.r#type == "CALL" || node.r#type == "OPCALL" { - node.children.get(1) - } else { - node.children.get(0) - }; - let args = if node.r#type == "CALL" || node.r#type == "OPCALL" { - node.children.get(2) - } else { - node.children.get(1) - }; - - if let Some(Child::Symbol(name)) = mid { - if args.is_none() - || matches!(args, Some(Child::Nil)) - || self.is_empty_list(args) - { - if field_norms.contains(name) { - self.push_read( - Read { - attr: name.clone(), - norm: name.clone(), - recv: self.recv_slice(recv, lines), - file: file.to_string(), - defn: next_defstack - .last() - .cloned() - .unwrap_or_else(|| "(top-level)".to_string()), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - }, - out, - ); - } - } - } - } - "IVAR" => { - if let Some(Child::String(name)) = node.children.first() { - let norm = self.normalize(name); - if field_norms.contains(&norm) { - self.push_read( - Read { - attr: name.clone(), - norm, - recv: "self".to_string(), - file: file.to_string(), - 
defn: next_defstack - .last() - .cloned() - .unwrap_or_else(|| "(top-level)".to_string()), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - }, - out, - ); - } - } - } - _ => {} - } - - for child in node.children.iter().filter_map(ast::node) { - self.walk_reads(child, lines, &next_defstack, file, field_norms, out); - } - } - - fn push_read(&self, read: Read, out: &mut Vec) { - if self.write_target_read(&read) { - return; - } - out.push(read); - } - fn write_target_read(&self, read: &Read) -> bool { self.writes.iter().any(|write| { write.file == read.file @@ -817,20 +588,4 @@ impl StateMesh { } norms } - - fn recv_slice(&self, node: Option<&Node>, lines: &[String]) -> String { - let Some(node) = node else { - return "?".to_string(); - }; - ast::slice(node, lines) - } - - fn is_empty_list(&self, args: Option<&Child>) -> bool { - if let Some(Child::Node(node)) = args { - if node.r#type == "LIST" { - return node.children.iter().all(|c| matches!(c, Child::Nil)); - } - } - false - } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs index 247b60dcf..185102d4d 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs @@ -1,5 +1,5 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::{self, Document, Language, StateRead, StateWrite}; +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -57,9 +57,16 @@ pub fn scan_documents(documents: &[Document]) -> Vec Vec { let owners = document - .function_defs + .owner_defs .iter() - .map(|function| function.owner.clone()) + .map(|owner| owner.name.clone()) + .chain( + document + 
.function_defs + .iter() + .map(|function| function.owner.clone()), + ) + .filter(|owner| !owner.is_empty()) .collect::>(); owners .into_iter() @@ -177,257 +184,6 @@ fn pressure_row( }) } -struct TemporalOrderingPressure { - file: String, - lines: Vec, - state_reads: Vec, - state_writes: Vec, -} - -impl TemporalOrderingPressure { - fn new( - file: String, - lines: Vec, - state_reads: Vec, - state_writes: Vec, - ) -> Self { - Self { - file, - lines, - state_reads, - state_writes, - } - } - - fn scan(&mut self, root: &Node) -> Vec { - let mut out = Vec::new(); - self.walk_owners(root, &Vec::new(), &mut out); - out - } - - fn walk_owners( - &self, - node: &Node, - owners: &[String], - out: &mut Vec, - ) { - if matches!(node.r#type.as_str(), "CLASS" | "MODULE") { - let owner = self.full_owner_name(owners, node); - let methods = self.owner_methods(node, &owner); - if let Some(row) = self.pressure_row(&owner, &methods) { - out.push(row); - } - let mut next_owners = owners.to_vec(); - next_owners.push(self.owner_segment(node)); - for child in node.children.iter().filter_map(ast::node) { - self.walk_owners(child, &next_owners, out); - } - } else { - for child in node.children.iter().filter_map(ast::node) { - self.walk_owners(child, owners, out); - } - } - } - - fn full_owner_name(&self, owners: &[String], node: &Node) -> String { - let mut next = owners.to_vec(); - next.push(self.owner_segment(node)); - next.join("::") - } - - fn owner_segment(&self, node: &Node) -> String { - let name = ast::slice( - node.children.first().and_then(ast::node).unwrap_or(node), - &self.lines, - ); - if name.is_empty() { - "(anonymous)".to_string() - } else { - name - } - } - - fn owner_methods(&self, owner_node: &Node, owner: &str) -> Vec { - let Some(body) = self.owner_body(owner_node) else { - return Vec::new(); - }; - - let stmts = if body.r#type == "BLOCK" { - body.children - .iter() - .filter_map(ast::node) - .collect::>() - } else { - vec![body] - }; - - let mut visibility = 
"public".to_string(); - let mut methods = Vec::new(); - - for stmt in stmts { - if self.visibility_marker(stmt) { - if let Some(Child::Symbol(name)) = stmt.children.first() { - visibility = name.clone(); - } - } else if matches!(stmt.r#type.as_str(), "DEFN" | "DEFS") { - methods.push(self.method_state(stmt, &visibility, owner)); - } - } - methods - } - - fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { - let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; - let scope = owner_node.children.get(scope_index).and_then(ast::node)?; - if scope.r#type != "SCOPE" { - return None; - } - scope.children.get(2).and_then(ast::node) - } - - fn visibility_marker(&self, node: &Node) -> bool { - if node.r#type == "VCALL" { - if let Some(Child::Symbol(name)) = node.children.first() { - return matches!(name.as_str(), "public" | "protected" | "private"); - } - } - false - } - - fn method_state(&self, defn_node: &Node, visibility: &str, owner: &str) -> MethodState { - let name_index = if defn_node.r#type == "DEFS" { 1 } else { 0 }; - let name = defn_node - .children - .get(name_index) - .and_then(|c| match c { - Child::Symbol(s) => Some(s.clone()), - _ => None, - }) - .unwrap_or_else(|| "(anonymous)".to_string()); - - let reads = self.state_reads_for(owner, &name); - let writes = self.state_writes_for(owner, &name); - - MethodState { - name, - line: defn_node.first_lineno, - span: [ - defn_node.first_lineno, - defn_node.first_column, - defn_node.last_lineno, - defn_node.last_column, - ], - visibility: visibility.to_string(), - reads, - writes, - } - } - - fn state_reads_for(&self, owner: &str, function: &str) -> Vec { - sorted_unique( - self.state_reads - .iter() - .filter(|read| read.owner == owner && read.function == function) - .map(|read| read.field.clone()), - ) - } - - fn state_writes_for(&self, owner: &str, function: &str) -> Vec { - sorted_unique( - self.state_writes - .iter() - .filter(|write| write.owner == owner && write.function == 
function) - .map(|write| write.field.clone()), - ) - } - - fn pressure_row( - &self, - owner: &str, - methods: &[MethodState], - ) -> Option { - let public_methods: Vec<_> = methods - .iter() - .filter(|m| m.visibility == "public") - .collect(); - let state_methods: Vec<_> = public_methods - .iter() - .filter(|m| !m.reads.is_empty() || !m.writes.is_empty()) - .collect(); - let writers: Vec<_> = public_methods - .iter() - .filter(|m| !m.writes.is_empty()) - .collect(); - - if state_methods.len() < 3 || writers.len() < 2 { - return None; - } - - let mut fields_set = BTreeSet::new(); - for m in &state_methods { - for r in &m.reads { - fields_set.insert(r.clone()); - } - for w in &m.writes { - fields_set.insert(w.clone()); - } - } - let fields: Vec<_> = fields_set.into_iter().collect(); - - let shared_fields: Vec<_> = fields - .iter() - .filter(|field| { - state_methods - .iter() - .filter(|m| m.reads.contains(*field) || m.writes.contains(*field)) - .count() - >= 2 - }) - .cloned() - .collect(); - - if shared_fields.is_empty() { - return None; - } - - let n = state_methods.len(); - let state_space_exp = fields.len(); - let state_space = 2usize.pow(state_space_exp.min(12) as u32); - let score = (n * writers.len() * shared_fields.len().max(1)) + state_space; - - let first_line = state_methods.first()?.line; - let at = format!("{}:{}:{}", self.file, owner, first_line); - - let mut sites = Vec::new(); - let mut spans = BTreeMap::new(); - for m in &state_methods { - let loc = format!("{}:{}:{}", self.file, m.name, m.line); - sites.push(loc.clone()); - spans.insert(loc, m.span); - } - - Some(TemporalOrderingPressureRow { - at, - file: self.file.clone(), - owner: owner.to_string(), - public_methods: public_methods.len(), - state_methods: n, - writers: writers.len(), - state_fields: fields, - shared_fields, - orderings: self.factorial_label(n), - state_space: format!("2^{}", state_space_exp), - score, - sites, - spans, - }) - } - - fn factorial_label(&self, n: usize) -> String 
{ - format!("{}!", n) - } -} - fn sorted_unique(values: impl Iterator) -> Vec { let mut out: Vec<_> = values.collect::>().into_iter().collect(); out.sort(); diff --git a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs index 2621089d4..41b657e27 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs @@ -1,6 +1,6 @@ -use crate::decomplex::ast::{self, Node, RawNode, Span}; +use crate::decomplex::ast::Span; use crate::decomplex::detectors::{local_flow, structural_topology}; -use crate::decomplex::syntax::{self, Document, Language}; +use crate::decomplex::syntax::{self, Document, Language, LocalComplexityScore}; use anyhow::Result; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet}; @@ -33,7 +33,15 @@ pub fn scan_files( pub fn scan_documents(documents: &[Document]) -> Vec { let topology_report = structural_topology::scan_documents(documents); let topology = structural_topology::Graph::new(topology_report.methods, topology_report.edges); - let raw_scores = raw_complexity_scores(documents); + let complexity_scores = documents + .iter() + .flat_map(|document| { + document + .local_complexity_scores + .iter() + .map(|(id, score)| ((document.file.clone(), id.clone()), score.clone())) + }) + .collect::>(); let mut scores = BTreeMap::new(); for summary in local_flow::scan_documents(documents) { @@ -43,10 +51,13 @@ pub fn scan_documents(documents: &[Document]) -> Vec Vec BTreeMap<(String, usize, String), ScoreResult> { - let mut out = BTreeMap::new(); - for document in documents { - for function in &document.function_defs { - out.insert( - (function.file.clone(), function.line, function.name.clone()), - LocalScorer::new().score_raw(&function.body), - ); - } - } - out -} - struct LocalScore { id: String, owner: 
String, @@ -103,487 +99,10 @@ struct Contribution { chain: Vec, } -const METHOD_TYPES: &[&str] = &["DEFN", "DEFS"]; -const SKIP_NESTED_TYPES: &[&str] = &["CLASS", "MODULE", "DEFN", "DEFS", "LAMBDA"]; -const BRANCH_TYPES: &[&str] = &["IF", "UNLESS"]; -const LOOP_TYPES: &[&str] = &["WHILE", "UNTIL", "FOR", "ITER"]; -const CASE_TYPES: &[&str] = &["CASE", "CASE2"]; -const RESCUE_TYPES: &[&str] = &["RESCUE", "RESBODY"]; -const EARLY_EXIT_TYPES: &[&str] = &["RETURN", "BREAK", "NEXT", "REDO", "RETRY"]; -const BOOLEAN_TYPES: &[&str] = &["AND", "OR"]; - -pub struct LocalScorer {} - -#[derive(Clone)] -pub struct ScoreResult { - pub score: f64, - pub signals: BTreeMap, -} - -impl LocalScorer { - pub fn new() -> Self { - Self {} - } - - pub fn score(&self, method_node: &Node) -> ScoreResult { - let mut signals = BTreeMap::new(); - ScoreResult { - score: self.round(self.score_node(method_node, 0, &mut signals)), - signals, - } - } - - pub fn score_raw(&self, method_node: &RawNode) -> ScoreResult { - let mut signals = BTreeMap::new(); - ScoreResult { - score: self.round(self.score_raw_node(method_node, 0, &mut signals)), - signals, - } - } - - fn score_node( - &self, - node: &Node, - nesting: usize, - signals: &mut BTreeMap, - ) -> f64 { - if self.skip_nested(node) { - return 0.0; - } - - match node.r#type.as_str() { - t if BRANCH_TYPES.contains(&t) => self.score_branch(node, nesting, signals), - t if LOOP_TYPES.contains(&t) => self.score_loop(node, nesting, signals), - t if CASE_TYPES.contains(&t) => self.score_case(node, nesting, signals), - t if RESCUE_TYPES.contains(&t) => self.score_rescue(node, nesting, signals), - t if EARLY_EXIT_TYPES.contains(&t) => self.score_early_exit(node, nesting, signals), - t if BOOLEAN_TYPES.contains(&t) => self.score_boolean_node(node, nesting, signals), - _ => self.score_children(node, nesting, signals), - } - } - - fn skip_nested(&self, node: &Node) -> bool { - SKIP_NESTED_TYPES.contains(&node.r#type.as_str()) - && 
!METHOD_TYPES.contains(&node.r#type.as_str()) - } - - fn score_branch( - &self, - node: &Node, - nesting: usize, - signals: &mut BTreeMap, - ) -> f64 { - *signals.entry("branches".to_string()).or_insert(0) += 1; - if nesting > 0 { - *signals.entry("nested".to_string()).or_insert(0) += 1; - } - let condition = node.children.get(0).and_then(ast::node); - let positive = node.children.get(1).and_then(ast::node); - let negative = node.children.get(2).and_then(ast::node); - - self.branch_cost(nesting) - + self.predicate_cost(condition, signals) - + positive - .map(|n| self.score_node(n, nesting + 1, signals)) - .unwrap_or(0.0) - + negative - .map(|n| self.score_node(n, nesting + 1, signals)) - .unwrap_or(0.0) - } - - fn score_loop( - &self, - node: &Node, - nesting: usize, - signals: &mut BTreeMap, - ) -> f64 { - *signals.entry("loops".to_string()).or_insert(0) += 1; - if nesting > 0 { - *signals.entry("nested".to_string()).or_insert(0) += 1; - } - self.branch_cost(nesting) + self.score_children(node, nesting + 1, signals) - } - - fn score_case( - &self, - node: &Node, - nesting: usize, - signals: &mut BTreeMap, - ) -> f64 { - *signals.entry("cases".to_string()).or_insert(0) += 1; - 0.5 + self.score_case_children(node, nesting, signals) - } - - fn score_case_children( - &self, - node: &Node, - nesting: usize, - signals: &mut BTreeMap, - ) -> f64 { - compensated_sum(node.children.iter().filter_map(ast::node).map(|child| { - if child.r#type == "WHEN" { - self.score_when(child, nesting, signals) - } else { - self.score_node(child, nesting, signals) - } - })) - } - - fn score_when( - &self, - node: &Node, - nesting: usize, - signals: &mut BTreeMap, - ) -> f64 { - let body = node.children.get(1).and_then(ast::node); - let next_when = node.children.get(2).and_then(ast::node); - body.map(|n| self.score_node(n, nesting + 1, signals)) - .unwrap_or(0.0) - + next_when - .map(|n| self.score_node(n, nesting, signals)) - .unwrap_or(0.0) - } - - fn score_rescue( - &self, - node: &Node, 
- nesting: usize, - signals: &mut BTreeMap, - ) -> f64 { - *signals.entry("rescues".to_string()).or_insert(0) += 1; - self.branch_cost(nesting) + self.score_children(node, nesting + 1, signals) - } - - fn score_early_exit( - &self, - node: &Node, - nesting: usize, - signals: &mut BTreeMap, - ) -> f64 { - *signals.entry("early_exits".to_string()).or_insert(0) += 1; - let exit_cost = if nesting > 0 { - 0.5 + (nesting as f64 * 0.25) - } else { - 0.0 - }; - exit_cost + self.score_children(node, nesting, signals) - } - - fn score_boolean_node( - &self, - node: &Node, - nesting: usize, - signals: &mut BTreeMap, - ) -> f64 { - *signals.entry("boolean_ops".to_string()).or_insert(0) += 1; - 0.25 + self.score_children(node, nesting, signals) - } - - fn score_children( - &self, - node: &Node, - nesting: usize, - signals: &mut BTreeMap, - ) -> f64 { - compensated_sum( - node.children - .iter() - .filter_map(ast::node) - .map(|child| self.score_node(child, nesting, signals)), - ) - } - - fn predicate_cost(&self, node: Option<&Node>, signals: &mut BTreeMap) -> f64 { - let Some(node) = node else { return 0.0 }; - let bools = self.boolean_count(node); - *signals.entry("boolean_ops".to_string()).or_insert(0) += bools; - (bools as f64) * 0.5 - } - - fn boolean_count(&self, node: &Node) -> usize { - let own = if BOOLEAN_TYPES.contains(&node.r#type.as_str()) { - 1 - } else { - 0 - }; - own + node - .children - .iter() - .filter_map(ast::node) - .map(|child| self.boolean_count(child)) - .sum::() - } - - fn branch_cost(&self, nesting: usize) -> f64 { - 1.1 + (nesting as f64) - } - - fn round(&self, value: f64) -> f64 { - (value * 10.0).round() / 10.0 - } - - fn score_raw_node( - &self, - node: &RawNode, - nesting: usize, - signals: &mut BTreeMap, - ) -> f64 { - if raw_skip_nested(node) { - return 0.0; - } - - if raw_branch(node) { - *signals.entry("branches".to_string()).or_insert(0) += 1; - if nesting > 0 { - *signals.entry("nested".to_string()).or_insert(0) += 1; - } - return 
self.branch_cost(nesting) - + self.raw_predicate_cost(raw_condition_node(node), signals) - + self.score_raw_children(node, nesting + 1, signals); - } - - if raw_loop(node) { - *signals.entry("loops".to_string()).or_insert(0) += 1; - if nesting > 0 { - *signals.entry("nested".to_string()).or_insert(0) += 1; - } - return self.branch_cost(nesting) + self.score_raw_children(node, nesting + 1, signals); - } - - if raw_case(node) { - *signals.entry("cases".to_string()).or_insert(0) += 1; - return 0.5 + self.score_raw_children(node, nesting + 1, signals); - } - - if raw_rescue(node) { - *signals.entry("rescues".to_string()).or_insert(0) += 1; - return self.branch_cost(nesting) + self.score_raw_children(node, nesting + 1, signals); - } - - if raw_early_exit(node) { - *signals.entry("early_exits".to_string()).or_insert(0) += 1; - let exit_cost = if nesting > 0 { - 0.5 + (nesting as f64 * 0.25) - } else { - 0.0 - }; - let child_cost = if raw_bare_early_exit_wrapper(node) { - 0.0 - } else { - self.score_raw_children(node, nesting, signals) - }; - return exit_cost + child_cost; - } - - if raw_boolean_node(node) { - *signals.entry("boolean_ops".to_string()).or_insert(0) += 1; - return 0.25 + self.score_raw_children(node, nesting, signals); - } - - self.score_raw_children(node, nesting, signals) - } - - fn score_raw_children( - &self, - node: &RawNode, - nesting: usize, - signals: &mut BTreeMap, - ) -> f64 { - compensated_sum(node.children.iter().map(|child| { - if raw_transparent_single_line_suite_statement(node, child) { - if raw_bare_early_exit_wrapper(child) { - 0.0 - } else { - self.score_raw_children(child, nesting, signals) - } - } else { - self.score_raw_node(child, nesting, signals) - } - })) - } - - fn raw_predicate_cost( - &self, - node: Option<&RawNode>, - signals: &mut BTreeMap, - ) -> f64 { - let Some(node) = node else { return 0.0 }; - let bools = raw_boolean_count(node); - *signals.entry("boolean_ops".to_string()).or_insert(0) += bools; - (bools as f64) * 0.5 - } 
-} - -fn raw_skip_nested(node: &RawNode) -> bool { - matches!(node.kind.as_str(), "class" | "module" | "lambda") -} - -fn raw_branch(node: &RawNode) -> bool { - (matches!( - node.kind.as_str(), - "if" | "unless" | "if_statement" | "if_expression" | "if_modifier" | "unless_modifier" - ) && !node.named_children().is_empty()) - || raw_hidden_if(node) - || raw_modifier_if(node) -} - -fn raw_hidden_if(node: &RawNode) -> bool { - if node.kind == "expression_statement" && node.text.trim_start().starts_with("if ") { - return true; - } - matches!( - node.kind.as_str(), - "body_statement" | "block" | "statements" | "statement_list" - ) && node - .children - .first() - .map(|child| !child.named && matches!(child.kind.as_str(), "if" | "unless")) - .unwrap_or(false) -} - -fn raw_modifier_if(node: &RawNode) -> bool { - if matches!(node.kind.as_str(), "if_modifier" | "unless_modifier") { - return true; - } - if node.kind != "body_statement" { - return false; - } - let mut seen_named = false; - node.children.iter().any(|child| { - seen_named |= child.named; - seen_named && !child.named && matches!(child.kind.as_str(), "if" | "unless") - }) -} - -fn raw_loop(node: &RawNode) -> bool { - matches!( - node.kind.as_str(), - "while" - | "until" - | "while_statement" - | "for" - | "for_statement" - | "for_in_statement" - | "do_block" - ) || raw_hidden_loop(node) - || (node.kind == "expression_statement" - && starts_with_any(node.text.trim_start(), &["for", "while", "loop"])) - || (node.kind == "labeled_statement" && node.text.trim_start().starts_with("for ")) -} - -fn raw_hidden_loop(node: &RawNode) -> bool { - matches!( - node.kind.as_str(), - "body_statement" | "block" | "statements" | "statement_list" - ) && node - .children - .first() - .map(|child| !child.named && matches!(child.kind.as_str(), "for" | "while" | "loop")) - .unwrap_or(false) -} - -fn starts_with_any(text: &str, words: &[&str]) -> bool { - words - .iter() - .any(|word| text == *word || text.starts_with(&format!("{word} 
"))) -} - -fn raw_case(node: &RawNode) -> bool { - matches!( - node.kind.as_str(), - "case" | "switch_statement" | "switch_expression" | "match_statement" | "match_expression" - ) || (node.kind == "expression_statement" && node.text.trim_start().starts_with("match ")) -} - -fn raw_rescue(node: &RawNode) -> bool { - matches!( - node.kind.as_str(), - "rescue" | "rescue_modifier" | "rescue_clause" | "rescue_body" - ) -} - -fn raw_early_exit(node: &RawNode) -> bool { - (node.named || node.kind == "return") - && matches!( - node.kind.as_str(), - "return" - | "break" - | "next" - | "redo" - | "retry" - | "return_statement" - | "break_statement" - | "continue_statement" - ) -} - -fn raw_transparent_single_line_suite_statement(parent: &RawNode, child: &RawNode) -> bool { - parent.kind == "block" - && parent.children.len() == 1 - && parent.text == child.text - && matches!( - child.kind.as_str(), - "return_statement" | "break_statement" | "continue_statement" - ) -} - -fn raw_bare_early_exit_wrapper(node: &RawNode) -> bool { - matches!( - node.kind.as_str(), - "return_statement" | "break_statement" | "continue_statement" - ) && node.children.len() == 1 - && !node.children[0].named - && node.children[0].text == node.text -} - -fn compensated_sum(values: impl IntoIterator) -> f64 { - let mut sum = 0.0f64; - let mut compensation = 0.0f64; - for value in values { - let next = sum + value; - if sum.abs() >= value.abs() { - compensation += (sum - next) + value; - } else { - compensation += (value - next) + sum; - } - sum = next; - } - sum + compensation -} - fn format_one_decimal(value: f64) -> String { format!("{value:.1}") } -fn raw_boolean_node(node: &RawNode) -> bool { - matches!( - node.kind.as_str(), - "binary" - | "binary_expression" - | "boolean_operator" - | "conjunction_expression" - | "disjunction_expression" - ) && node - .children - .iter() - .any(|child| !child.named && matches!(child.text.as_str(), "&&" | "||" | "and" | "or")) -} - -fn raw_condition_node(node: 
&RawNode) -> Option<&RawNode> { - if raw_modifier_if(node) { - return node.named_children().last().copied(); - } - if node.kind == "body_statement" { - return node.named_children().first().copied(); - } - node.named_children().first().copied() -} - -fn raw_boolean_count(node: &RawNode) -> usize { - let own = usize::from(raw_boolean_node(node)); - own + node.children.iter().map(raw_boolean_count).sum::() -} - struct Analyzer { topology: structural_topology::Graph, scores: BTreeMap, diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index 7978303e0..70228823d 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -1,4 +1,5 @@ pub(crate) mod adapters; +pub(crate) mod complexity; pub mod tree_sitter_adapter; use crate::decomplex::ast::{Node as NormalizedNode, RawNode, Span}; @@ -107,6 +108,8 @@ pub struct Document { pub decision_sites: Vec, pub branch_decisions: Vec, pub dispatch_sites: Vec, + pub semantic_effect_sites: Vec, + pub local_complexity_scores: BTreeMap, pub predicate_aliases: Vec, pub comparison_uses: Vec, } @@ -213,6 +216,22 @@ pub struct DispatchSite { pub span: Span, } +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct SemanticEffectSite { + pub kind: String, + pub detail: String, + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, +} + +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct LocalComplexityScore { + pub score: f64, + pub signals: BTreeMap, +} + #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct ComparisonUse { pub canon_source: String, diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs index 46e6935ad..ddf27bacd 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs @@ -346,6 +346,14 @@ pub(crate) trait LanguageProfile { 
EMPTY_NODE_KINDS } + fn keyed_element_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn deferred_statement_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + fn suppress_field_receiver_lhs_reads(&self) -> bool { false } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs index 87074f337..bcbe24dfd 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs @@ -214,6 +214,14 @@ impl LanguageProfile for GoProfile { &["expression_list"] } + fn keyed_element_node_kinds(&self) -> &[&str] { + &["keyed_element"] + } + + fn deferred_statement_node_kinds(&self) -> &[&str] { + &["defer_statement"] + } + fn suppress_field_receiver_lhs_reads(&self) -> bool { true } diff --git a/gems/decomplex/rust/src/decomplex/syntax/complexity.rs b/gems/decomplex/rust/src/decomplex/syntax/complexity.rs new file mode 100644 index 000000000..de8afcae8 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/complexity.rs @@ -0,0 +1,314 @@ +use super::{FunctionDef, LocalComplexityScore}; +use crate::decomplex::ast::RawNode; +use std::collections::BTreeMap; +use std::path::Path; + +pub(crate) fn local_complexity_scores( + file: &str, + functions: &[FunctionDef], +) -> BTreeMap { + functions + .iter() + .map(|function| { + let owner = local_method_owner(file, &function.owner); + let id = format!("{}#{}", owner, function.name); + (id, LocalComplexityScorer::new().score(&function.body)) + }) + .collect() +} + +struct LocalComplexityScorer; + +impl LocalComplexityScorer { + fn new() -> Self { + Self + } + + fn score(&self, method_node: &RawNode) -> LocalComplexityScore { + let mut signals = BTreeMap::new(); + LocalComplexityScore { + score: self.round(self.score_node(method_node, 0, &mut signals)), + signals, + } + } + + fn score_node( + &self, + node: &RawNode, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { + if 
skip_nested(node) { + return 0.0; + } + + if branch(node) { + *signals.entry("branches".to_string()).or_insert(0) += 1; + if nesting > 0 { + *signals.entry("nested".to_string()).or_insert(0) += 1; + } + return self.branch_cost(nesting) + + self.predicate_cost(condition_node(node), signals) + + self.score_children(node, nesting + 1, signals); + } + + if loop_node(node) { + *signals.entry("loops".to_string()).or_insert(0) += 1; + if nesting > 0 { + *signals.entry("nested".to_string()).or_insert(0) += 1; + } + return self.branch_cost(nesting) + self.score_children(node, nesting + 1, signals); + } + + if case_node(node) { + *signals.entry("cases".to_string()).or_insert(0) += 1; + return 0.5 + self.score_children(node, nesting + 1, signals); + } + + if rescue_node(node) { + *signals.entry("rescues".to_string()).or_insert(0) += 1; + return self.branch_cost(nesting) + self.score_children(node, nesting + 1, signals); + } + + if early_exit(node) { + *signals.entry("early_exits".to_string()).or_insert(0) += 1; + let exit_cost = if nesting > 0 { + 0.5 + (nesting as f64 * 0.25) + } else { + 0.0 + }; + let child_cost = if bare_early_exit_wrapper(node) { + 0.0 + } else { + self.score_children(node, nesting, signals) + }; + return exit_cost + child_cost; + } + + if boolean_node(node) { + *signals.entry("boolean_ops".to_string()).or_insert(0) += 1; + return 0.25 + self.score_children(node, nesting, signals); + } + + self.score_children(node, nesting, signals) + } + + fn score_children( + &self, + node: &RawNode, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { + compensated_sum(node.children.iter().map(|child| { + if transparent_single_line_suite_statement(node, child) { + if bare_early_exit_wrapper(child) { + 0.0 + } else { + self.score_children(child, nesting, signals) + } + } else { + self.score_node(child, nesting, signals) + } + })) + } + + fn predicate_cost(&self, node: Option<&RawNode>, signals: &mut BTreeMap) -> f64 { + let Some(node) = node else { return 0.0 }; + 
let bools = boolean_count(node); + *signals.entry("boolean_ops".to_string()).or_insert(0) += bools; + (bools as f64) * 0.5 + } + + fn branch_cost(&self, nesting: usize) -> f64 { + 1.1 + (nesting as f64) + } + + fn round(&self, value: f64) -> f64 { + (value * 10.0).round() / 10.0 + } +} + +fn local_method_owner(file: &str, owner: &str) -> String { + let file_owner = file_owner(file); + if owner == file_owner { + return "(top-level)".to_string(); + } + owner + .strip_prefix(&format!("{file_owner}::")) + .unwrap_or(owner) + .to_string() +} + +fn file_owner(file: &str) -> String { + Path::new(file) + .file_stem() + .and_then(|stem| stem.to_str()) + .unwrap_or("Object") + .to_string() +} + +fn skip_nested(node: &RawNode) -> bool { + matches!(node.kind.as_str(), "class" | "module" | "lambda") +} + +fn branch(node: &RawNode) -> bool { + (matches!( + node.kind.as_str(), + "if" | "unless" | "if_statement" | "if_expression" | "if_modifier" | "unless_modifier" + ) && !node.named_children().is_empty()) + || hidden_if(node) + || modifier_if(node) +} + +fn hidden_if(node: &RawNode) -> bool { + if node.kind == "expression_statement" && node.text.trim_start().starts_with("if ") { + return true; + } + matches!( + node.kind.as_str(), + "body_statement" | "block" | "statements" | "statement_list" + ) && node + .children + .first() + .map(|child| !child.named && matches!(child.kind.as_str(), "if" | "unless")) + .unwrap_or(false) +} + +fn modifier_if(node: &RawNode) -> bool { + if matches!(node.kind.as_str(), "if_modifier" | "unless_modifier") { + return true; + } + if node.kind != "body_statement" { + return false; + } + let mut seen_named = false; + node.children.iter().any(|child| { + seen_named |= child.named; + seen_named && !child.named && matches!(child.kind.as_str(), "if" | "unless") + }) +} + +fn loop_node(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "while" + | "until" + | "while_statement" + | "for" + | "for_statement" + | "for_in_statement" + | "do_block" + 
) || hidden_loop(node) + || (node.kind == "expression_statement" + && starts_with_any(node.text.trim_start(), &["for", "while", "loop"])) + || (node.kind == "labeled_statement" && node.text.trim_start().starts_with("for ")) +} + +fn hidden_loop(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "body_statement" | "block" | "statements" | "statement_list" + ) && node + .children + .first() + .map(|child| !child.named && matches!(child.kind.as_str(), "for" | "while" | "loop")) + .unwrap_or(false) +} + +fn starts_with_any(text: &str, words: &[&str]) -> bool { + words + .iter() + .any(|word| text == *word || text.starts_with(&format!("{word} "))) +} + +fn case_node(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "case" | "switch_statement" | "switch_expression" | "match_statement" | "match_expression" + ) || (node.kind == "expression_statement" && node.text.trim_start().starts_with("match ")) +} + +fn rescue_node(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "rescue" | "rescue_modifier" | "rescue_clause" | "rescue_body" + ) +} + +fn early_exit(node: &RawNode) -> bool { + (node.named || node.kind == "return") + && matches!( + node.kind.as_str(), + "return" + | "break" + | "next" + | "redo" + | "retry" + | "return_statement" + | "break_statement" + | "continue_statement" + ) +} + +fn transparent_single_line_suite_statement(parent: &RawNode, child: &RawNode) -> bool { + parent.kind == "block" + && parent.children.len() == 1 + && parent.text == child.text + && matches!( + child.kind.as_str(), + "return_statement" | "break_statement" | "continue_statement" + ) +} + +fn bare_early_exit_wrapper(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "return_statement" | "break_statement" | "continue_statement" + ) && node.children.len() == 1 + && !node.children[0].named + && node.children[0].text == node.text +} + +fn boolean_node(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "binary" + | "binary_expression" + | 
"boolean_operator" + | "conjunction_expression" + | "disjunction_expression" + ) && node + .children + .iter() + .any(|child| !child.named && matches!(child.text.as_str(), "&&" | "||" | "and" | "or")) +} + +fn condition_node(node: &RawNode) -> Option<&RawNode> { + if modifier_if(node) { + return node.named_children().last().copied(); + } + if node.kind == "body_statement" { + return node.named_children().first().copied(); + } + node.named_children().first().copied() +} + +fn boolean_count(node: &RawNode) -> usize { + let own = usize::from(boolean_node(node)); + own + node.children.iter().map(boolean_count).sum::() +} + +fn compensated_sum(values: impl IntoIterator) -> f64 { + let mut sum = 0.0f64; + let mut compensation = 0.0f64; + for value in values { + let next = sum + value; + if sum.abs() >= value.abs() { + compensation += (sum - next) + value; + } else { + compensation += (value - next) + sum; + } + sum = next; + } + sum + compensation +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 566da6545..5f974063c 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -1,9 +1,13 @@ use super::{ - adapters::{language_profile, LanguageProfile}, + adapters::{ + false_simplicity_lexicon::{false_simplicity_lexicon, FalseSimplicityLexicon}, + language_profile, LanguageProfile, + }, BranchDecision, CallSite, ComparisonUse, DecisionSite, DispatchSite, Document, FunctionDef, - Language, OwnerDef, PredicateAlias, StateRead, StateWrite, + Language, OwnerDef, PredicateAlias, SemanticEffectSite, StateRead, StateWrite, }; use crate::decomplex::ast::{line, node_text, normalize_text, normalize_tree, span, RawNode}; +use crate::decomplex::syntax::complexity::local_complexity_scores; use anyhow::{Context, Result}; use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::fs; @@ -59,6 
+63,9 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { &mut dispatch_sites, ); collect_equality_dispatch_sites(&comparison_uses, &call_sites, &mut dispatch_sites); + let semantic_effect_sites = semantic_effect_sites_from_calls(language, &call_sites); + let local_complexity_scores = + local_complexity_scores(&parsed.file.to_string_lossy(), &function_defs); Ok(Document { file: parsed.file.to_string_lossy().to_string(), @@ -75,6 +82,8 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { decision_sites, branch_decisions, dispatch_sites, + semantic_effect_sites, + local_complexity_scores, predicate_aliases, comparison_uses, }) @@ -256,6 +265,128 @@ fn collect_facts( } } +const GENERIC_SYSTEM_IO_BARE: &[&str] = + &["print", "println", "eprintln", "printf", "puts", "panic"]; + +fn semantic_effect_sites_from_calls( + language: Language, + call_sites: &[CallSite], +) -> Vec { + let lexicon = false_simplicity_lexicon(language); + call_sites + .iter() + .filter_map(|call| semantic_effect_site_for_call(call, &lexicon)) + .collect() +} + +fn semantic_effect_site_for_call( + call: &CallSite, + lexicon: &FalseSimplicityLexicon, +) -> Option { + let message = call.message.as_str(); + let (kind, detail) = if effect_callback_call(call, message, lexicon) { + ("callback_inversion", message.to_string()) + } else if lexicon.meta_mids.contains(&message) { + ("metaprogramming", message.to_string()) + } else if lexicon.dispatch_mids.contains(&message) { + ("dynamic_dispatch", message.to_string()) + } else if message == "call" && !call.receiver.is_empty() { + if method_object_receiver(&call.receiver, lexicon) { + ("dynamic_dispatch", "method(...).call".to_string()) + } else if variable_receiver(&call.receiver) { + ("dynamic_dispatch", format!("{}.call", call.receiver)) + } else { + return None; + } + } else if let Some((kind, detail)) = const_effect_kind_detail(call, message, lexicon) { + (kind, detail) + } else if call.receiver == "self" + && 
(lexicon.io_bare.contains(&message) || GENERIC_SYSTEM_IO_BARE.contains(&message)) + { + ("hidden_io", message.to_string()) + } else if call.receiver == "self" && lexicon.context_bare.contains(&message) { + ("context_dependency", message.to_string()) + } else if message.len() > 1 && message.ends_with('!') && !matches!(message, "!=" | "!~") { + ("hidden_mutation", message.to_string()) + } else { + return None; + }; + + Some(SemanticEffectSite { + kind: kind.to_string(), + detail, + file: call.file.clone(), + function: call.function.clone(), + line: call.line, + span: call.span, + }) +} + +fn const_effect_kind_detail( + call: &CallSite, + message: &str, + lexicon: &FalseSimplicityLexicon, +) -> Option<(&'static str, String)> { + let receiver = call.receiver.as_str(); + if receiver.is_empty() || receiver == "self" { + return None; + } + let base = receiver + .trim_start_matches("::") + .split("::") + .next() + .unwrap_or(""); + if base == "Dir" && lexicon.dir_context.contains(&message) { + return Some(("context_dependency", format!("Dir.{message}"))); + } + if lexicon.io_consts.contains(&base) || receiver.starts_with("Net::") { + return Some(( + "hidden_io", + format!("{}.{}", receiver.trim_start_matches("::"), message), + )); + } + if receiver == "ENV" { + return Some(("context_dependency", "ENV".to_string())); + } + if lexicon + .context_pairs + .iter() + .any(|(name, mids)| *name == base && mids.contains(&message)) + { + return Some(("context_dependency", format!("{base}.{message}"))); + } + None +} + +fn effect_callback_call(call: &CallSite, message: &str, lexicon: &FalseSimplicityLexicon) -> bool { + (call.block || call.arguments.iter().any(|arg| arg.starts_with('&'))) + && effect_callback_name(message, lexicon) + && !lexicon.meta_mids.contains(&message) +} + +fn effect_callback_name(message: &str, lexicon: &FalseSimplicityLexicon) -> bool { + lexicon.callback_set.contains(&message) + || message.starts_with("with_") + || message.starts_with("around_") + || 
message.starts_with("on_") + || message.starts_with("before_") + || message.starts_with("after_") + || message.ends_with("_hook") +} + +fn method_object_receiver(receiver: &str, lexicon: &FalseSimplicityLexicon) -> bool { + lexicon + .method_obj_mids + .iter() + .any(|name| receiver.contains(name)) +} + +fn variable_receiver(receiver: &str) -> bool { + let mut chars = receiver.chars(); + matches!(chars.next(), Some(first) if first == '@' || first == '$' || first == '_' || first.is_ascii_lowercase()) + && chars.all(|ch| ch == '_' || ch == '!' || ch == '?' || ch.is_ascii_alphanumeric()) +} + fn collect_dispatch_sites( node: Node<'_>, source: &str, diff --git a/gems/decomplex/rust/src/lib.rs b/gems/decomplex/rust/src/lib.rs new file mode 100644 index 000000000..4f8adb8cf --- /dev/null +++ b/gems/decomplex/rust/src/lib.rs @@ -0,0 +1 @@ +pub mod decomplex; diff --git a/gems/decomplex/rust/tests/examples_oracle.rs b/gems/decomplex/rust/tests/examples_oracle.rs new file mode 100644 index 000000000..f3116e925 --- /dev/null +++ b/gems/decomplex/rust/tests/examples_oracle.rs @@ -0,0 +1,651 @@ +use anyhow::{bail, Context, Result}; +use decomplex_rust::decomplex::detectors::{ + co_update, decision_pressure, derived_state, false_simplicity, fat_union, flay_similarity, + function_lcom, implicit_control_flow, inconsistent_rename_clone, local_flow, locality_drag, + miner, operational_discontinuity, oversized_predicate, path_condition, predicate_alias, + redundant_nil_guard, semantic_alias, sequence_mine, state_branch_density, state_mesh, + structural_topology, temporal_ordering_pressure, weighted_inlined_cognitive_complexity, +}; +use decomplex_rust::decomplex::syntax::Language; +use serde::Serialize; +use serde_json::{json, Map, Value}; +use std::collections::BTreeSet; +use std::fs; +use std::path::{Path, PathBuf}; + +#[test] +fn shared_examples_match_oracles() -> Result<()> { + let examples_root = examples_root(); + let oracle_dir = examples_root.join("oracles"); + let mut 
failures = Vec::new(); + + for fixture in fixture_paths(&examples_root)? { + let detector = file_stem(&fixture)?; + let oracle_path = oracle_dir.join(format!("{detector}.json")); + if !oracle_path.is_file() { + failures.push(format!( + "{}: missing oracle {}", + fixture.display(), + oracle_path.display() + )); + continue; + } + + let oracle: Value = serde_json::from_str(&fs::read_to_string(&oracle_path)?)?; + let expected = oracle + .get("expected") + .cloned() + .with_context(|| format!("{} missing expected", oracle_path.display()))?; + let detector_name = oracle + .get("detector") + .and_then(Value::as_str) + .with_context(|| format!("{} missing detector", oracle_path.display()))?; + let options = oracle.get("options").cloned().unwrap_or_else(|| json!({})); + let language = language_for_fixture(&fixture)?; + let actual = run_detector(detector_name, &[fixture.clone()], language, &options) + .with_context(|| format!("{} {}", detector_name, fixture.display()))?; + let projected = project_detector_output(&detector, actual); + + if projected != expected { + failures.push(format!( + "{} {}\nexpected: {}\nactual: {}", + detector_name, + fixture.display(), + expected, + projected + )); + } + } + + if failures.is_empty() { + Ok(()) + } else { + bail!("shared example oracle failures:\n{}", failures.join("\n\n")) + } +} + +fn examples_root() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")).join("../examples") +} + +fn fixture_paths(examples_root: &Path) -> Result> { + let mut paths = Vec::new(); + for language_dir in fs::read_dir(examples_root)? { + let language_dir = language_dir?.path(); + if !language_dir.is_dir() + || language_dir.file_name().and_then(|name| name.to_str()) == Some("oracles") + { + continue; + } + for entry in fs::read_dir(&language_dir)? 
{ + let path = entry?.path(); + if path.is_file() && language_for_fixture(&path).is_ok() { + paths.push(path); + } + } + } + paths.sort(); + Ok(paths) +} + +fn file_stem(path: &Path) -> Result { + path.file_stem() + .and_then(|stem| stem.to_str()) + .map(str::to_string) + .with_context(|| format!("missing file stem for {}", path.display())) +} + +fn language_for_fixture(path: &Path) -> Result { + let extension = path + .extension() + .and_then(|extension| extension.to_str()) + .with_context(|| format!("missing extension for {}", path.display()))?; + Language::for_extension(extension) + .with_context(|| format!("unsupported fixture extension: {}", path.display())) +} + +fn run_detector( + detector: &str, + files: &[PathBuf], + language: Language, + options: &Value, +) -> Result { + match detector { + "co-update" => value(co_update::scan_files(files, language)?), + "decision-pressure" => value(decision_pressure::scan_files(files, language)?), + "predicate-alias" | "predicate-aliases" => { + value(predicate_alias::scan_files(files, language)?) + } + "miner" | "decision-miner" => value(miner::scan_files(files, language)?), + "semantic-alias" | "semantic-aliases" => { + value(semantic_alias::scan_files(files, language)?) + } + "flay-similarity" | "structural-similarity" => { + let mass = option_usize(options, "mass", 32)?; + let fuzzy = option_usize(options, "fuzzy", 1)?; + Ok(json!({ "findings": flay_similarity::scan_files(files, language, mass, fuzzy)? })) + } + "temporal-ordering-pressure" => { + value(temporal_ordering_pressure::scan_files(files, language)?) + } + "state-branch-density" => value(state_branch_density::scan_files(files, language)?), + "redundant-nil-guard" => value(redundant_nil_guard::scan_files(files, language)?), + "state-mesh" | "state-heatmap" => value(state_mesh::scan_files(files, language)?), + "inconsistent-rename-clone" => { + value(inconsistent_rename_clone::scan_files(files, language)?) 
+ } + "derived-state" => value(derived_state::scan_files(files, language)?), + "implicit-control-flow" | "ordered-protocol-mine" => { + value(implicit_control_flow::scan_files(files, language)?) + } + "weighted-inlined-complexity" => value(weighted_inlined_cognitive_complexity::scan_files( + files, language, + )?), + "locality-drag" => value(locality_drag::scan_files(files, language)?), + "operational-discontinuity" => { + value(operational_discontinuity::scan_files(files, language)?) + } + "oversized-predicate" => value(oversized_predicate::scan_files(files, language)?), + "path-condition" => value(path_condition::scan_files(files, language)?), + "sequence-mine" | "broken-protocol" => value(sequence_mine::scan_files(files, language)?), + "function-lcom" => value(function_lcom::scan_files(files, language)?), + "false-simplicity" => value(false_simplicity::scan_files(files, language)?), + "fat-union" => value(fat_union::scan_files(files, language)?), + "local-flow" => value(local_flow::scan_files(files, language)?), + "structural-topology" => value(structural_topology::scan_files(files, language)?), + _ => bail!("unsupported detector: {detector}"), + } +} + +fn value(value: T) -> Result { + Ok(serde_json::to_value(value)?) 
+} + +fn option_usize(options: &Value, key: &str, default: usize) -> Result { + match options.get(key) { + Some(value) => value + .as_u64() + .map(|value| value as usize) + .with_context(|| format!("option {key} must be an integer")), + None => Ok(default), + } +} + +fn project_detector_output(detector: &str, output: Value) -> Value { + match detector { + "co-update" => json!({ + "co_written_pairs": rows(field(&output, "co_written_pairs"), &["pair", "support"]), + "neglected_updates": rows(field(&output, "neglected_updates"), &["pair", "support", "has", "missing"]), + }), + "decision-pressure" => rows(&output, &["contract", "decisions", "essential", "methods"]), + "predicate-alias" => json!({ + "alias_clusters": array(field(&output, "alias_clusters")).iter().map(|row| { + json!({ "name_count": array(field(row, "names")).len() }) + }).collect::>() + }), + "miner" => json!({ + "missing_abstractions": array(field(&output, "missing_abstractions")).iter().map(|row| { + pick(row, &["kind", "members", "support", "scatter"]) + }).collect::>(), + "neglected_conditions": rows(field(&output, "neglected_conditions"), &["pattern", "support", "missing"]), + }), + "semantic-alias" => json!({ + "alias_clusters": array(field(&output, "alias_clusters")).iter().map(|row| { + json!({ + "canon": canonical_predicate(field(row, "canon")), + "name_count": array(field(row, "names")).len(), + }) + }).collect::>(), + "reification_miss_count": array(field(&output, "reification_misses")).len(), + }), + "flay-similarity" => Value::Array( + array(field(&output, "findings")) + .iter() + .map(|row| { + let mut projected = object(pick(row, &["clone_type", "node"])); + projected.insert( + "site_count".to_string(), + json!(array(field(row, "sites")).len()), + ); + Value::Object(projected) + }) + .collect(), + ), + "temporal-ordering-pressure" => Value::Array( + array(&output) + .iter() + .map(|row| { + let mut projected = object(pick( + row, + &[ + "owner", + "public_methods", + "state_methods", + 
"writers", + "orderings", + ], + )); + projected.insert( + "state_fields".to_string(), + json!(canonical_state_refs(field(row, "state_fields"))), + ); + projected.insert( + "shared_fields".to_string(), + json!(canonical_state_refs(field(row, "shared_fields"))), + ); + Value::Object(projected) + }) + .collect(), + ), + "state-branch-density" => Value::Array( + array(&output) + .iter() + .map(|row| { + let mut projected = object(pick(row, &["decisions"])); + projected.insert( + "method".to_string(), + json!(canonical_method_name(field(row, "method"))), + ); + projected.insert( + "state_refs".to_string(), + json!(canonical_state_refs(field(row, "state_refs"))), + ); + Value::Object(projected) + }) + .collect(), + ), + "redundant-nil-guard" => rows(&output, &["local"]), + "state-mesh" => project_state_mesh(&output), + "inconsistent-rename-clone" => Value::Array( + array(&output) + .iter() + .map(|row| { + let mut projected = object(pick(row, &["ref_name"])); + projected.insert( + "divergent_count".to_string(), + json!(array(field(row, "divergent")).len()), + ); + Value::Object(projected) + }) + .collect(), + ), + "derived-state" => rows(&output, &["derived", "source"]), + "implicit-control-flow" => json!({ + "ordered_protocols": project_protocols(field(&output, "ordered_protocols")), + "order_drift": project_protocols(field(&output, "order_drift")), + }), + "weighted-inlined-complexity" => Value::Array( + array(&output) + .iter() + .map(|row| { + let mut projected = object(pick(row, &["method", "depth"])); + projected.insert( + "callee_count".to_string(), + json!(array(field(row, "single_caller_callees")).len()), + ); + Value::Object(projected) + }) + .collect(), + ), + "locality-drag" => rows(&output, &["variable"]), + "operational-discontinuity" => rows(&output, &["resets", "confidence"]), + "oversized-predicate" => Value::Array( + array(field(&output, "findings")) + .iter() + .map(|row| { + let mut projected = object(pick(row, &["count"])); + projected.insert( + 
"atom_count".to_string(), + json!(array(field(row, "atoms")).len()), + ); + Value::Object(projected) + }) + .collect(), + ), + "path-condition" => Value::Array( + array(field(&output, "neglected")) + .iter() + .map(|row| { + json!({ + "pattern": canonical_predicate_atoms(field(row, "pattern")), + "support": field(row, "support").clone(), + "missing": canonical_predicate(field(row, "missing")), + "action": canonical_action(field(row, "action")), + }) + }) + .collect(), + ), + "sequence-mine" => rows( + field(&output, "broken"), + &["pair", "support", "has", "missing"], + ), + "function-lcom" => rows( + &output, + &[ + "mode", + "components", + "locals", + "statements", + "terminal_join", + ], + ), + "false-simplicity" => rows(&output, &["kind"]), + "fat-union" => Value::Array( + array(field(&output, "fat_unions")) + .iter() + .map(|row| { + let mut projected = object(pick( + row, + &["common", "variant", "degenerate", "support", "scatter"], + )); + projected.insert( + "variant_set".to_string(), + json!(canonical_variants(field(row, "variant_set"))), + ); + Value::Object(projected) + }) + .collect(), + ), + "local-flow" => project_local_flow(&output), + "structural-topology" => json!({ + "method_count": array(field(&output, "methods")).len(), + "edges": rows(field(&output, "edges"), &["caller_name", "callee_name", "type"]), + }), + _ => scrub_locations(&output), + } +} + +fn project_local_flow(output: &Value) -> Value { + Value::Array( + array(output) + .iter() + .map(|method| { + json!({ + "method": field(method, "name").clone(), + "statements": array(field(method, "statements")).iter().map(|statement| { + json!({ + "reads": sorted_array(field(statement, "reads")), + "writes": sorted_array(field(statement, "writes")), + "dependencies": field(statement, "dependencies").clone(), + "co_uses": field(statement, "co_uses").clone(), + }) + }).collect::>(), + "boundaries": array(field(method, "boundaries")).iter().map(|boundary| { + pick(boundary, &["before_index", 
"after_index", "kind"]) + }).collect::>(), + }) + }) + .collect(), + ) +} + +fn project_state_mesh(output: &Value) -> Value { + let state_mesh = field(output, "state_mesh"); + let fields = field(output, "fields"); + let field_names = fields + .as_object() + .map(|object| { + canonical_state_refs(&Value::Array( + object.keys().cloned().map(Value::String).collect(), + )) + }) + .unwrap_or_default(); + json!({ + "state_mesh": pick( + state_mesh, + &["total_fields", "total_writes", "total_reads", "total_re_derivations"], + ), + "field_names": field_names, + }) +} + +fn project_protocols(rows_value: &Value) -> Value { + Value::Array( + array(rows_value) + .iter() + .map(|row| { + let mut projected = object(pick( + row, + &["protocol", "dependency", "support", "observed", "missing"], + )); + projected.insert( + "states".to_string(), + json!(canonical_state_refs(field(row, "states"))), + ); + Value::Object(projected) + }) + .collect(), + ) +} + +fn rows(value: &Value, keys: &[&str]) -> Value { + Value::Array(array(value).iter().map(|row| pick(row, keys)).collect()) +} + +fn pick(row: &Value, keys: &[&str]) -> Value { + let mut out = Map::new(); + if let Some(object) = row.as_object() { + for key in keys { + if let Some(value) = object.get(*key) { + out.insert((*key).to_string(), canonical_value(value)); + } + } + } + Value::Object(out) +} + +fn canonical_value(value: &Value) -> Value { + match value { + Value::Object(object) => { + let mut out = Map::new(); + let mut keys = object.keys().collect::>(); + keys.sort(); + for key in keys { + out.insert(key.clone(), canonical_value(&object[key])); + } + Value::Object(out) + } + Value::Array(values) => Value::Array(values.iter().map(canonical_value).collect()), + _ => value.clone(), + } +} + +fn scrub_locations(value: &Value) -> Value { + match value { + Value::Object(object) => { + let mut out = Map::new(); + let mut keys = object.keys().collect::>(); + keys.sort(); + for key in keys { + if 
LOCATION_KEYS.contains(&key.as_str()) { + continue; + } + out.insert(key.clone(), scrub_locations(&object[key])); + } + Value::Object(out) + } + Value::Array(values) => Value::Array(values.iter().map(scrub_locations).collect()), + _ => value.clone(), + } +} + +const LOCATION_KEYS: &[&str] = &[ + "at", + "boundaries", + "boundary_crossings", + "component_lines", + "defn", + "examples", + "file", + "gap_lines", + "line", + "locations", + "predicate", + "raw", + "reason", + "sites", + "span", + "spans", + "source", +]; + +fn canonical_variants(value: &Value) -> Vec { + let mut values = array(value) + .iter() + .map(|item| item.as_str().unwrap_or("").replace(':', ".")) + .map(|text| collapse_dots(&text.replace('_', "."))) + .collect::>(); + values.sort(); + values +} + +fn canonical_state_refs(value: &Value) -> Vec { + let mut values = BTreeSet::new(); + for item in array(value) { + let mut text = value_text(item); + if let Some(stripped) = text.strip_prefix('@') { + text = stripped.to_string(); + } + if let Some(stripped) = text.strip_prefix("self.") { + text = stripped.to_string(); + } else if let Some(stripped) = text.strip_prefix("this.") { + text = stripped.to_string(); + } + values.insert(text); + } + values.into_iter().collect() +} + +fn canonical_method_name(value: &Value) -> String { + value_text(value) + .rsplit(['.', ':', '#']) + .next() + .unwrap_or("") + .to_string() +} + +fn canonical_predicate_atoms(value: &Value) -> Vec { + let mut atoms = array(value) + .iter() + .map(canonical_predicate) + .collect::>(); + atoms.sort(); + atoms +} + +fn canonical_predicate(value: &Value) -> String { + let mut text = value_text(value) + .trim() + .trim_end_matches(';') + .trim() + .to_string(); + text = replace_symbol_literals(&text); + text = strip_noarg_suffix(&text); + text +} + +fn canonical_action(value: &Value) -> String { + canonical_predicate(value) +} + +fn replace_symbol_literals(text: &str) -> String { + let mut out = String::new(); + let chars = 
text.chars().collect::>(); + let mut i = 0; + while i < chars.len() { + if chars[i] == ':' && i + 1 < chars.len() && ident_start(chars[i + 1]) { + i += 1; + let start = i; + while i < chars.len() && ident_continue(chars[i]) { + i += 1; + } + out.push_str(&chars[start..i].iter().collect::().to_uppercase()); + } else { + out.push(chars[i]); + i += 1; + } + } + out +} + +fn strip_noarg_suffix(text: &str) -> String { + let mut out = String::new(); + let chars = text.chars().collect::>(); + let mut i = 0; + while i < chars.len() { + if ident_start(chars[i]) { + let start = i; + i += 1; + while i < chars.len() && (ident_continue(chars[i]) || chars[i] == '.') { + i += 1; + } + if i < chars.len() && chars[i] == '?' { + out.push_str(&chars[start..i].iter().collect::()); + i += 1; + } else if i + 1 < chars.len() && chars[i] == '(' && chars[i + 1] == ')' { + out.push_str(&chars[start..i].iter().collect::()); + i += 2; + } else { + out.push_str(&chars[start..i].iter().collect::()); + } + } else { + out.push(chars[i]); + i += 1; + } + } + out +} + +fn ident_start(ch: char) -> bool { + ch == '_' || ch.is_ascii_alphabetic() +} + +fn ident_continue(ch: char) -> bool { + ch == '_' || ch.is_ascii_alphanumeric() +} + +fn collapse_dots(text: &str) -> String { + let mut out = String::new(); + let mut previous_dot = false; + for ch in text.chars() { + if ch == '.' 
{ + if !previous_dot { + out.push(ch); + } + previous_dot = true; + } else { + out.push(ch); + previous_dot = false; + } + } + out +} + +fn sorted_array(value: &Value) -> Value { + let mut values = array(value).iter().map(canonical_value).collect::>(); + values.sort_by_key(|value| value.to_string()); + Value::Array(values) +} + +fn object(value: Value) -> Map { + value.as_object().cloned().unwrap_or_default() +} + +fn field<'a>(value: &'a Value, key: &str) -> &'a Value { + value + .as_object() + .and_then(|object| object.get(key)) + .unwrap_or(&Value::Null) +} + +fn array(value: &Value) -> &[Value] { + value.as_array().map(Vec::as_slice).unwrap_or(&[]) +} + +fn value_text(value: &Value) -> String { + match value { + Value::String(text) => text.clone(), + Value::Null => String::new(), + _ => value.to_string(), + } +} diff --git a/gems/decomplex/test/examples_oracle_test.rb b/gems/decomplex/test/examples_oracle_test.rb index 46d9bae62..fc597b7ba 100644 --- a/gems/decomplex/test/examples_oracle_test.rb +++ b/gems/decomplex/test/examples_oracle_test.rb @@ -180,8 +180,11 @@ def project_detector_output(detector, output) when "local-flow" Array(output).map do |method| { - "statement_count" => Array(method["statements"]).size, - "boundary_count" => Array(method["boundaries"]).size + "method" => method["name"], + "statements" => Array(method["statements"]).map do |statement| + pick(statement, %w[reads writes dependencies co_uses]) + end, + "boundaries" => rows(method["boundaries"], %w[before_index after_index kind]) } end when "structural-topology" From 77669839d0ce12cb1a55fdf768e2743c068d0aef Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 12:56:04 +0000 Subject: [PATCH 42/52] Add shared Decomplex fact oracles --- .../facts/detectors/decision-pressure.json | 188 ++ .../detectors/derived-state-branches.json | 353 ++ .../facts/detectors/derived-state.json | 156 + .../facts/detectors/false-simplicity.json | 87 + .../facts/detectors/function-lcom.json | 248 ++ 
.../detectors/local-flow-go-receiver.json | 755 +++++ .../detectors/local-flow-python-raw.json | 2834 +++++++++++++++++ .../facts/detectors/locality-drag.json | 260 ++ .../detectors/operational-discontinuity.json | 245 ++ .../detectors/path-condition-derived.json | 444 +++ .../facts/detectors/path-condition-raw.json | 274 ++ .../facts/detectors/path-condition.json | 98 + .../detectors/redundant-nil-guard-facts.json | 402 +++ .../detectors/redundant-nil-guard-rich.json | 2209 +++++++++++++ .../facts/detectors/sequence-mine-nested.json | 193 ++ .../facts/detectors/sequence-mine-rich.json | 220 ++ .../facts/detectors/sequence-mine.json | 79 + .../facts/detectors/state-branch-density.json | 86 + .../facts/detectors/state-mesh-rich.json | 408 +++ .../examples/facts/detectors/state-mesh.json | 223 ++ .../detectors/structural-topology-rich.json | 195 ++ .../facts/detectors/structural-topology.json | 171 + .../weighted-inlined-complexity-rich.json | 443 +++ .../weighted-inlined-complexity.json | 205 ++ .../examples/facts/report/postprocess.json | 392 +++ .../lib/decomplex/detector_runner.rb | 211 ++ gems/decomplex/rust/src/decomplex/ast.rs | 8 +- .../rust/src/decomplex/convergence.rs | 5 +- .../decomplex/detectors/decision_pressure.rs | 8 +- .../src/decomplex/detectors/derived_state.rs | 6 +- .../src/decomplex/detectors/local_flow.rs | 33 +- .../src/decomplex/detectors/locality_drag.rs | 2 +- .../src/decomplex/detectors/path_condition.rs | 103 +- .../detectors/redundant_nil_guard.rs | 36 +- .../detectors/structural_topology.rs | 54 +- .../weighted_inlined_cognitive_complexity.rs | 9 +- gems/decomplex/rust/src/decomplex/report.rs | 8 + .../rust/src/decomplex/root_cause.rs | 3 +- gems/decomplex/rust/src/decomplex/syntax.rs | 89 +- .../decomplex/syntax/tree_sitter_adapter.rs | 1 + gems/decomplex/rust/src/main.rs | 217 +- gems/decomplex/rust/tests/examples_oracle.rs | 299 +- gems/decomplex/test/examples_oracle_test.rb | 26 + .../test/report_facts_oracle_test.rb | 106 + 44 
files changed, 12202 insertions(+), 190 deletions(-) create mode 100644 gems/decomplex/examples/facts/detectors/decision-pressure.json create mode 100644 gems/decomplex/examples/facts/detectors/derived-state-branches.json create mode 100644 gems/decomplex/examples/facts/detectors/derived-state.json create mode 100644 gems/decomplex/examples/facts/detectors/false-simplicity.json create mode 100644 gems/decomplex/examples/facts/detectors/function-lcom.json create mode 100644 gems/decomplex/examples/facts/detectors/local-flow-go-receiver.json create mode 100644 gems/decomplex/examples/facts/detectors/local-flow-python-raw.json create mode 100644 gems/decomplex/examples/facts/detectors/locality-drag.json create mode 100644 gems/decomplex/examples/facts/detectors/operational-discontinuity.json create mode 100644 gems/decomplex/examples/facts/detectors/path-condition-derived.json create mode 100644 gems/decomplex/examples/facts/detectors/path-condition-raw.json create mode 100644 gems/decomplex/examples/facts/detectors/path-condition.json create mode 100644 gems/decomplex/examples/facts/detectors/redundant-nil-guard-facts.json create mode 100644 gems/decomplex/examples/facts/detectors/redundant-nil-guard-rich.json create mode 100644 gems/decomplex/examples/facts/detectors/sequence-mine-nested.json create mode 100644 gems/decomplex/examples/facts/detectors/sequence-mine-rich.json create mode 100644 gems/decomplex/examples/facts/detectors/sequence-mine.json create mode 100644 gems/decomplex/examples/facts/detectors/state-branch-density.json create mode 100644 gems/decomplex/examples/facts/detectors/state-mesh-rich.json create mode 100644 gems/decomplex/examples/facts/detectors/state-mesh.json create mode 100644 gems/decomplex/examples/facts/detectors/structural-topology-rich.json create mode 100644 gems/decomplex/examples/facts/detectors/structural-topology.json create mode 100644 gems/decomplex/examples/facts/detectors/weighted-inlined-complexity-rich.json create mode 
100644 gems/decomplex/examples/facts/detectors/weighted-inlined-complexity.json create mode 100644 gems/decomplex/examples/facts/report/postprocess.json create mode 100644 gems/decomplex/test/report_facts_oracle_test.rb diff --git a/gems/decomplex/examples/facts/detectors/decision-pressure.json b/gems/decomplex/examples/facts/detectors/decision-pressure.json new file mode 100644 index 000000000..38e872e35 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/decision-pressure.json @@ -0,0 +1,188 @@ +{ + "detector": "decision-pressure", + "input": { + "documents": [ + { + "file": "facts/decision.rb", + "language": "ruby", + "local_contract_assignments": { + "check": { + "candidate": "input.user" + } + }, + "local_methods": [ + { + "id": "Fixture#check", + "owner": "Fixture", + "name": "check", + "file": "facts/decision.rb", + "line": 1, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [ + 1, + 0, + 1, + 22 + ], + "source": "candidate = input.user", + "reads": [ + "input" + ], + "writes": [ + "candidate" + ], + "dependencies": [ + [ + "candidate", + "input" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 1, + "line": 2, + "end_line": 2, + "span": [ + 2, + 0, + 2, + 25 + ], + "source": "candidate.name rescue nil", + "reads": [ + "candidate" + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + + ] + } + ], + "call_sites": [ + { + "receiver": "candidate", + "message": "nil?", + "file": "facts/decision.rb", + "function": "check", + "owner": "Fixture", + "line": 3, + "span": [ + 3, + 2, + 3, + 16 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "candidate", + "message": "ready?", + "file": "facts/decision.rb", + "function": "check", + "owner": "Fixture", + "line": 4, + "span": [ + 4, + 2, + 4, + 18 + ], + "conditional": false, + "arguments": [ + + ], + "control": 
null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "candidate", + "message": "name", + "file": "facts/decision.rb", + "function": "check", + "owner": "Fixture", + "line": 2, + "span": [ + 2, + 0, + 2, + 20 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + } + ] + } + ] + }, + "expected": [ + { + "contract": ".user", + "decisions": 1, + "essential": 1, + "methods": 1, + "sites": [ + "facts/decision.rb:check:3" + ], + "spans": { + "facts/decision.rb:check:3": [ + 3, + 2, + 3, + 16 + ] + } + }, + { + "contract": ".name", + "decisions": 1, + "essential": 0, + "methods": 1, + "sites": [ + "facts/decision.rb:check:2" + ], + "spans": { + "facts/decision.rb:check:2": [ + 2, + 0, + 2, + 25 + ] + } + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/derived-state-branches.json b/gems/decomplex/examples/facts/detectors/derived-state-branches.json new file mode 100644 index 000000000..7326c37e5 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/derived-state-branches.json @@ -0,0 +1,353 @@ +{ + "detector": "derived-state", + "input": { + "documents": [ + { + "file": "facts/derived_branches.rb", + "language": "ruby", + "local_methods": [ + { + "id": "Fixture#ordered", + "owner": "Fixture", + "name": "ordered", + "file": "facts/derived_branches.rb", + "line": 1, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [ + 1, + 0, + 1, + 20 + ], + "source": "z = input; a = input", + "reads": [ + "input" + ], + "writes": [ + "z", + "a" + ], + "dependencies": [ + [ + "z", + "input" + ], + [ + "a", + "input" + ] + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#self_dep", + "owner": "Fixture", + "name": "self_dep", + "file": "facts/derived_branches.rb", + "line": 3, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 3, + "end_line": 3, + "span": [ + 
3, + 0, + 3, + 11 + ], + "source": "same = same", + "reads": [ + "same" + ], + "writes": [ + "same" + ], + "dependencies": [ + [ + "same", + "same" + ] + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#stale", + "owner": "Fixture", + "name": "stale", + "file": "facts/derived_branches.rb", + "line": 9, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 9, + "end_line": 9, + "span": [ + 9, + 0, + 9, + 13 + ], + "source": "source = load", + "reads": [ + "load" + ], + "writes": [ + "source" + ], + "dependencies": [ + [ + "source", + "load" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 1, + "line": 10, + "end_line": 10, + "span": [ + 10, + 0, + 10, + 16 + ], + "source": "derived = source", + "reads": [ + "source" + ], + "writes": [ + "derived" + ], + "dependencies": [ + [ + "derived", + "source" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 2, + "line": 13, + "end_line": 13, + "span": [ + 13, + 0, + 13, + 15 + ], + "source": "source = reload", + "reads": [ + "reload" + ], + "writes": [ + "source" + ], + "dependencies": [ + [ + "source", + "reload" + ] + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#recomputed", + "owner": "Fixture", + "name": "recomputed", + "file": "facts/derived_branches.rb", + "line": 5, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 5, + "end_line": 5, + "span": [ + 5, + 0, + 5, + 13 + ], + "source": "source = load", + "reads": [ + "load" + ], + "writes": [ + "source" + ], + "dependencies": [ + [ + "source", + "load" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 1, + "line": 6, + "end_line": 6, + "span": [ + 6, + 0, + 6, + 16 + ], + "source": "derived = source", + "reads": [ + "source" + ], + "writes": [ + "derived" + ], + "dependencies": [ + [ + "derived", + "source" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 2, + "line": 7, + "end_line": 7, + "span": [ + 7, + 0, + 7, + 15 + ], + "source": 
"source = reload", + "reads": [ + "reload" + ], + "writes": [ + "source" + ], + "dependencies": [ + [ + "source", + "reload" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 3, + "line": 8, + "end_line": 8, + "span": [ + 8, + 0, + 8, + 16 + ], + "source": "derived = source", + "reads": [ + "source" + ], + "writes": [ + "derived" + ], + "dependencies": [ + [ + "derived", + "source" + ] + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + + ] + } + ] + } + ] + }, + "expected": [ + { + "at": "facts/derived_branches.rb:stale:10", + "defn": "stale", + "derived": "derived", + "derived_at": 10, + "file": "facts/derived_branches.rb", + "gap": 3, + "source": "source", + "source_reassigned_at": 13, + "spans": { + "facts/derived_branches.rb:stale:10": [ + 10, + 0, + 10, + 16 + ] + } + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/derived-state.json b/gems/decomplex/examples/facts/detectors/derived-state.json new file mode 100644 index 000000000..0e521875d --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/derived-state.json @@ -0,0 +1,156 @@ +{ + "detector": "derived-state", + "input": { + "documents": [ + { + "file": "facts/derived.rb", + "language": "ruby", + "local_methods": [ + { + "id": "Fixture#refresh", + "owner": "Fixture", + "name": "refresh", + "file": "facts/derived.rb", + "line": 1, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [ + 1, + 0, + 1, + 13 + ], + "source": "source = load", + "reads": [ + "load" + ], + "writes": [ + "source" + ], + "dependencies": [ + [ + "source", + "load" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 1, + "line": 2, + "end_line": 2, + "span": [ + 2, + 0, + 2, + 16 + ], + "source": "derived = source", + "reads": [ + "source" + ], + "writes": [ + "derived" + ], + "dependencies": [ + [ + "derived", + "source" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 2, + "line": 7, + "end_line": 7, + "span": [ + 7, + 0, + 7, + 15 + ], + "source": 
"source = reload", + "reads": [ + "reload" + ], + "writes": [ + "source" + ], + "dependencies": [ + [ + "source", + "reload" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 3, + "line": 9, + "end_line": 9, + "span": [ + 9, + 0, + 9, + 12 + ], + "source": "use(derived)", + "reads": [ + "derived" + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + + ] + } + ] + } + ] + }, + "expected": [ + { + "at": "facts/derived.rb:refresh:2", + "defn": "refresh", + "derived": "derived", + "derived_at": 2, + "file": "facts/derived.rb", + "gap": 5, + "source": "source", + "source_reassigned_at": 7, + "spans": { + "facts/derived.rb:refresh:2": [ + 2, + 0, + 2, + 16 + ] + } + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/false-simplicity.json b/gems/decomplex/examples/facts/detectors/false-simplicity.json new file mode 100644 index 000000000..c7acd3a3c --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/false-simplicity.json @@ -0,0 +1,87 @@ +{ + "detector": "false-simplicity", + "input": { + "documents": [ + { + "file": "facts/false_a.rb", + "language": "ruby", + "source": "", + "lines": [], + "root": {"kind": "program", "text": "", "span": [1, 0, 1, 0], "named": true, "field_name": null, "children": []}, + "normalized_root": {"type": "ROOT", "children": [], "first_lineno": 1, "first_column": 0, "last_lineno": 1, "last_column": 0, "text": ""}, + "function_defs": [ + {"file": "facts/false_a.rb", "name": "run", "owner": "Project::Thing", "line": 2, "span": [2, 0, 2, 8], "body": {"kind": "body_statement", "text": "", "span": [2, 0, 2, 8], "named": true, "field_name": null, "children": []}, "visibility": null, "params": []} + ], + "owner_defs": [ + {"file": "facts/false_a.rb", "name": "Project::Thing", "kind": "class", "line": 1, "span": [1, 0, 1, 20]} + ], + "call_sites": [], + "state_reads": [], + "state_writes": [], + "decision_sites": [], + "branch_decisions": [], + "dispatch_sites": [], + 
"semantic_effect_sites": [ + {"kind": "hidden_io", "detail": "puts", "file": "facts/false_a.rb", "function": "run", "line": 3, "span": [3, 2, 3, 6]} + ], + "local_complexity_scores": {}, + "predicate_aliases": [], + "comparison_uses": [] + }, + { + "file": "facts/false_b.rb", + "language": "ruby", + "source": "", + "lines": [], + "root": {"kind": "program", "text": "", "span": [1, 0, 1, 0], "named": true, "field_name": null, "children": []}, + "normalized_root": {"type": "ROOT", "children": [], "first_lineno": 1, "first_column": 0, "last_lineno": 1, "last_column": 0, "text": ""}, + "function_defs": [ + {"file": "facts/false_b.rb", "name": "again", "owner": "Project::Thing", "line": 2, "span": [2, 0, 2, 10], "body": {"kind": "body_statement", "text": "", "span": [2, 0, 2, 10], "named": true, "field_name": null, "children": []}, "visibility": null, "params": []} + ], + "owner_defs": [ + {"file": "facts/false_b.rb", "name": "Project::Thing", "kind": "class", "line": 1, "span": [1, 0, 1, 20]} + ], + "call_sites": [], + "state_reads": [], + "state_writes": [], + "decision_sites": [], + "branch_decisions": [], + "dispatch_sites": [], + "semantic_effect_sites": [], + "local_complexity_scores": {}, + "predicate_aliases": [], + "comparison_uses": [] + } + ] + }, + "expected": [ + { + "at": "facts/false_a.rb:Project::Thing:1", + "detail": "reopen Project::Thing", + "kind": "monkeypatch", + "scatter": 2, + "sites": [ + "facts/false_a.rb:Project::Thing:1", + "facts/false_b.rb:Project::Thing:1" + ], + "spans": { + "facts/false_a.rb:Project::Thing:1": [1, 0, 1, 20], + "facts/false_b.rb:Project::Thing:1": [1, 0, 1, 20] + }, + "support": 2 + }, + { + "at": "facts/false_a.rb:run:3", + "detail": "puts", + "kind": "hidden_io", + "scatter": 1, + "sites": [ + "facts/false_a.rb:run:3" + ], + "spans": { + "facts/false_a.rb:run:3": [3, 2, 3, 6] + }, + "support": 1 + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/function-lcom.json 
b/gems/decomplex/examples/facts/detectors/function-lcom.json new file mode 100644 index 000000000..b871c2d57 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/function-lcom.json @@ -0,0 +1,248 @@ +{ + "detector": "function-lcom", + "input": { + "documents": [ + { + "file": "facts/lcom.rb", + "language": "ruby", + "local_methods": [ + { + "id": "Fixture#mixed", + "owner": "Fixture", + "name": "mixed", + "file": "facts/lcom.rb", + "line": 1, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [ + 1, + 0, + 1, + 11 + ], + "source": "a = input_a", + "reads": [ + "input_a" + ], + "writes": [ + "a" + ], + "dependencies": [ + [ + "a", + "input_a" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 1, + "line": 2, + "end_line": 2, + "span": [ + 2, + 0, + 2, + 16 + ], + "source": "b = normalize(a)", + "reads": [ + "a" + ], + "writes": [ + "b" + ], + "dependencies": [ + [ + "b", + "a" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 2, + "line": 3, + "end_line": 3, + "span": [ + 3, + 0, + 3, + 11 + ], + "source": "c = input_c", + "reads": [ + "input_c" + ], + "writes": [ + "c" + ], + "dependencies": [ + [ + "c", + "input_c" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 3, + "line": 4, + "end_line": 4, + "span": [ + 4, + 0, + 4, + 16 + ], + "source": "d = normalize(c)", + "reads": [ + "c" + ], + "writes": [ + "d" + ], + "dependencies": [ + [ + "d", + "c" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 4, + "line": 5, + "end_line": 5, + "span": [ + 5, + 0, + 5, + 11 + ], + "source": "e = input_e", + "reads": [ + "input_e" + ], + "writes": [ + "e" + ], + "dependencies": [ + [ + "e", + "input_e" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 5, + "line": 6, + "end_line": 6, + "span": [ + 6, + 0, + 6, + 16 + ], + "source": "f = normalize(e)", + "reads": [ + "e" + ], + "writes": [ + "f" + ], + "dependencies": [ + [ + "f", + "e" + ] + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + + ] + } + ] + } + 
] + }, + "expected": [ + { + "at": "facts/lcom.rb:mixed:1", + "component_lines": [ + [ + 1, + 2 + ], + [ + 3, + 4 + ], + [ + 5, + 6 + ] + ], + "component_vars": [ + [ + "a", + "b", + "input_a" + ], + [ + "c", + "d", + "input_c" + ], + [ + "e", + "f", + "input_e" + ] + ], + "components": 3, + "defn": "mixed", + "file": "facts/lcom.rb", + "line": 1, + "locals": 9, + "method": "mixed", + "mode": "disjoint", + "owner": "Fixture", + "score": 45, + "spans": { + "facts/lcom.rb:mixed:1": [ + 1, + 0, + 20, + 3 + ] + }, + "statements": 6, + "terminal_join": false + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/local-flow-go-receiver.json b/gems/decomplex/examples/facts/detectors/local-flow-go-receiver.json new file mode 100644 index 000000000..8e3bd0767 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/local-flow-go-receiver.json @@ -0,0 +1,755 @@ +{ + "detector": "local-flow", + "input": { + "documents": [ + { + "file": "facts/local_flow_receiver.go", + "language": "go", + "source": "package main\n\ntype Worker struct{}\n\nfunc (w *Worker) Handle(price int, tax int) Result {\n subtotal := price + tax\n return Result_init(w, subtotal)\n}\n", + "lines": [ + "package main", + "", + "type Worker struct{}", + "", + "func (w *Worker) Handle(price int, tax int) Result {", + " subtotal := price + tax", + " return Result_init(w, subtotal)", + "}" + ], + "function_defs": [ + { + "file": "facts/local_flow_receiver.go", + "name": "Handle", + "owner": "Worker", + "line": 5, + "span": [ + 5, + 0, + 8, + 1 + ], + "body": { + "kind": "method_declaration", + "text": "func (w *Worker) Handle(price int, tax int) Result {\n subtotal := price + tax\n return Result_init(w, subtotal)\n}", + "span": [ + 5, + 0, + 8, + 1 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "func", + "text": "func", + "span": [ + 5, + 0, + 5, + 4 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "parameter_list", + "text": "(w 
*Worker)", + "span": [ + 5, + 5, + 5, + 16 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 5, + 5, + 5, + 6 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "parameter_declaration", + "text": "w *Worker", + "span": [ + 5, + 6, + 5, + 15 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "w", + "span": [ + 5, + 6, + 5, + 7 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "pointer_type", + "text": "*Worker", + "span": [ + 5, + 8, + 5, + 15 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "*", + "text": "*", + "span": [ + 5, + 8, + 5, + 9 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "type_identifier", + "text": "Worker", + "span": [ + 5, + 9, + 5, + 15 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 5, + 15, + 5, + 16 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "field_identifier", + "text": "Handle", + "span": [ + 5, + 17, + 5, + 23 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "parameter_list", + "text": "(price int, tax int)", + "span": [ + 5, + 23, + 5, + 43 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 5, + 23, + 5, + 24 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "parameter_declaration", + "text": "price int", + "span": [ + 5, + 24, + 5, + 33 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "price", + "span": [ + 5, + 24, + 5, + 29 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "type_identifier", + "text": "int", + "span": [ + 5, + 30, + 5, + 33 + ], + "named": 
true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 5, + 33, + 5, + 34 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "parameter_declaration", + "text": "tax int", + "span": [ + 5, + 35, + 5, + 42 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "tax", + "span": [ + 5, + 35, + 5, + 38 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "type_identifier", + "text": "int", + "span": [ + 5, + 39, + 5, + 42 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 5, + 42, + 5, + 43 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "type_identifier", + "text": "Result", + "span": [ + 5, + 44, + 5, + 50 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "block", + "text": "{\n subtotal := price + tax\n return Result_init(w, subtotal)\n}", + "span": [ + 5, + 51, + 8, + 1 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "{", + "text": "{", + "span": [ + 5, + 51, + 5, + 52 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "statement_list", + "text": "subtotal := price + tax\n return Result_init(w, subtotal)\n", + "span": [ + 6, + 2, + 8, + 0 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "short_var_declaration", + "text": "subtotal := price + tax", + "span": [ + 6, + 2, + 6, + 25 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "expression_list", + "text": "subtotal", + "span": [ + 6, + 2, + 6, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ":=", + "text": ":=", + "span": [ + 6, + 11, + 6, + 13 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "expression_list", + 
"text": "price + tax", + "span": [ + 6, + 14, + 6, + 25 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "price", + "span": [ + 6, + 14, + 6, + 19 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "+", + "text": "+", + "span": [ + 6, + 20, + 6, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "tax", + "span": [ + 6, + 22, + 6, + 25 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + "kind": "return_statement", + "text": "return Result_init(w, subtotal)", + "span": [ + 7, + 2, + 7, + 33 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "return", + "text": "return", + "span": [ + 7, + 2, + 7, + 8 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "expression_list", + "text": "Result_init(w, subtotal)", + "span": [ + 7, + 9, + 7, + 33 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "Result_init", + "span": [ + 7, + 9, + 7, + 20 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "argument_list", + "text": "(w, subtotal)", + "span": [ + 7, + 20, + 7, + 33 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 7, + 20, + 7, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "w", + "span": [ + 7, + 21, + 7, + 22 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 7, + 22, + 7, + 23 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "subtotal", + "span": [ + 7, + 24, + 7, + 32 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 7, + 32, + 7, 
+ 33 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + } + ] + }, + { + "kind": "}", + "text": "}", + "span": [ + 8, + 0, + 8, + 1 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + "visibility": "public", + "params": [ + "price", + "tax" + ] + } + ] + } + ] + }, + "expected": [ + { + "boundaries": [ + + ], + "file": "facts/local_flow_receiver.go", + "id": "Worker#Handle", + "line": 5, + "name": "Handle", + "owner": "Worker", + "span": [ + 5, + 0, + 8, + 1 + ], + "statements": [ + { + "co_uses": [ + [ + "price", + "tax" + ] + ], + "dependencies": [ + [ + "subtotal", + "price" + ], + [ + "subtotal", + "tax" + ] + ], + "end_line": 6, + "index": 0, + "line": 6, + "reads": [ + "price", + "tax" + ], + "source": "subtotal := price + tax", + "span": [ + 6, + 2, + 6, + 25 + ], + "writes": [ + "subtotal" + ] + }, + { + "co_uses": [ + [ + "subtotal", + "w" + ] + ], + "dependencies": [ + + ], + "end_line": 7, + "index": 1, + "line": 7, + "reads": [ + "subtotal", + "w" + ], + "source": "return Result_init(w, subtotal)", + "span": [ + 7, + 2, + 7, + 33 + ], + "writes": [ + + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/local-flow-python-raw.json b/gems/decomplex/examples/facts/detectors/local-flow-python-raw.json new file mode 100644 index 000000000..82a2b8db1 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/local-flow-python-raw.json @@ -0,0 +1,2834 @@ +{ + "detector": "local-flow", + "input": { + "documents": [ + { + "file": "facts/local_flow_python.py", + "language": "python", + "source": "class Worker:\n def handle(self, price, tax, items, mapping, target):\n subtotal = price + tax\n running: int = subtotal\n annotated: int\n annotated = running\n if (chosen := mapping.get(\"key\")):\n running = running + chosen\n for item in items:\n running = running + item\n for other in items:\n running = running + other\n with open(\"log\") as handle:\n buffer = 
handle.read()\n target.slot = running\n target[price] = buffer\n mapping = {\"key\": buffer}\n return Result(running, buffer, target)\n", + "lines": [ + "class Worker:", + " def handle(self, price, tax, items, mapping, target):", + " subtotal = price + tax", + " running: int = subtotal", + " annotated: int", + " annotated = running", + " if (chosen := mapping.get(\"key\")):", + " running = running + chosen", + " for item in items:", + " running = running + item", + " for other in items:", + " running = running + other", + " with open(\"log\") as handle:", + " buffer = handle.read()", + " target.slot = running", + " target[price] = buffer", + " mapping = {\"key\": buffer}", + " return Result(running, buffer, target)" + ], + "function_defs": [ + { + "file": "facts/local_flow_python.py", + "name": "handle", + "owner": "Worker", + "line": 2, + "span": [ + 2, + 4, + 18, + 46 + ], + "body": { + "kind": "block", + "text": "def handle(self, price, tax, items, mapping, target):\n subtotal = price + tax\n running: int = subtotal\n annotated: int\n annotated = running\n if (chosen := mapping.get(\"key\")):\n running = running + chosen\n for item in items:\n running = running + item\n for other in items:\n running = running + other\n with open(\"log\") as handle:\n buffer = handle.read()\n target.slot = running\n target[price] = buffer\n mapping = {\"key\": buffer}\n return Result(running, buffer, target)", + "span": [ + 2, + 4, + 18, + 46 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "def", + "text": "def", + "span": [ + 2, + 4, + 2, + 7 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "handle", + "span": [ + 2, + 8, + 2, + 14 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "parameters", + "text": "(self, price, tax, items, mapping, target)", + "span": [ + 2, + 14, + 2, + 56 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", 
+ "text": "(", + "span": [ + 2, + 14, + 2, + 15 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "self", + "span": [ + 2, + 15, + 2, + 19 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 2, + 19, + 2, + 20 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "price", + "span": [ + 2, + 21, + 2, + 26 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 2, + 26, + 2, + 27 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "tax", + "span": [ + 2, + 28, + 2, + 31 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 2, + 31, + 2, + 32 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "items", + "span": [ + 2, + 33, + 2, + 38 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 2, + 38, + 2, + 39 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "mapping", + "span": [ + 2, + 40, + 2, + 47 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 2, + 47, + 2, + 48 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "target", + "span": [ + 2, + 49, + 2, + 55 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 2, + 55, + 2, + 56 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 2, + 56, + 2, + 57 + ], + "named": false, + "field_name": null, + "children": [ + + 
] + }, + { + "kind": "block", + "text": "subtotal = price + tax\n running: int = subtotal\n annotated: int\n annotated = running\n if (chosen := mapping.get(\"key\")):\n running = running + chosen\n for item in items:\n running = running + item\n for other in items:\n running = running + other\n with open(\"log\") as handle:\n buffer = handle.read()\n target.slot = running\n target[price] = buffer\n mapping = {\"key\": buffer}\n return Result(running, buffer, target)", + "span": [ + 3, + 8, + 18, + 46 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "expression_statement", + "text": "subtotal = price + tax", + "span": [ + 3, + 8, + 3, + 30 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "subtotal", + "span": [ + 3, + 8, + 3, + 16 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 3, + 17, + 3, + 18 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "binary_operator", + "text": "price + tax", + "span": [ + 3, + 19, + 3, + 30 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "price", + "span": [ + 3, + 19, + 3, + 24 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "+", + "text": "+", + "span": [ + 3, + 25, + 3, + 26 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "tax", + "span": [ + 3, + 27, + 3, + 30 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + "kind": "expression_statement", + "text": "running: int = subtotal", + "span": [ + 4, + 8, + 4, + 31 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "running", + "span": [ + 4, + 8, + 4, + 15 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 
4, + 15, + 4, + 16 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "type", + "text": "int", + "span": [ + 4, + 17, + 4, + 20 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 4, + 21, + 4, + 22 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "subtotal", + "span": [ + 4, + 23, + 4, + 31 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "expression_statement", + "text": "annotated: int", + "span": [ + 5, + 8, + 5, + 22 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "annotated", + "span": [ + 5, + 8, + 5, + 17 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 5, + 17, + 5, + 18 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "type", + "text": "int", + "span": [ + 5, + 19, + 5, + 22 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "expression_statement", + "text": "annotated = running", + "span": [ + 6, + 8, + 6, + 27 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "annotated", + "span": [ + 6, + 8, + 6, + 17 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 6, + 18, + 6, + 19 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "running", + "span": [ + 6, + 20, + 6, + 27 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "if_statement", + "text": "if (chosen := mapping.get(\"key\")):\n running = running + chosen", + "span": [ + 7, + 8, + 8, + 38 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "if", + "text": "if", + 
"span": [ + 7, + 8, + 7, + 10 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "parenthesized_expression", + "text": "(chosen := mapping.get(\"key\"))", + "span": [ + 7, + 11, + 7, + 41 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 7, + 11, + 7, + 12 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "named_expression", + "text": "chosen := mapping.get(\"key\")", + "span": [ + 7, + 12, + 7, + 40 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "chosen", + "span": [ + 7, + 12, + 7, + 18 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ":=", + "text": ":=", + "span": [ + 7, + 19, + 7, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "call", + "text": "mapping.get(\"key\")", + "span": [ + 7, + 22, + 7, + 40 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "attribute", + "text": "mapping.get", + "span": [ + 7, + 22, + 7, + 33 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "mapping", + "span": [ + 7, + 22, + 7, + 29 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 7, + 29, + 7, + 30 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "get", + "span": [ + 7, + 30, + 7, + 33 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "argument_list", + "text": "(\"key\")", + "span": [ + 7, + 33, + 7, + 40 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 7, + 33, + 7, + 34 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string", + "text": "\"key\"", + "span": [ + 7, + 34, + 7, + 39 + ], + 
"named": true, + "field_name": null, + "children": [ + { + "kind": "string_start", + "text": "\"", + "span": [ + 7, + 34, + 7, + 35 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string_content", + "text": "key", + "span": [ + 7, + 35, + 7, + 38 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string_end", + "text": "\"", + "span": [ + 7, + 38, + 7, + 39 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 7, + 39, + 7, + 40 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 7, + 40, + 7, + 41 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 7, + 41, + 7, + 42 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "block", + "text": "running = running + chosen", + "span": [ + 8, + 12, + 8, + 38 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "running", + "span": [ + 8, + 12, + 8, + 19 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 8, + 20, + 8, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "binary_operator", + "text": "running + chosen", + "span": [ + 8, + 22, + 8, + 38 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "running", + "span": [ + 8, + 22, + 8, + 29 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "+", + "text": "+", + "span": [ + 8, + 30, + 8, + 31 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "chosen", + "span": [ + 8, + 32, + 8, + 38 + ], + "named": true, + "field_name": null, + "children": [ + 
+ ] + } + ] + } + ] + } + ] + }, + { + "kind": "for_statement", + "text": "for item in items:\n running = running + item", + "span": [ + 9, + 8, + 10, + 36 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "for", + "text": "for", + "span": [ + 9, + 8, + 9, + 11 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "item", + "span": [ + 9, + 12, + 9, + 16 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "in", + "text": "in", + "span": [ + 9, + 17, + 9, + 19 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "items", + "span": [ + 9, + 20, + 9, + 25 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 9, + 25, + 9, + 26 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "block", + "text": "running = running + item", + "span": [ + 10, + 12, + 10, + 36 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "running", + "span": [ + 10, + 12, + 10, + 19 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 10, + 20, + 10, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "binary_operator", + "text": "running + item", + "span": [ + 10, + 22, + 10, + 36 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "running", + "span": [ + 10, + 22, + 10, + 29 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "+", + "text": "+", + "span": [ + 10, + 30, + 10, + 31 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "item", + "span": [ + 10, + 32, + 10, + 36 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + 
} + ] + } + ] + }, + { + "kind": "for_statement", + "text": "for other in items:\n running = running + other", + "span": [ + 11, + 8, + 12, + 37 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "for", + "text": "for", + "span": [ + 11, + 8, + 11, + 11 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "other", + "span": [ + 11, + 12, + 11, + 17 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "in", + "text": "in", + "span": [ + 11, + 18, + 11, + 20 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "items", + "span": [ + 11, + 21, + 11, + 26 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 11, + 26, + 11, + 27 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "block", + "text": "running = running + other", + "span": [ + 12, + 12, + 12, + 37 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "running", + "span": [ + 12, + 12, + 12, + 19 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 12, + 20, + 12, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "binary_operator", + "text": "running + other", + "span": [ + 12, + 22, + 12, + 37 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "running", + "span": [ + 12, + 22, + 12, + 29 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "+", + "text": "+", + "span": [ + 12, + 30, + 12, + 31 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "other", + "span": [ + 12, + 32, + 12, + 37 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + 
] + } + ] + } + ] + }, + { + "kind": "with_statement", + "text": "with open(\"log\") as handle:\n buffer = handle.read()", + "span": [ + 13, + 8, + 14, + 34 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "with", + "text": "with", + "span": [ + 13, + 8, + 13, + 12 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "with_clause", + "text": "open(\"log\") as handle", + "span": [ + 13, + 13, + 13, + 34 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "open(\"log\")", + "span": [ + 13, + 13, + 13, + 24 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "open", + "span": [ + 13, + 13, + 13, + 17 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "argument_list", + "text": "(\"log\")", + "span": [ + 13, + 17, + 13, + 24 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 13, + 17, + 13, + 18 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string", + "text": "\"log\"", + "span": [ + 13, + 18, + 13, + 23 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "string_start", + "text": "\"", + "span": [ + 13, + 18, + 13, + 19 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string_content", + "text": "log", + "span": [ + 13, + 19, + 13, + 22 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string_end", + "text": "\"", + "span": [ + 13, + 22, + 13, + 23 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 13, + 23, + 13, + 24 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + "kind": "as", + "text": "as", + "span": [ + 13, + 25, + 13, + 27 + ], + "named": false, + "field_name": null, + 
"children": [ + + ] + }, + { + "kind": "as_pattern_target", + "text": "handle", + "span": [ + 13, + 28, + 13, + 34 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 13, + 34, + 13, + 35 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "block", + "text": "buffer = handle.read()", + "span": [ + 14, + 12, + 14, + 34 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "buffer", + "span": [ + 14, + 12, + 14, + 18 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 14, + 19, + 14, + 20 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "call", + "text": "handle.read()", + "span": [ + 14, + 21, + 14, + 34 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "attribute", + "text": "handle.read", + "span": [ + 14, + 21, + 14, + 32 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "handle", + "span": [ + 14, + 21, + 14, + 27 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 14, + 27, + 14, + 28 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "read", + "span": [ + 14, + 28, + 14, + 32 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "argument_list", + "text": "()", + "span": [ + 14, + 32, + 14, + 34 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 14, + 32, + 14, + 33 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 14, + 33, + 14, + 34 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + } + ] + }, + { + "kind": 
"expression_statement", + "text": "target.slot = running", + "span": [ + 15, + 8, + 15, + 29 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "attribute", + "text": "target.slot", + "span": [ + 15, + 8, + 15, + 19 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "target", + "span": [ + 15, + 8, + 15, + 14 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 15, + 14, + 15, + 15 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "slot", + "span": [ + 15, + 15, + 15, + 19 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 15, + 20, + 15, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "running", + "span": [ + 15, + 22, + 15, + 29 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "expression_statement", + "text": "target[price] = buffer", + "span": [ + 16, + 8, + 16, + 30 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "subscript", + "text": "target[price]", + "span": [ + 16, + 8, + 16, + 21 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "target", + "span": [ + 16, + 8, + 16, + 14 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "[", + "text": "[", + "span": [ + 16, + 14, + 16, + 15 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "price", + "span": [ + 16, + 15, + 16, + 20 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "]", + "text": "]", + "span": [ + 16, + 20, + 16, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "=", + "text": 
"=", + "span": [ + 16, + 22, + 16, + 23 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "buffer", + "span": [ + 16, + 24, + 16, + 30 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "expression_statement", + "text": "mapping = {\"key\": buffer}", + "span": [ + 17, + 8, + 17, + 33 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "mapping", + "span": [ + 17, + 8, + 17, + 15 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 17, + 16, + 17, + 17 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "dictionary", + "text": "{\"key\": buffer}", + "span": [ + 17, + 18, + 17, + 33 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "{", + "text": "{", + "span": [ + 17, + 18, + 17, + 19 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "pair", + "text": "\"key\": buffer", + "span": [ + 17, + 19, + 17, + 32 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "string", + "text": "\"key\"", + "span": [ + 17, + 19, + 17, + 24 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "string_start", + "text": "\"", + "span": [ + 17, + 19, + 17, + 20 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string_content", + "text": "key", + "span": [ + 17, + 20, + 17, + 23 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string_end", + "text": "\"", + "span": [ + 17, + 23, + 17, + 24 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 17, + 24, + 17, + 25 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "buffer", + "span": [ + 17, + 
26, + 17, + 32 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "}", + "text": "}", + "span": [ + 17, + 32, + 17, + 33 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + "kind": "return_statement", + "text": "return Result(running, buffer, target)", + "span": [ + 18, + 8, + 18, + 46 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "return", + "text": "return", + "span": [ + 18, + 8, + 18, + 14 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "call", + "text": "Result(running, buffer, target)", + "span": [ + 18, + 15, + 18, + 46 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "Result", + "span": [ + 18, + 15, + 18, + 21 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "argument_list", + "text": "(running, buffer, target)", + "span": [ + 18, + 21, + 18, + 46 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 18, + 21, + 18, + 22 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "running", + "span": [ + 18, + 22, + 18, + 29 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 18, + 29, + 18, + 30 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "buffer", + "span": [ + 18, + 31, + 18, + 37 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 18, + 37, + 18, + 38 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "target", + "span": [ + 18, + 39, + 18, + 45 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 18, 
+ 45, + 18, + 46 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, + "visibility": "public", + "params": [ + "self", + "price", + "tax", + "items", + "mapping", + "target" + ] + } + ] + } + ] + }, + "expected": [ + { + "boundaries": [ + + ], + "file": "facts/local_flow_python.py", + "id": "Worker#handle", + "line": 2, + "name": "handle", + "owner": "Worker", + "span": [ + 2, + 4, + 18, + 46 + ], + "statements": [ + { + "co_uses": [ + [ + "price", + "tax" + ] + ], + "dependencies": [ + [ + "subtotal", + "price" + ], + [ + "subtotal", + "tax" + ] + ], + "end_line": 3, + "index": 0, + "line": 3, + "reads": [ + "price", + "tax" + ], + "source": "subtotal = price + tax", + "span": [ + 3, + 8, + 3, + 30 + ], + "writes": [ + "subtotal" + ] + }, + { + "co_uses": [ + + ], + "dependencies": [ + [ + "running", + "subtotal" + ] + ], + "end_line": 4, + "index": 1, + "line": 4, + "reads": [ + "subtotal" + ], + "source": "running: int = subtotal", + "span": [ + 4, + 8, + 4, + 31 + ], + "writes": [ + "running" + ] + }, + { + "co_uses": [ + + ], + "dependencies": [ + + ], + "end_line": 5, + "index": 2, + "line": 5, + "reads": [ + + ], + "source": "annotated: int", + "span": [ + 5, + 8, + 5, + 22 + ], + "writes": [ + "annotated" + ] + }, + { + "co_uses": [ + + ], + "dependencies": [ + [ + "annotated", + "running" + ] + ], + "end_line": 6, + "index": 3, + "line": 6, + "reads": [ + "running" + ], + "source": "annotated = running", + "span": [ + 6, + 8, + 6, + 27 + ], + "writes": [ + "annotated" + ] + }, + { + "co_uses": [ + [ + "chosen", + "mapping" + ], + [ + "chosen", + "running" + ], + [ + "mapping", + "running" + ] + ], + "dependencies": [ + [ + "chosen", + "mapping" + ], + [ + "running", + "mapping" + ] + ], + "end_line": 8, + "index": 4, + "line": 7, + "reads": [ + "chosen", + "mapping", + "running" + ], + "source": "if (chosen := mapping.get(\"key\")): running = running + chosen", + "span": [ + 7, + 8, + 8, + 38 + ], 
+ "writes": [ + "chosen", + "running" + ] + }, + { + "co_uses": [ + [ + "item", + "items" + ], + [ + "item", + "running" + ], + [ + "items", + "running" + ] + ], + "dependencies": [ + [ + "item", + "items" + ], + [ + "running", + "items" + ] + ], + "end_line": 10, + "index": 5, + "line": 9, + "reads": [ + "item", + "items", + "running" + ], + "source": "for item in items: running = running + item", + "span": [ + 9, + 8, + 10, + 36 + ], + "writes": [ + "item", + "running" + ] + }, + { + "co_uses": [ + [ + "items", + "other" + ], + [ + "items", + "running" + ], + [ + "other", + "running" + ] + ], + "dependencies": [ + [ + "other", + "items" + ], + [ + "running", + "items" + ] + ], + "end_line": 12, + "index": 6, + "line": 11, + "reads": [ + "items", + "other", + "running" + ], + "source": "for other in items: running = running + other", + "span": [ + 11, + 8, + 12, + 37 + ], + "writes": [ + "other", + "running" + ] + }, + { + "co_uses": [ + + ], + "dependencies": [ + + ], + "end_line": 14, + "index": 7, + "line": 13, + "reads": [ + "handle" + ], + "source": "with open(\"log\") as handle: buffer = handle.read()", + "span": [ + 13, + 8, + 14, + 34 + ], + "writes": [ + "buffer", + "handle" + ] + }, + { + "co_uses": [ + [ + "running", + "target" + ] + ], + "dependencies": [ + + ], + "end_line": 15, + "index": 8, + "line": 15, + "reads": [ + "running", + "target" + ], + "source": "target.slot = running", + "span": [ + 15, + 8, + 15, + 29 + ], + "writes": [ + + ] + }, + { + "co_uses": [ + [ + "buffer", + "price" + ], + [ + "buffer", + "target" + ], + [ + "price", + "target" + ] + ], + "dependencies": [ + + ], + "end_line": 16, + "index": 9, + "line": 16, + "reads": [ + "buffer", + "price", + "target" + ], + "source": "target[price] = buffer", + "span": [ + 16, + 8, + 16, + 30 + ], + "writes": [ + + ] + }, + { + "co_uses": [ + + ], + "dependencies": [ + [ + "mapping", + "buffer" + ] + ], + "end_line": 17, + "index": 10, + "line": 17, + "reads": [ + "buffer" + ], + "source": 
"mapping = {\"key\": buffer}", + "span": [ + 17, + 8, + 17, + 33 + ], + "writes": [ + "mapping" + ] + }, + { + "co_uses": [ + [ + "buffer", + "running" + ], + [ + "buffer", + "target" + ], + [ + "running", + "target" + ] + ], + "dependencies": [ + + ], + "end_line": 18, + "index": 11, + "line": 18, + "reads": [ + "buffer", + "running", + "target" + ], + "source": "return Result(running, buffer, target)", + "span": [ + 18, + 8, + 18, + 46 + ], + "writes": [ + + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/locality-drag.json b/gems/decomplex/examples/facts/detectors/locality-drag.json new file mode 100644 index 000000000..e4b7a9c5e --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/locality-drag.json @@ -0,0 +1,260 @@ +{ + "detector": "locality-drag", + "input": { + "documents": [ + { + "file": "facts/locality.rb", + "language": "ruby", + "local_complexity_scores": { + "Fixture#assemble": { + "score": 18.0, + "signals": { + "branches": 2 + } + } + }, + "local_methods": [ + { + "id": "Fixture#assemble", + "owner": "Fixture", + "name": "assemble", + "file": "facts/locality.rb", + "line": 1, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [ + 1, + 0, + 1, + 23 + ], + "source": "payload = build_payload", + "reads": [ + "build_payload" + ], + "writes": [ + "payload" + ], + "dependencies": [ + [ + "payload", + "build_payload" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 1, + "line": 3, + "end_line": 3, + "span": [ + 3, + 0, + 3, + 18 + ], + "source": "alpha = load_alpha", + "reads": [ + "load_alpha" + ], + "writes": [ + "alpha" + ], + "dependencies": [ + [ + "alpha", + "load_alpha" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 2, + "line": 5, + "end_line": 5, + "span": [ + 5, + 0, + 5, + 16 + ], + "source": "beta = load_beta", + "reads": [ + "load_beta" + ], + "writes": [ + "beta" + ], + "dependencies": [ + [ + "beta", + "load_beta" + ] + ], + "co_uses": [ + + ] 
+ }, + { + "index": 3, + "line": 7, + "end_line": 7, + "span": [ + 7, + 0, + 7, + 18 + ], + "source": "gamma = load_gamma", + "reads": [ + "load_gamma" + ], + "writes": [ + "gamma" + ], + "dependencies": [ + [ + "gamma", + "load_gamma" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 4, + "line": 9, + "end_line": 9, + "span": [ + 9, + 0, + 9, + 18 + ], + "source": "delta = load_delta", + "reads": [ + "load_delta" + ], + "writes": [ + "delta" + ], + "dependencies": [ + [ + "delta", + "load_delta" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 5, + "line": 13, + "end_line": 13, + "span": [ + 13, + 0, + 13, + 16 + ], + "source": "consume(payload)", + "reads": [ + "payload" + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + { + "before_index": 0, + "after_index": 5, + "line": 11, + "kind": "comment", + "text": "# phase 2" + } + ] + } + ] + } + ] + }, + "expected": [ + { + "at": "facts/locality.rb:assemble:1", + "boundaries": [ + { + "line": 11, + "marker": "# phase 2" + } + ], + "boundary_crossings": 1, + "defined_at": 1, + "definition_deps": [ + "build_payload" + ], + "defn": "assemble", + "examples": [ + { + "line": 3, + "source": "alpha = load_alpha" + }, + { + "line": 5, + "source": "beta = load_beta" + }, + { + "line": 7, + "source": "gamma = load_gamma" + } + ], + "file": "facts/locality.rb", + "gap_lines": 12, + "gap_statements": 4, + "line": 1, + "local_complexity": 18.0, + "method": "assemble", + "owner": "Fixture", + "reason": "`payload` is initialized 12 line(s) before first use; 4 unrelated intervening statement(s); 1 structural boundary crossing(s); method local complexity 18.0", + "related_statements": 0, + "score": 63, + "setup_statements": 0, + "spans": { + "facts/locality.rb:assemble:1": [ + 1, + 0, + 20, + 3 + ] + }, + "unrelated_statements": 4, + "use_reads": [ + "payload" + ], + "used_at": 13, + "variable": "payload" + } + ] +} diff --git 
a/gems/decomplex/examples/facts/detectors/operational-discontinuity.json b/gems/decomplex/examples/facts/detectors/operational-discontinuity.json new file mode 100644 index 000000000..b3e9a0f49 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/operational-discontinuity.json @@ -0,0 +1,245 @@ +{ + "detector": "operational-discontinuity", + "input": { + "documents": [ + { + "file": "facts/operational.rb", + "language": "ruby", + "local_methods": [ + { + "id": "Fixture#process", + "owner": "Fixture", + "name": "process", + "file": "facts/operational.rb", + "line": 1, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [ + 1, + 0, + 1, + 9 + ], + "source": "a = first", + "reads": [ + "first" + ], + "writes": [ + "a" + ], + "dependencies": [ + [ + "a", + "first" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 1, + "line": 2, + "end_line": 2, + "span": [ + 2, + 0, + 2, + 10 + ], + "source": "b = second", + "reads": [ + "second" + ], + "writes": [ + "b" + ], + "dependencies": [ + [ + "b", + "second" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 2, + "line": 3, + "end_line": 3, + "span": [ + 3, + 0, + 3, + 9 + ], + "source": "use(a, b)", + "reads": [ + "a", + "b" + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + }, + { + "index": 3, + "line": 7, + "end_line": 7, + "span": [ + 7, + 0, + 7, + 9 + ], + "source": "c = third", + "reads": [ + "third" + ], + "writes": [ + "c" + ], + "dependencies": [ + [ + "c", + "third" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 4, + "line": 8, + "end_line": 8, + "span": [ + 8, + 0, + 8, + 10 + ], + "source": "d = fourth", + "reads": [ + "fourth" + ], + "writes": [ + "d" + ], + "dependencies": [ + [ + "d", + "fourth" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 5, + "line": 9, + "end_line": 9, + "span": [ + 9, + 0, + 9, + 9 + ], + "source": "use(c, d)", + "reads": [ + "c", + "d" + ], + "writes": [ + + ], + "dependencies": [ + + ], + 
"co_uses": [ + + ] + } + ], + "boundaries": [ + { + "before_index": 2, + "after_index": 3, + "line": 6, + "kind": "comment", + "text": "# phase 2" + } + ] + } + ] + } + ] + }, + "expected": [ + { + "at": "facts/operational.rb:process:1", + "confidence": "high", + "confidence_reasons": [ + "explicit_phase_marker" + ], + "dead_total": 4, + "defn": "process", + "file": "facts/operational.rb", + "line": 1, + "method": "process", + "new_total": 4, + "owner": "Fixture", + "reset_points": [ + { + "after_statement": 3, + "before_statement": 2, + "continuing": [ + + ], + "dead": [ + "a", + "b", + "first", + "second" + ], + "kind": "comment", + "line": 6, + "new": [ + "c", + "d", + "fourth", + "third" + ], + "text": "# phase 2" + } + ], + "resets": 1, + "score": 16, + "spans": { + "facts/operational.rb:process:1": [ + 1, + 0, + 20, + 3 + ] + } + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/path-condition-derived.json b/gems/decomplex/examples/facts/detectors/path-condition-derived.json new file mode 100644 index 000000000..204612d34 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/path-condition-derived.json @@ -0,0 +1,444 @@ +{ + "detector": "path-condition", + "input": { + "documents": [ + { + "file": "facts/path_derived.rb", + "language": "ruby", + "lines": [ + "def paths", + " if a", + " if b", + " if c", + " commit_one", + " end", + " if c", + " commit_two", + " end", + " if c", + " commit_three", + " end", + " commit_four", + " end", + " end", + "end" + ], + "function_defs": [ + { + "file": "facts/path_derived.rb", + "name": "paths", + "owner": "Fixture", + "line": 1, + "span": [ + 1, + 0, + 16, + 3 + ], + "body": { + "kind": "method", + "text": "def paths", + "span": [ + 1, + 0, + 16, + 3 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "paths", + "span": [ + 1, + 4, + 1, + 9 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "body_statement", + "text": "", + 
"span": [ + 2, + 2, + 15, + 5 + ], + "named": true, + "field_name": "body", + "children": [ + { + "kind": "if", + "text": " if a\n if b\n if c\n commit_one\n end\n if c\n commit_two\n end\n if c\n commit_three\n end\n commit_four\n end\n end", + "span": [ + 2, + 2, + 15, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "a", + "span": [ + 2, + 5, + 2, + 6 + ], + "named": true, + "field_name": "condition", + "children": [ + + ] + }, + { + "kind": "then", + "text": "", + "span": [ + 3, + 4, + 14, + 7 + ], + "named": true, + "field_name": "consequence", + "children": [ + { + "kind": "if", + "text": " if b\n if c\n commit_one\n end\n if c\n commit_two\n end\n if c\n commit_three\n end\n commit_four\n end", + "span": [ + 3, + 4, + 14, + 7 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "b", + "span": [ + 3, + 7, + 3, + 8 + ], + "named": true, + "field_name": "condition", + "children": [ + + ] + }, + { + "kind": "then", + "text": "", + "span": [ + 4, + 6, + 13, + 17 + ], + "named": true, + "field_name": "consequence", + "children": [ + { + "kind": "if", + "text": "if c\n commit_one\nend", + "span": [ + 4, + 6, + 6, + 9 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "c", + "span": [ + 4, + 9, + 4, + 10 + ], + "named": true, + "field_name": "condition", + "children": [ + + ] + }, + { + "kind": "then", + "text": "commit_one", + "span": [ + 5, + 8, + 5, + 18 + ], + "named": true, + "field_name": "consequence", + "children": [ + { + "kind": "call", + "text": "commit_one", + "span": [ + 5, + 8, + 5, + 18 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "commit_one", + "span": [ + 5, + 8, + 5, + 18 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "if", + "text": "if c\n commit_two\nend", + "span": [ + 7, + 6, + 9, + 9 + 
], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "c", + "span": [ + 7, + 9, + 7, + 10 + ], + "named": true, + "field_name": "condition", + "children": [ + + ] + }, + { + "kind": "then", + "text": "commit_two", + "span": [ + 8, + 8, + 8, + 18 + ], + "named": true, + "field_name": "consequence", + "children": [ + { + "kind": "call", + "text": "commit_two", + "span": [ + 8, + 8, + 8, + 18 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "commit_two", + "span": [ + 8, + 8, + 8, + 18 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "if", + "text": "if c\n commit_three\nend", + "span": [ + 10, + 6, + 12, + 9 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "c", + "span": [ + 10, + 9, + 10, + 10 + ], + "named": true, + "field_name": "condition", + "children": [ + + ] + }, + { + "kind": "then", + "text": "commit_three", + "span": [ + 11, + 8, + 11, + 20 + ], + "named": true, + "field_name": "consequence", + "children": [ + { + "kind": "call", + "text": "commit_three", + "span": [ + 11, + 8, + 11, + 20 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "commit_three", + "span": [ + 11, + 8, + 11, + 20 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "call", + "text": "commit_four", + "span": [ + 13, + 6, + 13, + 17 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "commit_four", + "span": [ + 13, + 6, + 13, + 17 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, + "visibility": "public", + "params": [ + + ] + } + ] + } + ] + }, + "expected": { + "neglected": [ + { + "action": "commit_four", + "at": "facts/path_derived.rb:paths:13", + "missing": 
"c", + "pattern": [ + "a", + "b", + "c" + ], + "spans": { + "facts/path_derived.rb:paths:13": [ + 13, + 6, + 13, + 17 + ] + }, + "support": 3 + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/path-condition-raw.json b/gems/decomplex/examples/facts/detectors/path-condition-raw.json new file mode 100644 index 000000000..350369ec4 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/path-condition-raw.json @@ -0,0 +1,274 @@ +{ + "detector": "path-condition", + "input": { + "documents": [ + { + "file": "facts/path_raw.rb", + "language": "ruby", + "function_defs": [ + { + "file": "facts/path_raw.rb", + "name": "one", + "owner": "Fixture", + "line": 1, + "span": [ + 1, + 0, + 5, + 3 + ], + "body": { + "kind": "method", + "text": "def one", + "span": [ + 1, + 0, + 5, + 3 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "one", + "span": [ + 1, + 0, + 1, + 3 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "body_statement", + "text": "", + "span": [ + 1, + 0, + 5, + 3 + ], + "named": true, + "field_name": "body", + "children": [ + { + "kind": "if", + "text": "if a\n if b\n commit\n end\nend", + "span": [ + 1, + 0, + 5, + 3 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "a", + "span": [ + 1, + 3, + 1, + 4 + ], + "named": true, + "field_name": "condition", + "children": [ + + ] + }, + { + "kind": "body_statement", + "text": "if b", + "span": [ + 2, + 2, + 4, + 5 + ], + "named": true, + "field_name": "consequence", + "children": [ + { + "kind": "if", + "text": "if b\n commit\nend", + "span": [ + 2, + 2, + 4, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "b", + "span": [ + 2, + 5, + 2, + 6 + ], + "named": true, + "field_name": "condition", + "children": [ + + ] + }, + { + "kind": "body_statement", + "text": "commit", + "span": [ + 3, + 4, + 3, + 10 + ], + "named": 
true, + "field_name": "consequence", + "children": [ + { + "kind": "call", + "text": "commit", + "span": [ + 3, + 4, + 3, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "commit", + "span": [ + 3, + 4, + 3, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, + "visibility": "public", + "params": [ + + ] + } + ], + "path_condition_sites": [ + { + "guards": [ + "a", + "b" + ], + "action": "commit", + "file": "facts/path_raw.rb", + "function": "one", + "line": 3, + "span": [ + 3, + 4, + 3, + 10 + ] + }, + { + "guards": [ + "a", + "b" + ], + "action": "commit", + "file": "facts/path_raw.rb", + "function": "two", + "line": 6, + "span": [ + 6, + 0, + 6, + 6 + ] + }, + { + "guards": [ + "a", + "b" + ], + "action": "commit", + "file": "facts/path_raw.rb", + "function": "three", + "line": 7, + "span": [ + 7, + 0, + 7, + 6 + ] + }, + { + "guards": [ + "a" + ], + "action": "commit", + "file": "facts/path_raw.rb", + "function": "four", + "line": 8, + "span": [ + 8, + 0, + 8, + 6 + ] + } + ] + } + ] + }, + "expected": { + "neglected": [ + { + "action": "commit", + "at": "facts/path_raw.rb:four:8", + "missing": "b", + "pattern": [ + "a", + "b" + ], + "spans": { + "facts/path_raw.rb:four:8": [ + 8, + 0, + 8, + 6 + ] + }, + "support": 3 + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/path-condition.json b/gems/decomplex/examples/facts/detectors/path-condition.json new file mode 100644 index 000000000..4ab29bd07 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/path-condition.json @@ -0,0 +1,98 @@ +{ + "detector": "path-condition", + "input": { + "documents": [ + { + "file": "facts/path.rb", + "language": "ruby", + "path_condition_sites": [ + { + "guards": [ + "a", + "b" + ], + "action": "commit", + "file": "facts/path.rb", + "function": "one", + "line": 1, + "span": [ + 1, + 0, + 1, + 6 + ] + }, + { + "guards": [ + 
"a", + "b" + ], + "action": "commit", + "file": "facts/path.rb", + "function": "two", + "line": 2, + "span": [ + 2, + 0, + 2, + 6 + ] + }, + { + "guards": [ + "a", + "b" + ], + "action": "commit", + "file": "facts/path.rb", + "function": "three", + "line": 3, + "span": [ + 3, + 0, + 3, + 6 + ] + }, + { + "guards": [ + "a" + ], + "action": "commit", + "file": "facts/path.rb", + "function": "four", + "line": 4, + "span": [ + 4, + 0, + 4, + 6 + ] + } + ] + } + ] + }, + "expected": { + "neglected": [ + { + "action": "commit", + "at": "facts/path.rb:four:4", + "missing": "b", + "pattern": [ + "a", + "b" + ], + "spans": { + "facts/path.rb:four:4": [ + 4, + 0, + 4, + 6 + ] + }, + "support": 3 + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/redundant-nil-guard-facts.json b/gems/decomplex/examples/facts/detectors/redundant-nil-guard-facts.json new file mode 100644 index 000000000..1e80a6a15 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/redundant-nil-guard-facts.json @@ -0,0 +1,402 @@ +{ + "detector": "redundant-nil-guard", + "input": { + "documents": [ + { + "file": "facts/nil_guard.rb", + "language": "ruby", + "lines": [ + "def check", + " if x != nil", + " x.nil?", + " end", + "end" + ], + "function_defs": [ + { + "file": "facts/nil_guard.rb", + "name": "check", + "owner": "Fixture", + "line": 1, + "span": [ + 1, + 0, + 5, + 3 + ], + "body": { + "kind": "method", + "text": "def check", + "span": [ + 1, + 0, + 5, + 3 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "check", + "span": [ + 1, + 4, + 1, + 9 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "body_statement", + "text": "", + "span": [ + 2, + 2, + 4, + 5 + ], + "named": true, + "field_name": "body", + "children": [ + { + "kind": "if", + "text": "if x != nil\n x.nil?\nend", + "span": [ + 2, + 2, + 4, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "binary", + "text": 
"x != nil", + "span": [ + 2, + 5, + 2, + 13 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "x", + "span": [ + 2, + 5, + 2, + 6 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "!=", + "text": "!=", + "span": [ + 2, + 7, + 2, + 9 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "nil", + "text": "nil", + "span": [ + 2, + 10, + 2, + 13 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "then", + "text": "x.nil?", + "span": [ + 3, + 4, + 3, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "x.nil?", + "span": [ + 3, + 4, + 3, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "x", + "span": [ + 3, + 4, + 3, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "nil?", + "span": [ + 3, + 6, + 3, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, + "visibility": "public", + "params": [ + + ] + } + ], + "normalized_root": { + "type": "ROOT", + "children": [ + { + "Node": { + "type": "DEFN", + "children": [ + { + "Symbol": "check" + }, + { + "Node": { + "type": "SCOPE", + "children": [ + { + "Nil": null + }, + { + "Nil": null + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "IF", + "children": [ + { + "Node": { + "type": "OPCALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "x" + } + ], + "first_lineno": 2, + "first_column": 5, + "last_lineno": 2, + "last_column": 6, + "text": "x" + } + }, + { + "Symbol": "!=" + }, + { + "Node": { + "type": "LIST", + "children": [ + { + "Node": { + "type": "NIL", + "children": [ + + ], + "first_lineno": 2, + "first_column": 10, + "last_lineno": 2, + "last_column": 13, + "text": "nil" + } 
+ } + ], + "first_lineno": 2, + "first_column": 10, + "last_lineno": 2, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 2, + "first_column": 5, + "last_lineno": 2, + "last_column": 13, + "text": "x != nil" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "x" + } + ], + "first_lineno": 3, + "first_column": 4, + "last_lineno": 3, + "last_column": 5, + "text": "x" + } + }, + { + "Symbol": "nil?" + } + ], + "first_lineno": 3, + "first_column": 4, + "last_lineno": 3, + "last_column": 10, + "text": "x.nil?" + } + } + ], + "first_lineno": 3, + "first_column": 4, + "last_lineno": 3, + "last_column": 10, + "text": "x.nil?" + } + }, + { + "Nil": null + } + ], + "first_lineno": 2, + "first_column": 2, + "last_lineno": 4, + "last_column": 5, + "text": "if x != nil\n x.nil?\nend" + } + } + ], + "first_lineno": 2, + "first_column": 2, + "last_lineno": 4, + "last_column": 5, + "text": "if x != nil\n x.nil?\nend" + } + } + ], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 5, + "last_column": 3, + "text": "def check" + } + } + ], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 5, + "last_column": 3, + "text": "def check" + } + } + ], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 5, + "last_column": 3, + "text": "def check" + } + } + ] + }, + "expected": [ + { + "at": "facts/nil_guard.rb:check:3", + "defn": "check", + "file": "facts/nil_guard.rb", + "guard": "x.nil?", + "line": 3, + "local": "x", + "proof": "x is already proven non-nil on this path", + "span": [ + 3, + 4, + 3, + 10 + ], + "spans": { + "facts/nil_guard.rb:check:3": [ + 3, + 4, + 3, + 10 + ] + } + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/redundant-nil-guard-rich.json b/gems/decomplex/examples/facts/detectors/redundant-nil-guard-rich.json new file mode 100644 index 000000000..baf8387f3 --- /dev/null +++ 
b/gems/decomplex/examples/facts/detectors/redundant-nil-guard-rich.json @@ -0,0 +1,2209 @@ +{ + "detector": "redundant-nil-guard", + "input": { + "documents": [ + { + "file": "facts/nil_guard_rich.rb", + "language": "ruby", + "lines": [ + "def rich", + " if x != nil && y.present?", + " x.nil?", + " y.present?", + " x&.foo", + " end", + " if z.nil?", + " return", + " else", + " z.nil?", + " end", + " unless w.nil?", + " w.nil?", + " end", + " if obj&.ready", + " obj&.name", + " end", + " if b != nil", + " b.nil?", + " else", + " return", + " end", + " if a != nil", + " return", + " else", + " abort", + " end", + "end" + ], + "function_defs": [ + { + "file": "facts/nil_guard_rich.rb", + "name": "rich", + "owner": "Fixture", + "line": 1, + "span": [ + 1, + 0, + 28, + 3 + ], + "body": { + "kind": "method", + "text": "def rich", + "span": [ + 1, + 0, + 28, + 3 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "rich", + "span": [ + 1, + 4, + 1, + 8 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "body_statement", + "text": "", + "span": [ + 2, + 2, + 27, + 5 + ], + "named": true, + "field_name": "body", + "children": [ + { + "kind": "if", + "text": "", + "span": [ + 2, + 2, + 6, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "binary", + "text": "x != nil && y.present?", + "span": [ + 2, + 5, + 2, + 27 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "binary", + "text": "x != nil", + "span": [ + 2, + 5, + 2, + 14 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "x", + "span": [ + 2, + 5, + 2, + 6 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "!=", + "text": "!=", + "span": [ + 2, + 7, + 2, + 9 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "nil", + "text": "nil", + "span": [ + 2, + 10, + 2, + 13 + ], + 
"named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "&&", + "text": "&&", + "span": [ + 2, + 14, + 2, + 16 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "call", + "text": "y.present?", + "span": [ + 2, + 17, + 2, + 27 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "y", + "span": [ + 2, + 17, + 2, + 18 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 2, + 18, + 2, + 19 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "present?", + "span": [ + 2, + 19, + 2, + 27 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + "kind": "then", + "text": "x.nil?\ny.present?\nx&.foo", + "span": [ + 3, + 4, + 5, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "x.nil?", + "span": [ + 3, + 4, + 3, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "x", + "span": [ + 3, + 4, + 3, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 3, + 5, + 3, + 6 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "nil?", + "span": [ + 3, + 6, + 3, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "call", + "text": "y.present?", + "span": [ + 4, + 4, + 4, + 14 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "y", + "span": [ + 4, + 4, + 4, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 4, + 5, + 4, + 6 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": 
"present?", + "span": [ + 4, + 6, + 4, + 14 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "call", + "text": "x&.foo", + "span": [ + 5, + 4, + 5, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "x", + "span": [ + 5, + 4, + 5, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "&.", + "text": "&.", + "span": [ + 5, + 5, + 5, + 7 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "foo", + "span": [ + 5, + 7, + 5, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "if", + "text": "", + "span": [ + 7, + 2, + 11, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "z.nil?", + "span": [ + 7, + 5, + 7, + 11 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "z", + "span": [ + 7, + 5, + 7, + 6 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 7, + 6, + 7, + 7 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "nil?", + "span": [ + 7, + 7, + 7, + 11 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "then", + "text": "return", + "span": [ + 8, + 4, + 8, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "return", + "text": "return", + "span": [ + 8, + 4, + 8, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "else", + "text": "z.nil?", + "span": [ + 10, + 4, + 10, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "z.nil?", + "span": [ + 10, + 4, + 10, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": 
"identifier", + "text": "z", + "span": [ + 10, + 4, + 10, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 10, + 5, + 10, + 6 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "nil?", + "span": [ + 10, + 6, + 10, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "unless", + "text": "", + "span": [ + 12, + 2, + 14, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "w.nil?", + "span": [ + 12, + 9, + 12, + 15 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "w", + "span": [ + 12, + 9, + 12, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 12, + 10, + 12, + 11 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "nil?", + "span": [ + 12, + 11, + 12, + 15 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "then", + "text": "w.nil?", + "span": [ + 13, + 4, + 13, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "w.nil?", + "span": [ + 13, + 4, + 13, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "w", + "span": [ + 13, + 4, + 13, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 13, + 5, + 13, + 6 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "nil?", + "span": [ + 13, + 6, + 13, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "if", + "text": "", + "span": [ + 15, + 2, + 17, + 5 + ], + "named": true, + 
"field_name": null, + "children": [ + { + "kind": "call", + "text": "obj&.ready", + "span": [ + 15, + 5, + 15, + 15 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "obj", + "span": [ + 15, + 5, + 15, + 8 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "&.", + "text": "&.", + "span": [ + 15, + 8, + 15, + 10 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "ready", + "span": [ + 15, + 10, + 15, + 15 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "then", + "text": "obj&.name", + "span": [ + 16, + 4, + 16, + 13 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "obj&.name", + "span": [ + 16, + 4, + 16, + 13 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "obj", + "span": [ + 16, + 4, + 16, + 7 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "&.", + "text": "&.", + "span": [ + 16, + 7, + 16, + 9 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "name", + "span": [ + 16, + 9, + 16, + 13 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "if", + "text": "", + "span": [ + 18, + 2, + 22, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "binary", + "text": "b != nil", + "span": [ + 18, + 5, + 18, + 14 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "b", + "span": [ + 18, + 5, + 18, + 6 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "!=", + "text": "!=", + "span": [ + 18, + 7, + 18, + 9 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "nil", + "text": "nil", + "span": [ + 18, + 10, + 18, + 13 
+ ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "then", + "text": "b.nil?", + "span": [ + 19, + 4, + 19, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "b.nil?", + "span": [ + 19, + 4, + 19, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "b", + "span": [ + 19, + 4, + 19, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 19, + 5, + 19, + 6 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "nil?", + "span": [ + 19, + 6, + 19, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + "kind": "else", + "text": "return", + "span": [ + 21, + 4, + 21, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "return", + "text": "return", + "span": [ + 21, + 4, + 21, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + "kind": "if", + "text": "", + "span": [ + 23, + 2, + 27, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "binary", + "text": "a != nil", + "span": [ + 23, + 5, + 23, + 14 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "a", + "span": [ + 23, + 5, + 23, + 6 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "!=", + "text": "!=", + "span": [ + 23, + 7, + 23, + 9 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "nil", + "text": "nil", + "span": [ + 23, + 10, + 23, + 13 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "then", + "text": "return", + "span": [ + 24, + 4, + 24, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "return", + "text": "return", + 
"span": [ + 24, + 4, + 24, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "else", + "text": "abort", + "span": [ + 26, + 4, + 26, + 9 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "abort", + "span": [ + 26, + 4, + 26, + 9 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "abort", + "span": [ + 26, + 4, + 26, + 9 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, + "visibility": "public", + "params": [ + + ] + } + ], + "normalized_root": { + "type": "ROOT", + "children": [ + { + "Node": { + "type": "DEFN", + "children": [ + { + "Symbol": "rich" + }, + { + "Node": { + "type": "SCOPE", + "children": [ + { + "Nil": null + }, + { + "Nil": null + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "IF", + "children": [ + { + "Node": { + "type": "AND", + "children": [ + { + "Node": { + "type": "OPCALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "x" + } + ], + "first_lineno": 2, + "first_column": 5, + "last_lineno": 2, + "last_column": 6, + "text": "x" + } + }, + { + "Symbol": "!=" + }, + { + "Node": { + "type": "LIST", + "children": [ + { + "Node": { + "type": "NIL", + "children": [ + + ], + "first_lineno": 2, + "first_column": 10, + "last_lineno": 2, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 2, + "first_column": 10, + "last_lineno": 2, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 2, + "first_column": 5, + "last_lineno": 2, + "last_column": 14, + "text": "x != nil" + } + }, + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "y" + } + ], + "first_lineno": 2, + "first_column": 17, + "last_lineno": 2, + "last_column": 18, + "text": "y" + } + }, + { + "Symbol": "present?" 
+ } + ], + "first_lineno": 2, + "first_column": 17, + "last_lineno": 2, + "last_column": 27, + "text": "y.present?" + } + } + ], + "first_lineno": 2, + "first_column": 5, + "last_lineno": 2, + "last_column": 27, + "text": "x != nil && y.present?" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "x" + } + ], + "first_lineno": 3, + "first_column": 4, + "last_lineno": 3, + "last_column": 5, + "text": "x" + } + }, + { + "Symbol": "nil?" + } + ], + "first_lineno": 3, + "first_column": 4, + "last_lineno": 3, + "last_column": 10, + "text": "x.nil?" + } + }, + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "y" + } + ], + "first_lineno": 4, + "first_column": 4, + "last_lineno": 4, + "last_column": 5, + "text": "y" + } + }, + { + "Symbol": "present?" + } + ], + "first_lineno": 4, + "first_column": 4, + "last_lineno": 4, + "last_column": 14, + "text": "y.present?" 
+ } + }, + { + "Node": { + "type": "QCALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "x" + } + ], + "first_lineno": 5, + "first_column": 4, + "last_lineno": 5, + "last_column": 5, + "text": "x" + } + }, + { + "Symbol": "foo" + } + ], + "first_lineno": 5, + "first_column": 4, + "last_lineno": 5, + "last_column": 10, + "text": "x&.foo" + } + } + ], + "first_lineno": 3, + "first_column": 4, + "last_lineno": 5, + "last_column": 10, + "text": "x.nil?\ny.present?\nx&.foo" + } + }, + { + "Nil": null + } + ], + "first_lineno": 2, + "first_column": 2, + "last_lineno": 6, + "last_column": 5, + "text": "" + } + }, + { + "Node": { + "type": "IF", + "children": [ + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "z" + } + ], + "first_lineno": 7, + "first_column": 5, + "last_lineno": 7, + "last_column": 6, + "text": "z" + } + }, + { + "Symbol": "nil?" + } + ], + "first_lineno": 7, + "first_column": 5, + "last_lineno": 7, + "last_column": 11, + "text": "z.nil?" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "RETURN", + "children": [ + + ], + "first_lineno": 8, + "first_column": 4, + "last_lineno": 8, + "last_column": 10, + "text": "return" + } + } + ], + "first_lineno": 8, + "first_column": 4, + "last_lineno": 8, + "last_column": 10, + "text": "return" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "z" + } + ], + "first_lineno": 10, + "first_column": 4, + "last_lineno": 10, + "last_column": 5, + "text": "z" + } + }, + { + "Symbol": "nil?" + } + ], + "first_lineno": 10, + "first_column": 4, + "last_lineno": 10, + "last_column": 10, + "text": "z.nil?" + } + } + ], + "first_lineno": 10, + "first_column": 4, + "last_lineno": 10, + "last_column": 10, + "text": "z.nil?" 
+ } + } + ], + "first_lineno": 7, + "first_column": 2, + "last_lineno": 11, + "last_column": 5, + "text": "" + } + }, + { + "Node": { + "type": "UNLESS", + "children": [ + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "w" + } + ], + "first_lineno": 12, + "first_column": 9, + "last_lineno": 12, + "last_column": 10, + "text": "w" + } + }, + { + "Symbol": "nil?" + } + ], + "first_lineno": 12, + "first_column": 9, + "last_lineno": 12, + "last_column": 15, + "text": "w.nil?" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "w" + } + ], + "first_lineno": 13, + "first_column": 4, + "last_lineno": 13, + "last_column": 5, + "text": "w" + } + }, + { + "Symbol": "nil?" + } + ], + "first_lineno": 13, + "first_column": 4, + "last_lineno": 13, + "last_column": 10, + "text": "w.nil?" + } + } + ], + "first_lineno": 13, + "first_column": 4, + "last_lineno": 13, + "last_column": 10, + "text": "w.nil?" 
+ } + }, + { + "Nil": null + } + ], + "first_lineno": 12, + "first_column": 2, + "last_lineno": 14, + "last_column": 5, + "text": "" + } + }, + { + "Node": { + "type": "IF", + "children": [ + { + "Node": { + "type": "QCALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "obj" + } + ], + "first_lineno": 15, + "first_column": 5, + "last_lineno": 15, + "last_column": 8, + "text": "obj" + } + }, + { + "Symbol": "ready" + } + ], + "first_lineno": 15, + "first_column": 5, + "last_lineno": 15, + "last_column": 15, + "text": "obj&.ready" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "QCALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "obj" + } + ], + "first_lineno": 16, + "first_column": 4, + "last_lineno": 16, + "last_column": 7, + "text": "obj" + } + }, + { + "Symbol": "name" + } + ], + "first_lineno": 16, + "first_column": 4, + "last_lineno": 16, + "last_column": 13, + "text": "obj&.name" + } + } + ], + "first_lineno": 16, + "first_column": 4, + "last_lineno": 16, + "last_column": 13, + "text": "obj&.name" + } + }, + { + "Nil": null + } + ], + "first_lineno": 15, + "first_column": 2, + "last_lineno": 17, + "last_column": 5, + "text": "" + } + }, + { + "Node": { + "type": "IF", + "children": [ + { + "Node": { + "type": "OPCALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "b" + } + ], + "first_lineno": 18, + "first_column": 5, + "last_lineno": 18, + "last_column": 6, + "text": "b" + } + }, + { + "Symbol": "!=" + }, + { + "Node": { + "type": "LIST", + "children": [ + { + "Node": { + "type": "NIL", + "children": [ + + ], + "first_lineno": 18, + "first_column": 10, + "last_lineno": 18, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 18, + "first_column": 10, + "last_lineno": 18, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 18, + "first_column": 5, + "last_lineno": 18, + 
"last_column": 14, + "text": "b != nil" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "b" + } + ], + "first_lineno": 19, + "first_column": 4, + "last_lineno": 19, + "last_column": 5, + "text": "b" + } + }, + { + "Symbol": "nil?" + } + ], + "first_lineno": 19, + "first_column": 4, + "last_lineno": 19, + "last_column": 10, + "text": "b.nil?" + } + } + ], + "first_lineno": 19, + "first_column": 4, + "last_lineno": 19, + "last_column": 10, + "text": "b.nil?" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "RETURN", + "children": [ + + ], + "first_lineno": 21, + "first_column": 4, + "last_lineno": 21, + "last_column": 10, + "text": "return" + } + } + ], + "first_lineno": 21, + "first_column": 4, + "last_lineno": 21, + "last_column": 10, + "text": "return" + } + } + ], + "first_lineno": 18, + "first_column": 2, + "last_lineno": 22, + "last_column": 5, + "text": "" + } + }, + { + "Node": { + "type": "IF", + "children": [ + { + "Node": { + "type": "OPCALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "a" + } + ], + "first_lineno": 23, + "first_column": 5, + "last_lineno": 23, + "last_column": 6, + "text": "a" + } + }, + { + "Symbol": "!=" + }, + { + "Node": { + "type": "LIST", + "children": [ + { + "Node": { + "type": "NIL", + "children": [ + + ], + "first_lineno": 23, + "first_column": 10, + "last_lineno": 23, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 23, + "first_column": 10, + "last_lineno": 23, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 23, + "first_column": 5, + "last_lineno": 23, + "last_column": 14, + "text": "a != nil" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "RETURN", + "children": [ + + ], + "first_lineno": 24, + "first_column": 4, + "last_lineno": 24, + 
"last_column": 10, + "text": "return" + } + } + ], + "first_lineno": 24, + "first_column": 4, + "last_lineno": 24, + "last_column": 10, + "text": "return" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "VCALL", + "children": [ + { + "Symbol": "abort" + } + ], + "first_lineno": 26, + "first_column": 4, + "last_lineno": 26, + "last_column": 9, + "text": "abort" + } + } + ], + "first_lineno": 26, + "first_column": 4, + "last_lineno": 26, + "last_column": 9, + "text": "abort" + } + } + ], + "first_lineno": 23, + "first_column": 2, + "last_lineno": 27, + "last_column": 5, + "text": "" + } + } + ], + "first_lineno": 2, + "first_column": 4, + "last_lineno": 27, + "last_column": 5, + "text": "\n\n\n\n\n" + } + } + ], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 28, + "last_column": 3, + "text": "def rich" + } + } + ], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 28, + "last_column": 3, + "text": "def rich" + } + } + ], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 28, + "last_column": 3, + "text": "def rich" + } + } + ] + }, + "expected": [ + { + "at": "facts/nil_guard_rich.rb:rich:3", + "defn": "rich", + "file": "facts/nil_guard_rich.rb", + "guard": "x.nil?", + "line": 3, + "local": "x", + "proof": "x is already proven non-nil on this path", + "span": [ + 3, + 4, + 3, + 10 + ], + "spans": { + "facts/nil_guard_rich.rb:rich:3": [ + 3, + 4, + 3, + 10 + ] + } + }, + { + "at": "facts/nil_guard_rich.rb:rich:4", + "defn": "rich", + "file": "facts/nil_guard_rich.rb", + "guard": "y.present?", + "line": 4, + "local": "y", + "proof": "y is already proven non-nil on this path", + "span": [ + 4, + 4, + 4, + 14 + ], + "spans": { + "facts/nil_guard_rich.rb:rich:4": [ + 4, + 4, + 4, + 14 + ] + } + }, + { + "at": "facts/nil_guard_rich.rb:rich:5", + "defn": "rich", + "file": "facts/nil_guard_rich.rb", + "guard": "x&.foo", + "line": 5, + "local": "x", + "proof": "x is already proven non-nil on this path", + 
"span": [ + 5, + 4, + 5, + 10 + ], + "spans": { + "facts/nil_guard_rich.rb:rich:5": [ + 5, + 4, + 5, + 10 + ] + } + }, + { + "at": "facts/nil_guard_rich.rb:rich:10", + "defn": "rich", + "file": "facts/nil_guard_rich.rb", + "guard": "z.nil?", + "line": 10, + "local": "z", + "proof": "z is already proven non-nil on this path", + "span": [ + 10, + 4, + 10, + 10 + ], + "spans": { + "facts/nil_guard_rich.rb:rich:10": [ + 10, + 4, + 10, + 10 + ] + } + }, + { + "at": "facts/nil_guard_rich.rb:rich:13", + "defn": "rich", + "file": "facts/nil_guard_rich.rb", + "guard": "w.nil?", + "line": 13, + "local": "w", + "proof": "w is already proven non-nil on this path", + "span": [ + 13, + 4, + 13, + 10 + ], + "spans": { + "facts/nil_guard_rich.rb:rich:13": [ + 13, + 4, + 13, + 10 + ] + } + }, + { + "at": "facts/nil_guard_rich.rb:rich:16", + "defn": "rich", + "file": "facts/nil_guard_rich.rb", + "guard": "obj&.name", + "line": 16, + "local": "obj", + "proof": "obj is already proven non-nil on this path", + "span": [ + 16, + 4, + 16, + 13 + ], + "spans": { + "facts/nil_guard_rich.rb:rich:16": [ + 16, + 4, + 16, + 13 + ] + } + }, + { + "at": "facts/nil_guard_rich.rb:rich:19", + "defn": "rich", + "file": "facts/nil_guard_rich.rb", + "guard": "b.nil?", + "line": 19, + "local": "b", + "proof": "b is already proven non-nil on this path", + "span": [ + 19, + 4, + 19, + 10 + ], + "spans": { + "facts/nil_guard_rich.rb:rich:19": [ + 19, + 4, + 19, + 10 + ] + } + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/sequence-mine-nested.json b/gems/decomplex/examples/facts/detectors/sequence-mine-nested.json new file mode 100644 index 000000000..2c8e742e9 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/sequence-mine-nested.json @@ -0,0 +1,193 @@ +{ + "detector": "sequence-mine", + "input": { + "documents": [ + { + "file": "facts/sequence_nested.rb", + "language": "ruby", + "lines": [ + "sig { acquire release }", + "sig { acquire release }", + "sig { acquire release }", + 
"sig { acquire release }", + "sig { acquire }", + "sig { release }" + ], + "call_sites": [ + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_nested.rb", + "function": "paired_0", + "owner": "Fixture", + "line": 1, + "span": [ + 1, + 0, + 1, + 23 + ], + "conditional": false, + "arguments": [ + "acquire", + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_nested.rb", + "function": "paired_1", + "owner": "Fixture", + "line": 2, + "span": [ + 2, + 0, + 2, + 23 + ], + "conditional": false, + "arguments": [ + "acquire", + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_nested.rb", + "function": "paired_2", + "owner": "Fixture", + "line": 3, + "span": [ + 3, + 0, + 3, + 23 + ], + "conditional": false, + "arguments": [ + "acquire", + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_nested.rb", + "function": "paired_3", + "owner": "Fixture", + "line": 4, + "span": [ + 4, + 0, + 4, + 23 + ], + "conditional": false, + "arguments": [ + "acquire", + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_nested.rb", + "function": "missing_release", + "owner": "Fixture", + "line": 5, + "span": [ + 5, + 0, + 5, + 15 + ], + "conditional": false, + "arguments": [ + "acquire" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_nested.rb", + "function": "missing_acquire", + "owner": "Fixture", + "line": 6, + "span": [ + 6, + 0, + 6, + 15 + ], + "conditional": false, + "arguments": [ + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + } + ] + } + ] + 
}, + "expected": { + "broken": [ + { + "at": "facts/sequence_nested.rb:missing_release:5", + "confidence": 0.8, + "has": "acquire", + "missing": "release", + "pair": [ + "acquire", + "release" + ], + "spans": { + "facts/sequence_nested.rb:missing_release:5": [ + 5, + 0, + 5, + 15 + ] + }, + "support": 4 + }, + { + "at": "facts/sequence_nested.rb:missing_acquire:6", + "confidence": 0.8, + "has": "release", + "missing": "acquire", + "pair": [ + "acquire", + "release" + ], + "spans": { + "facts/sequence_nested.rb:missing_acquire:6": [ + 6, + 0, + 6, + 15 + ] + }, + "support": 4 + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/sequence-mine-rich.json b/gems/decomplex/examples/facts/detectors/sequence-mine-rich.json new file mode 100644 index 000000000..7bf5c4d21 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/sequence-mine-rich.json @@ -0,0 +1,220 @@ +{ + "detector": "sequence-mine", + "input": { + "documents": [ + { + "file": "facts/sequence_rich.rb", + "language": "ruby", + "lines": [ + "sig {", + " acquire", + " release", + "}", + "sig { acquire release }", + "sig { acquire release }", + "sig { acquire release }", + "sig { acquire }", + "sig { release }", + "sig { Acquire release }", + "sig { acquire release$ }" + ], + "call_sites": [ + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": "paired_multiline", + "owner": "Fixture", + "line": 1, + "span": [ + 1, + 0, + 4, + 1 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": "paired_1", + "owner": "Fixture", + "line": 5, + "span": [ + 5, + 0, + 5, + 23 + ], + "conditional": false, + "arguments": [ + "acquire", + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": 
"paired_2", + "owner": "Fixture", + "line": 6, + "span": [ + 6, + 0, + 6, + 23 + ], + "conditional": false, + "arguments": [ + "acquire", + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": "paired_3", + "owner": "Fixture", + "line": 7, + "span": [ + 7, + 0, + 7, + 23 + ], + "conditional": false, + "arguments": [ + "acquire", + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": "missing_release", + "owner": "Fixture", + "line": 8, + "span": [ + 8, + 0, + 8, + 15 + ], + "conditional": false, + "arguments": [ + "acquire" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": "missing_acquire", + "owner": "Fixture", + "line": 9, + "span": [ + 9, + 0, + 9, + 15 + ], + "conditional": false, + "arguments": [ + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": "ignored_caps", + "owner": "Fixture", + "line": 10, + "span": [ + 10, + 0, + 10, + 23 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": "ignored_symbol", + "owner": "Fixture", + "line": 11, + "span": [ + 11, + 0, + 11, + 24 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + } + ] + } + ] + }, + "expected": { + "broken": [ + { + "at": "facts/sequence_rich.rb:missing_release:8", + "confidence": 0.83, + "has": "acquire", + "missing": "release", + "pair": [ + "acquire", + "release" + ], + "spans": { + 
"facts/sequence_rich.rb:missing_release:8": [ + 8, + 0, + 8, + 15 + ] + }, + "support": 5 + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/sequence-mine.json b/gems/decomplex/examples/facts/detectors/sequence-mine.json new file mode 100644 index 000000000..197cf03fc --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/sequence-mine.json @@ -0,0 +1,79 @@ +{ + "detector": "sequence-mine", + "input": { + "documents": [ + { + "file": "facts/sequence.rb", + "language": "ruby", + "source": "", + "lines": [], + "root": { + "kind": "program", + "text": "", + "span": [1, 0, 1, 0], + "named": true, + "field_name": null, + "children": [] + }, + "normalized_root": { + "type": "ROOT", + "children": [], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 1, + "last_column": 0, + "text": "" + }, + "function_defs": [], + "owner_defs": [], + "call_sites": [ + {"receiver": "", "message": "acquire", "file": "facts/sequence.rb", "function": "paired_0", "owner": "", "line": 1, "span": [1, 0, 1, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "release", "file": "facts/sequence.rb", "function": "paired_0", "owner": "", "line": 2, "span": [2, 0, 2, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "acquire", "file": "facts/sequence.rb", "function": "paired_1", "owner": "", "line": 3, "span": [3, 0, 3, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "release", "file": "facts/sequence.rb", "function": "paired_1", "owner": "", "line": 4, "span": [4, 0, 4, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "acquire", "file": "facts/sequence.rb", "function": "paired_2", "owner": "", "line": 5, "span": [5, 0, 5, 7], "conditional": 
false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "release", "file": "facts/sequence.rb", "function": "paired_2", "owner": "", "line": 6, "span": [6, 0, 6, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "acquire", "file": "facts/sequence.rb", "function": "paired_3", "owner": "", "line": 7, "span": [7, 0, 7, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "release", "file": "facts/sequence.rb", "function": "paired_3", "owner": "", "line": 8, "span": [8, 0, 8, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "acquire", "file": "facts/sequence.rb", "function": "missing_release", "owner": "", "line": 9, "span": [9, 0, 9, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "release", "file": "facts/sequence.rb", "function": "missing_acquire", "owner": "", "line": 10, "span": [10, 0, 10, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false} + ], + "state_reads": [], + "state_writes": [], + "decision_sites": [], + "branch_decisions": [], + "dispatch_sites": [], + "semantic_effect_sites": [], + "local_complexity_scores": {}, + "predicate_aliases": [], + "comparison_uses": [] + } + ] + }, + "expected": { + "broken": [ + { + "at": "facts/sequence.rb:missing_release:9", + "confidence": 0.8, + "has": "acquire", + "missing": "release", + "pair": ["acquire", "release"], + "spans": { + "facts/sequence.rb:missing_release:9": [9, 0, 9, 7] + }, + "support": 4 + }, + { + "at": "facts/sequence.rb:missing_acquire:10", + "confidence": 0.8, + "has": "release", + "missing": "acquire", + "pair": ["acquire", "release"], + "spans": { + 
"facts/sequence.rb:missing_acquire:10": [10, 0, 10, 7] + }, + "support": 4 + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/state-branch-density.json b/gems/decomplex/examples/facts/detectors/state-branch-density.json new file mode 100644 index 000000000..a1e4072f9 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/state-branch-density.json @@ -0,0 +1,86 @@ +{ + "detector": "state-branch-density", + "input": { + "documents": [ + { + "file": "facts/state_branch.rb", + "language": "ruby", + "source": "", + "lines": [], + "root": { + "kind": "program", + "text": "", + "span": [1, 0, 1, 0], + "named": true, + "field_name": null, + "children": [] + }, + "normalized_root": { + "type": "ROOT", + "children": [], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 1, + "last_column": 0, + "text": "" + }, + "function_defs": [], + "owner_defs": [], + "call_sites": [], + "state_reads": [], + "state_writes": [], + "decision_sites": [], + "branch_decisions": [ + { + "file": "facts/state_branch.rb", + "function": "check", + "line": 1, + "span": [1, 0, 5, 3], + "predicate": "if ready", + "state_refs": ["ready"] + }, + { + "file": "facts/state_branch.rb", + "function": "check", + "line": 2, + "span": [2, 2, 2, 20], + "predicate": "ready?", + "state_refs": ["ready"] + }, + { + "file": "facts/state_branch.rb", + "function": "check", + "line": 6, + "span": [6, 0, 6, 20], + "predicate": "unless stale", + "state_refs": ["stale"] + } + ], + "dispatch_sites": [], + "semantic_effect_sites": [], + "local_complexity_scores": {}, + "predicate_aliases": [], + "comparison_uses": [] + } + ] + }, + "expected": [ + { + "at": "facts/state_branch.rb:check:2", + "decisions": 2, + "file": "facts/state_branch.rb", + "method": "check", + "predicate": "ready?", + "score": 4, + "sites": [ + "facts/state_branch.rb:check:2", + "facts/state_branch.rb:check:6" + ], + "spans": { + "facts/state_branch.rb:check:2": [2, 2, 2, 20], + "facts/state_branch.rb:check:6": [6, 0, 6, 20] 
+ }, + "state_refs": ["ready", "stale"] + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/state-mesh-rich.json b/gems/decomplex/examples/facts/detectors/state-mesh-rich.json new file mode 100644 index 000000000..2263f19e5 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/state-mesh-rich.json @@ -0,0 +1,408 @@ +{ + "detector": "state-mesh", + "input": { + "documents": [ + { + "file": "facts/state_mesh_rich.rb", + "language": "ruby", + "state_writes": [ + { + "field": "ready", + "receiver": "self", + "file": "facts/state_mesh_rich.rb", + "function": "first", + "line": 1, + "span": [ + 1, + 0, + 1, + 11 + ], + "owner": "Fixture" + }, + { + "field": "ready", + "receiver": "self", + "file": "facts/state_mesh_rich.rb", + "function": "second", + "line": 2, + "span": [ + 2, + 0, + 2, + 11 + ], + "owner": "Fixture" + }, + { + "field": "stale", + "receiver": "self", + "file": "facts/state_mesh_rich.rb", + "function": "third", + "line": 3, + "span": [ + 3, + 0, + 3, + 11 + ], + "owner": "Fixture" + }, + { + "field": "stale", + "receiver": "self", + "file": "facts/state_mesh_rich.rb", + "function": "fourth", + "line": 4, + "span": [ + 4, + 0, + 4, + 11 + ], + "owner": "Fixture" + } + ], + "state_reads": [ + { + "field": "ready", + "receiver": "self", + "file": "facts/state_mesh_rich.rb", + "function": "check", + "line": 5, + "span": [ + 5, + 0, + 5, + 10 + ], + "owner": "Fixture" + }, + { + "field": "stale", + "receiver": "self", + "file": "facts/state_mesh_rich.rb", + "function": "report", + "line": 6, + "span": [ + 6, + 0, + 6, + 10 + ], + "owner": "Fixture" + } + ], + "predicate_defs": [ + { + "name": "ready?", + "body": "ready == true", + "file": "facts/state_mesh_rich.rb", + "line": 7, + "span": [ + 7, + 0, + 7, + 20 + ] + } + ], + "comparison_sites": [ + { + "source": "ready == true", + "file": "facts/state_mesh_rich.rb", + "function": "inline", + "line": 8, + "span": [ + 8, + 0, + 8, + 13 + ] + } + ], + "predicate_aliases": [ + { + "name": 
"ready?", + "body": "ready == true", + "file": "facts/state_mesh_rich.rb", + "defn": "ready?", + "line": 7, + "span": [ + 7, + 0, + 7, + 20 + ] + } + ], + "comparison_uses": [ + { + "canon_source": "ready == true", + "raw": "ready == true", + "file": "facts/state_mesh_rich.rb", + "function": "inline", + "line": 8, + "span": [ + 8, + 0, + 8, + 13 + ], + "enclosing_span": [ + 8, + 0, + 8, + 13 + ] + } + ] + } + ] + }, + "expected": { + "fields": { + "ready": { + "messiness": 16.0, + "metrics": { + "fix_churn": 1.0, + "percentiles": { + "messiness": 100, + "pressure": 100, + "re_derivations": 100, + "reads": 100, + "scatter": 100, + "writes": 100 + }, + "pressure": 1, + "re_derivations": 1, + "read_scatter": 1, + "reads": 1, + "receiver_types": 1, + "scatter": 4, + "write_scatter": 2, + "writes": 2 + }, + "rank": 1, + "re_derivations": [ + { + "canon": "ready == true", + "defn": "inline", + "file": "facts/state_mesh_rich.rb", + "line": 8, + "predicate": "ready?", + "raw": "ready == true" + } + ], + "readers": [ + { + "defn": "check", + "file": "facts/state_mesh_rich.rb", + "line": 5, + "recv": "self", + "span": [ + 5, + 0, + 5, + 10 + ] + } + ], + "writers": [ + { + "defn": "first", + "file": "facts/state_mesh_rich.rb", + "line": 1, + "recv": "self", + "span": [ + 1, + 0, + 1, + 11 + ] + }, + { + "defn": "second", + "file": "facts/state_mesh_rich.rb", + "line": 2, + "recv": "self", + "span": [ + 2, + 0, + 2, + 11 + ] + } + ] + }, + "stale": { + "messiness": 9.0, + "metrics": { + "fix_churn": 1.0, + "percentiles": { + "messiness": 50, + "pressure": 100, + "re_derivations": 50, + "reads": 100, + "scatter": 50, + "writes": 100 + }, + "pressure": 1, + "re_derivations": 0, + "read_scatter": 1, + "reads": 1, + "receiver_types": 1, + "scatter": 3, + "write_scatter": 2, + "writes": 2 + }, + "rank": 2, + "re_derivations": [ + + ], + "readers": [ + { + "defn": "report", + "file": "facts/state_mesh_rich.rb", + "line": 6, + "recv": "self", + "span": [ + 6, + 0, + 6, + 10 + ] + } 
+ ], + "writers": [ + { + "defn": "third", + "file": "facts/state_mesh_rich.rb", + "line": 3, + "recv": "self", + "span": [ + 3, + 0, + 3, + 11 + ] + }, + { + "defn": "fourth", + "file": "facts/state_mesh_rich.rb", + "line": 4, + "recv": "self", + "span": [ + 4, + 0, + 4, + 11 + ] + } + ] + } + }, + "hierarchy": [ + { + "files": [ + { + "defns": [ + { + "fields": { + "read": [ + "ready" + ], + "written": [ + + ] + }, + "name": "check", + "readers": 1, + "writers": 0 + }, + { + "fields": { + "read": [ + + ], + "written": [ + "ready" + ] + }, + "name": "first", + "readers": 0, + "writers": 1 + }, + { + "fields": { + "read": [ + + ], + "written": [ + "stale" + ] + }, + "name": "fourth", + "readers": 0, + "writers": 1 + }, + { + "fields": { + "read": [ + "stale" + ], + "written": [ + + ] + }, + "name": "report", + "readers": 1, + "writers": 0 + }, + { + "fields": { + "read": [ + + ], + "written": [ + "ready" + ] + }, + "name": "second", + "readers": 0, + "writers": 1 + }, + { + "fields": { + "read": [ + + ], + "written": [ + "stale" + ] + }, + "name": "third", + "readers": 0, + "writers": 1 + } + ], + "name": "state_mesh_rich.rb", + "readers": 2, + "writers": 4 + } + ], + "name": "facts", + "readers": 2, + "writers": 4 + } + ], + "state_mesh": { + "custom_fields": null, + "min_writes": 2, + "total_fields": 2, + "total_re_derivations": 1, + "total_reads": 2, + "total_writes": 4 + } + } +} diff --git a/gems/decomplex/examples/facts/detectors/state-mesh.json b/gems/decomplex/examples/facts/detectors/state-mesh.json new file mode 100644 index 000000000..94418d1de --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/state-mesh.json @@ -0,0 +1,223 @@ +{ + "detector": "state-mesh", + "input": { + "documents": [ + { + "file": "facts/state_mesh.rb", + "language": "ruby", + "state_writes": [ + { + "field": "ready", + "receiver": "self", + "file": "facts/state_mesh.rb", + "function": "first", + "line": 1, + "span": [ + 1, + 0, + 1, + 11 + ], + "owner": "Fixture" + }, + { + 
"field": "ready", + "receiver": "self", + "file": "facts/state_mesh.rb", + "function": "second", + "line": 2, + "span": [ + 2, + 0, + 2, + 11 + ], + "owner": "Fixture" + } + ], + "state_reads": [ + { + "field": "ready", + "receiver": "self", + "file": "facts/state_mesh.rb", + "function": "check", + "line": 4, + "span": [ + 4, + 0, + 4, + 10 + ], + "owner": "Fixture" + }, + { + "field": "ready", + "receiver": "self", + "file": "facts/state_mesh.rb", + "function": "report", + "line": 5, + "span": [ + 5, + 0, + 5, + 10 + ], + "owner": "Fixture" + } + ] + } + ] + }, + "expected": { + "fields": { + "ready": { + "messiness": 16.0, + "metrics": { + "fix_churn": 1.0, + "percentiles": { + }, + "pressure": 2, + "re_derivations": 0, + "read_scatter": 2, + "reads": 2, + "receiver_types": 1, + "scatter": 4, + "write_scatter": 2, + "writes": 2 + }, + "rank": 1, + "re_derivations": [ + + ], + "readers": [ + { + "defn": "check", + "file": "facts/state_mesh.rb", + "line": 4, + "recv": "self", + "span": [ + 4, + 0, + 4, + 10 + ] + }, + { + "defn": "report", + "file": "facts/state_mesh.rb", + "line": 5, + "recv": "self", + "span": [ + 5, + 0, + 5, + 10 + ] + } + ], + "writers": [ + { + "defn": "first", + "file": "facts/state_mesh.rb", + "line": 1, + "recv": "self", + "span": [ + 1, + 0, + 1, + 11 + ] + }, + { + "defn": "second", + "file": "facts/state_mesh.rb", + "line": 2, + "recv": "self", + "span": [ + 2, + 0, + 2, + 11 + ] + } + ] + } + }, + "hierarchy": [ + { + "files": [ + { + "defns": [ + { + "fields": { + "read": [ + "ready" + ], + "written": [ + + ] + }, + "name": "check", + "readers": 1, + "writers": 0 + }, + { + "fields": { + "read": [ + + ], + "written": [ + "ready" + ] + }, + "name": "first", + "readers": 0, + "writers": 1 + }, + { + "fields": { + "read": [ + "ready" + ], + "written": [ + + ] + }, + "name": "report", + "readers": 1, + "writers": 0 + }, + { + "fields": { + "read": [ + + ], + "written": [ + "ready" + ] + }, + "name": "second", + "readers": 0, + "writers": 
1 + } + ], + "name": "state_mesh.rb", + "readers": 2, + "writers": 2 + } + ], + "name": "facts", + "readers": 2, + "writers": 2 + } + ], + "state_mesh": { + "custom_fields": null, + "min_writes": 2, + "total_fields": 1, + "total_re_derivations": 0, + "total_reads": 2, + "total_writes": 2 + } + } +} diff --git a/gems/decomplex/examples/facts/detectors/structural-topology-rich.json b/gems/decomplex/examples/facts/detectors/structural-topology-rich.json new file mode 100644 index 000000000..35079b6cd --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/structural-topology-rich.json @@ -0,0 +1,195 @@ +{ + "detector": "structural-topology", + "input": { + "documents": [ + { + "file": "facts/topology_rich.rb", + "language": "ruby", + "lines": [ + "class Fixture", + " def self.entry", + " self.", + " helper", + " self.entry", + " end", + " def self.helper", + " end", + "end" + ], + "function_defs": [ + { + "file": "facts/topology_rich.rb", + "name": "self.entry", + "owner": "Fixture", + "line": 2, + "span": [ + 2, + 2, + 6, + 5 + ], + "body": { + "kind": "body_statement", + "text": "", + "span": [ + 2, + 2, + 6, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "public", + "params": [ + + ] + }, + { + "file": "facts/topology_rich.rb", + "name": "self.helper", + "owner": "Fixture", + "line": 7, + "span": [ + 7, + 2, + 8, + 5 + ], + "body": { + "kind": "body_statement", + "text": "", + "span": [ + 7, + 2, + 8, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "public", + "params": [ + + ] + } + ], + "owner_defs": [ + { + "file": "facts/topology_rich.rb", + "name": "Fixture", + "kind": "class", + "line": 1, + "span": [ + 1, + 0, + 9, + 3 + ] + } + ], + "call_sites": [ + { + "receiver": "self", + "message": "helper", + "file": "facts/topology_rich.rb", + "function": "self.entry", + "owner": "Fixture", + "line": 3, + "span": [ + 3, + 4, + 4, + 12 + ], + "conditional": false, + "arguments": 
[ + + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "entry", + "file": "facts/topology_rich.rb", + "function": "self.entry", + "owner": "Fixture", + "line": 5, + "span": [ + 5, + 4, + 5, + 14 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + } + ] + } + ] + }, + "expected": { + "edges": [ + { + "callee": "Fixture#self.helper", + "callee_name": "self.helper", + "caller": "Fixture#self.entry", + "caller_name": "self.entry", + "confidence": "high", + "file": "facts/topology_rich.rb", + "kind": "direct_self", + "line": 3, + "span": [ + 3, + 4, + 4, + 12 + ], + "type": "always" + } + ], + "methods": [ + { + "file": "facts/topology_rich.rb", + "id": "Fixture#self.entry", + "line": 2, + "name": "self.entry", + "owner": "Fixture", + "span": [ + 2, + 2, + 6, + 5 + ], + "visibility": "public" + }, + { + "file": "facts/topology_rich.rb", + "id": "Fixture#self.helper", + "line": 7, + "name": "self.helper", + "owner": "Fixture", + "span": [ + 7, + 2, + 8, + 5 + ], + "visibility": "public" + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/structural-topology.json b/gems/decomplex/examples/facts/detectors/structural-topology.json new file mode 100644 index 000000000..a64b9a9cd --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/structural-topology.json @@ -0,0 +1,171 @@ +{ + "detector": "structural-topology", + "input": { + "documents": [ + { + "file": "facts/topology.rb", + "language": "ruby", + "lines": [ + "class Fixture", + " def entry", + " self.helper", + " end", + " private def helper; end", + "end" + ], + "function_defs": [ + { + "file": "facts/topology.rb", + "name": "entry", + "owner": "Fixture", + "line": 2, + "span": [ + 2, + 2, + 4, + 5 + ], + "body": { + "kind": "program", + "text": "", + "span": [ + 1, + 0, + 1, + 0 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": 
"public", + "params": [ + + ] + }, + { + "file": "facts/topology.rb", + "name": "helper", + "owner": "Fixture", + "line": 5, + "span": [ + 5, + 2, + 5, + 25 + ], + "body": { + "kind": "program", + "text": "", + "span": [ + 1, + 0, + 1, + 0 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "private", + "params": [ + + ] + } + ], + "owner_defs": [ + { + "file": "facts/topology.rb", + "name": "Fixture", + "kind": "class", + "line": 1, + "span": [ + 1, + 0, + 6, + 3 + ] + } + ], + "call_sites": [ + { + "receiver": "self", + "message": "helper", + "file": "facts/topology.rb", + "function": "entry", + "owner": "Fixture", + "line": 3, + "span": [ + 3, + 4, + 3, + 15 + ], + "conditional": false, + "arguments": [ + + ], + "control": "conditional", + "safe_navigation": false, + "block": false + } + ] + } + ] + }, + "expected": { + "edges": [ + { + "callee": "Fixture#helper", + "callee_name": "helper", + "caller": "Fixture#entry", + "caller_name": "entry", + "confidence": "high", + "file": "facts/topology.rb", + "kind": "direct_self", + "line": 3, + "span": [ + 3, + 4, + 3, + 15 + ], + "type": "conditional" + } + ], + "methods": [ + { + "file": "facts/topology.rb", + "id": "Fixture#entry", + "line": 2, + "name": "entry", + "owner": "Fixture", + "span": [ + 2, + 2, + 4, + 5 + ], + "visibility": "public" + }, + { + "file": "facts/topology.rb", + "id": "Fixture#helper", + "line": 5, + "name": "helper", + "owner": "Fixture", + "span": [ + 5, + 2, + 5, + 25 + ], + "visibility": "private" + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity-rich.json b/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity-rich.json new file mode 100644 index 000000000..2435c065e --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity-rich.json @@ -0,0 +1,443 @@ +{ + "detector": "weighted-inlined-complexity", + "input": { + "documents": [ + { + "file": 
"facts/weighted_rich.rb", + "language": "ruby", + "lines": [ + "class Fixture", + " def entry", + " self.helper", + " self.shared", + " end", + " def helper", + " self.leaf", + " end", + " def leaf; end", + " def shared; end", + " def other", + " self.shared", + " end", + "end" + ], + "function_defs": [ + { + "file": "facts/weighted_rich.rb", + "name": "entry", + "owner": "Fixture", + "line": 2, + "span": [ + 2, + 2, + 5, + 5 + ], + "body": { + "kind": "body_statement", + "text": "", + "span": [ + 2, + 2, + 5, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "public", + "params": [ + + ] + }, + { + "file": "facts/weighted_rich.rb", + "name": "helper", + "owner": "Fixture", + "line": 6, + "span": [ + 6, + 2, + 8, + 5 + ], + "body": { + "kind": "body_statement", + "text": "", + "span": [ + 6, + 2, + 8, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "private", + "params": [ + + ] + }, + { + "file": "facts/weighted_rich.rb", + "name": "leaf", + "owner": "Fixture", + "line": 9, + "span": [ + 9, + 2, + 9, + 15 + ], + "body": { + "kind": "body_statement", + "text": "", + "span": [ + 9, + 2, + 9, + 15 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "private", + "params": [ + + ] + }, + { + "file": "facts/weighted_rich.rb", + "name": "shared", + "owner": "Fixture", + "line": 10, + "span": [ + 10, + 2, + 10, + 17 + ], + "body": { + "kind": "body_statement", + "text": "", + "span": [ + 10, + 2, + 10, + 17 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "public", + "params": [ + + ] + }, + { + "file": "facts/weighted_rich.rb", + "name": "other", + "owner": "Fixture", + "line": 11, + "span": [ + 11, + 2, + 13, + 5 + ], + "body": { + "kind": "body_statement", + "text": "", + "span": [ + 11, + 2, + 13, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "public", + "params": [ + + 
] + } + ], + "owner_defs": [ + { + "file": "facts/weighted_rich.rb", + "name": "Fixture", + "kind": "class", + "line": 1, + "span": [ + 1, + 0, + 14, + 3 + ] + } + ], + "call_sites": [ + { + "receiver": "self", + "message": "helper", + "file": "facts/weighted_rich.rb", + "function": "entry", + "owner": "Fixture", + "line": 3, + "span": [ + 3, + 4, + 3, + 15 + ], + "conditional": false, + "arguments": [ + + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "shared", + "file": "facts/weighted_rich.rb", + "function": "entry", + "owner": "Fixture", + "line": 4, + "span": [ + 4, + 4, + 4, + 15 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "leaf", + "file": "facts/weighted_rich.rb", + "function": "helper", + "owner": "Fixture", + "line": 7, + "span": [ + 7, + 4, + 7, + 13 + ], + "conditional": false, + "arguments": [ + + ], + "control": "iterates", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "shared", + "file": "facts/weighted_rich.rb", + "function": "other", + "owner": "Fixture", + "line": 12, + "span": [ + 12, + 4, + 12, + 15 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + } + ], + "local_methods": [ + { + "id": "Fixture#entry", + "owner": "Fixture", + "name": "entry", + "file": "facts/weighted_rich.rb", + "line": 2, + "span": [ + 2, + 2, + 5, + 5 + ], + "statements": [ + + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#helper", + "owner": "Fixture", + "name": "helper", + "file": "facts/weighted_rich.rb", + "line": 6, + "span": [ + 6, + 2, + 8, + 5 + ], + "statements": [ + + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#leaf", + "owner": "Fixture", + "name": "leaf", + "file": "facts/weighted_rich.rb", + "line": 9, + "span": [ + 9, + 2, + 9, + 15 + ], + 
"statements": [ + + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#shared", + "owner": "Fixture", + "name": "shared", + "file": "facts/weighted_rich.rb", + "line": 10, + "span": [ + 10, + 2, + 10, + 17 + ], + "statements": [ + + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#other", + "owner": "Fixture", + "name": "other", + "file": "facts/weighted_rich.rb", + "line": 11, + "span": [ + 11, + 2, + 13, + 5 + ], + "statements": [ + + ], + "boundaries": [ + + ] + } + ], + "local_complexity_scores": { + "Fixture#entry": { + "score": 4.0, + "signals": { + "branches": 1 + } + }, + "Fixture#helper": { + "score": 12.0, + "signals": { + "branches": 3 + } + }, + "Fixture#leaf": { + "score": 11.0, + "signals": { + "branches": 2 + } + }, + "Fixture#shared": { + "score": 10.0, + "signals": { + "branches": 2 + } + }, + "Fixture#other": { + "score": 3.0, + "signals": { + "branches": 1 + } + } + } + } + ] + }, + "expected": [ + { + "at": "facts/weighted_rich.rb:entry:2", + "call_chain": [ + "entry", + "helper" + ], + "depth": 2, + "hidden": 16.8, + "inlined": 20.8, + "local": 4.0, + "method": "entry", + "owner": "Fixture", + "reason": "1 single-caller helper(s) add 16.8 weighted cognitive points", + "signals": { + "branches": 1 + }, + "single_caller_callees": [ + "helper" + ], + "spans": { + "facts/weighted_rich.rb:entry:2": [ + 2, + 2, + 5, + 5 + ] + } + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity.json b/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity.json new file mode 100644 index 000000000..ca507b48d --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity.json @@ -0,0 +1,205 @@ +{ + "detector": "weighted-inlined-complexity", + "input": { + "documents": [ + { + "file": "facts/weighted.rb", + "language": "ruby", + "lines": [ + "class Fixture", + " def entry", + " self.helper", + " end", + " def helper", + " end", + "end" + ], + "function_defs": [ + { + "file": 
"facts/weighted.rb", + "name": "entry", + "owner": "Fixture", + "line": 2, + "span": [ + 2, + 2, + 4, + 5 + ], + "body": { + "kind": "program", + "text": "", + "span": [ + 1, + 0, + 1, + 0 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "public", + "params": [ + + ] + }, + { + "file": "facts/weighted.rb", + "name": "helper", + "owner": "Fixture", + "line": 5, + "span": [ + 5, + 2, + 6, + 5 + ], + "body": { + "kind": "program", + "text": "", + "span": [ + 1, + 0, + 1, + 0 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "private", + "params": [ + + ] + } + ], + "owner_defs": [ + { + "file": "facts/weighted.rb", + "name": "Fixture", + "kind": "class", + "line": 1, + "span": [ + 1, + 0, + 7, + 3 + ] + } + ], + "call_sites": [ + { + "receiver": "self", + "message": "helper", + "file": "facts/weighted.rb", + "function": "entry", + "owner": "Fixture", + "line": 3, + "span": [ + 3, + 4, + 3, + 15 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + } + ], + "local_methods": [ + { + "id": "Fixture#entry", + "owner": "Fixture", + "name": "entry", + "file": "facts/weighted.rb", + "line": 2, + "span": [ + 2, + 2, + 4, + 5 + ], + "statements": [ + + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#helper", + "owner": "Fixture", + "name": "helper", + "file": "facts/weighted.rb", + "line": 5, + "span": [ + 5, + 2, + 6, + 5 + ], + "statements": [ + + ], + "boundaries": [ + + ] + } + ], + "local_complexity_scores": { + "Fixture#entry": { + "score": 3.0, + "signals": { + "branches": 1 + } + }, + "Fixture#helper": { + "score": 16.0, + "signals": { + "branches": 4 + } + } + } + } + ] + }, + "expected": [ + { + "at": "facts/weighted.rb:entry:2", + "call_chain": [ + "entry", + "helper" + ], + "depth": 1, + "hidden": 16.0, + "inlined": 19.0, + "local": 3.0, + "method": "entry", + "owner": "Fixture", + "reason": "1 single-caller helper(s) add 
16.0 weighted cognitive points", + "signals": { + "branches": 1 + }, + "single_caller_callees": [ + "helper" + ], + "spans": { + "facts/weighted.rb:entry:2": [ + 2, + 2, + 4, + 5 + ] + } + } + ] +} diff --git a/gems/decomplex/examples/facts/report/postprocess.json b/gems/decomplex/examples/facts/report/postprocess.json new file mode 100644 index 000000000..f39a99842 --- /dev/null +++ b/gems/decomplex/examples/facts/report/postprocess.json @@ -0,0 +1,392 @@ +{ + "input": { + "format": "decomplex.report-facts.v1", + "files": ["facts/report.rb"], + "detectors": { + "miner": { + "missing_abstractions": [ + { + "kind": "conjunction", + "members": ["ready", "valid"], + "support": 2, + "scatter": 2, + "at": "facts/report.rb:checkout:10", + "sites": ["facts/report.rb:checkout:10", "facts/report.rb:refund:30"], + "spans": { + "facts/report.rb:checkout:10": [10, 2, 10, 20] + } + } + ], + "neglected_conditions": [ + { + "pattern": ["ready", "valid"], + "support": 2, + "missing": "valid", + "at": "facts/report.rb:checkout:11", + "spans": { + "facts/report.rb:checkout:11": [11, 2, 11, 18] + } + } + ] + }, + "co_update": { + "co_written_pairs": [ + { + "pair": ["provenance", "storage"], + "support": 3, + "sites": ["facts/report.rb:prepare:4", "facts/report.rb:checkout:12"] + } + ], + "neglected_updates": [ + { + "pair": ["provenance", "storage"], + "support": 3, + "has": "storage", + "missing": "provenance", + "recv": "order", + "at": "facts/report.rb:checkout:12", + "spans": { + "facts/report.rb:checkout:12": [12, 2, 12, 20] + } + } + ] + }, + "predicate_alias": { + "alias_clusters": [] + }, + "semantic_alias": { + "alias_clusters": [ + { + "canon": "storage == READY", + "names": ["ready?", "prepared?"], + "at": "facts/report.rb:checkout:13", + "sites": ["facts/report.rb:checkout:13"], + "spans": { + "facts/report.rb:checkout:13": [13, 2, 13, 18] + } + } + ], + "reification_misses": [ + { + "predicate": "ready?", + "raw": "storage == READY", + "canon": "storage == READY", + 
"at": "facts/report.rb:checkout:14", + "spans": { + "facts/report.rb:checkout:14": [14, 2, 14, 22] + } + } + ] + }, + "path_condition": { + "neglected": [ + { + "pattern": ["ready", "valid"], + "support": 2, + "missing": "valid", + "action": "ship(order)", + "at": "facts/report.rb:checkout:15", + "spans": { + "facts/report.rb:checkout:15": [15, 2, 15, 22] + } + } + ], + "scattered": [] + }, + "sequence_mine": { + "broken_protocol": [ + { + "pair": ["open", "close"], + "support": 4, + "confidence": 0.8, + "has": "open", + "missing": "close", + "at": "facts/report.rb:checkout:16", + "spans": { + "facts/report.rb:checkout:16": [16, 2, 16, 10] + } + } + ] + }, + "implicit_control_flow": { + "ordered_protocols": [] + }, + "derived_state": [ + { + "file": "facts/report.rb", + "defn": "checkout", + "derived": "storage", + "source": "provenance", + "derived_at": 17, + "source_reassigned_at": 22, + "gap": 5, + "at": "facts/report.rb:checkout:17", + "spans": { + "facts/report.rb:checkout:17": [17, 2, 17, 24] + } + } + ], + "inconsistent_rename_clone": [], + "flay_similarity": [], + "decision_pressure": [ + { + "contract": ".storage", + "decisions": 2, + "essential": 1, + "methods": 1, + "sites": ["facts/report.rb:checkout:18"], + "spans": { + "facts/report.rb:checkout:18": [18, 2, 18, 24] + } + } + ], + "redundant_nil_guard": [], + "false_simplicity": [ + { + "kind": "hidden_mutation", + "detail": "storage=", + "support": 1, + "scatter": 1, + "at": "facts/report.rb:checkout:19", + "sites": ["facts/report.rb:checkout:19"], + "spans": { + "facts/report.rb:checkout:19": [19, 2, 19, 14] + } + } + ], + "oversized_predicate": [], + "fat_union": { + "fat_unions": [] + }, + "state_heatmap": [], + "state_branch_density": [ + { + "at": "facts/report.rb:checkout:20", + "file": "facts/report.rb", + "method": "checkout", + "decisions": 1, + "state_refs": ["storage"], + "predicate": "storage.ready?", + "score": 1, + "sites": ["facts/report.rb:checkout:20"], + "spans": { + 
"facts/report.rb:checkout:20": [20, 2, 20, 24] + } + } + ], + "temporal_ordering_pressure": [], + "weighted_inlined_complexity": [], + "locality_drag": [], + "function_lcom": [], + "operational_discontinuity": [] + } + }, + "expected": { + "convergence": [ + { + "file": "facts/report.rb", + "method": "checkout", + "detectors": [ + "Broken Protocols", + "Decision Pressure", + "Derived-State Staleness", + "False Simplicity", + "Missing Abstractions", + "Neglected Conditions", + "Neglected Path Conditions", + "Neglected Updates", + "Reification Misses", + "Semantic Predicate Aliases", + "State-Based Branch Density" + ], + "n_detectors": 11, + "score": 24, + "findings": 15, + "at": "facts/report.rb:checkout:18" + } + ], + "root_clusters": [ + { + "kind": "name", + "token": "storage", + "detectors": [ + "Decision Pressure", + "Derived-State Staleness", + "False Simplicity", + "Neglected Updates", + "Reification Misses", + "Semantic Predicate Aliases", + "State-Based Branch Density" + ], + "n_detectors": 7, + "support": 7, + "scatter": 1, + "score": 17, + "fat_union": false, + "fix": "single-source this state (one stamp, or recompute on write) -- the invariant-#16 desync shape", + "sites": [ + "facts/report.rb:checkout:18", + "facts/report.rb:checkout:20", + "facts/report.rb:checkout:14", + "facts/report.rb:checkout:13", + "facts/report.rb:checkout:12", + "facts/report.rb:checkout:17", + "facts/report.rb:checkout:19" + ] + }, + { + "kind": "name", + "token": "ready", + "detectors": [ + "Reification Misses", + "Semantic Predicate Aliases", + "State-Based Branch Density" + ], + "n_detectors": 3, + "support": 3, + "scatter": 1, + "score": 9, + "fat_union": false, + "fix": "reify ONE named predicate/decision and call it everywhere", + "sites": [ + "facts/report.rb:checkout:20", + "facts/report.rb:checkout:14", + "facts/report.rb:checkout:13" + ] + }, + { + "kind": "tuple", + "token": "ready | valid", + "detectors": [ + "Missing Abstractions", + "Neglected Conditions", + 
"Neglected Path Conditions" + ], + "n_detectors": 3, + "support": 3, + "scatter": 2, + "score": 6, + "fat_union": false, + "fix": "reify ONE named predicate/decision and call it everywhere", + "sites": [ + "facts/report.rb:checkout:10", + "facts/report.rb:refund:30", + "facts/report.rb:checkout:11", + "facts/report.rb:checkout:15" + ] + }, + { + "kind": "name", + "token": "READY", + "detectors": [ + "Reification Misses", + "Semantic Predicate Aliases" + ], + "n_detectors": 2, + "support": 2, + "scatter": 1, + "score": 6, + "fat_union": false, + "fix": "reify ONE named predicate/decision and call it everywhere", + "sites": [ + "facts/report.rb:checkout:14", + "facts/report.rb:checkout:13" + ] + }, + { + "kind": "name", + "token": "provenance", + "detectors": [ + "Derived-State Staleness", + "Neglected Updates" + ], + "n_detectors": 2, + "support": 2, + "scatter": 1, + "score": 4, + "fat_union": false, + "fix": "single-source this state (one stamp, or recompute on write) -- the invariant-#16 desync shape", + "sites": [ + "facts/report.rb:checkout:12", + "facts/report.rb:checkout:17" + ] + }, + { + "kind": "name", + "token": "valid", + "detectors": [ + "Neglected Conditions", + "Neglected Path Conditions" + ], + "n_detectors": 2, + "support": 2, + "scatter": 1, + "score": 3, + "fat_union": false, + "fix": "converging structural debt -- resolve once at the named entity", + "sites": [ + "facts/report.rb:checkout:11", + "facts/report.rb:checkout:15" + ] + } + ], + "sarif": { + "rule_count": 25, + "result_count": 8, + "rule_ids": [ + "decomplex.decision-pressure", + "decomplex.missing-abstractions", + "decomplex.reification-misses", + "decomplex.semantic-predicate-aliases", + "decomplex.state-based-branch-density", + "decomplex.derived-state-staleness", + "decomplex.neglected-conditions", + "decomplex.neglected-updates" + ], + "messages": [ + "Decision Pressure: `.storage` creates 2 eliminable guard decision(s) across 1 method(s)", + "Missing Abstractions: guard tuple 
`ready | valid` repeats in 2 site(s) with scatter=2", + "Reification Misses: predicate `ready?` is reinvented inline as `storage == READY`", + "Semantic Predicate Aliases: predicate aliases `ready? = prepared?` for `storage == READY`", + "State-Based Branch Density: 1 state-based branch decision(s) over `storage`; example predicate `storage.ready?`", + "Derived-State Staleness: `storage` derived from `provenance` at line 17; `provenance` reassigned at line 22 but `storage` is not recomputed", + "Neglected Conditions: missing condition `valid` from `ready | valid` (support=2)", + "Neglected Updates: writes `.storage` but not co-written `.provenance` on receiver `order` (support=3)" + ], + "locations": [ + { + "uri": "facts/report.rb", + "startLine": 18 + }, + { + "uri": "facts/report.rb", + "startLine": 10 + }, + { + "uri": "facts/report.rb", + "startLine": 14 + }, + { + "uri": "facts/report.rb", + "startLine": 13 + }, + { + "uri": "facts/report.rb", + "startLine": 20 + }, + { + "uri": "facts/report.rb", + "startLine": 17 + }, + { + "uri": "facts/report.rb", + "startLine": 11 + }, + { + "uri": "facts/report.rb", + "startLine": 12 + } + ] + } + } +} diff --git a/gems/decomplex/lib/decomplex/detector_runner.rb b/gems/decomplex/lib/decomplex/detector_runner.rb index 5070c92e0..734a16fa3 100644 --- a/gems/decomplex/lib/decomplex/detector_runner.rb +++ b/gems/decomplex/lib/decomplex/detector_runner.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "json" +require "ostruct" require_relative "co_update" require_relative "flay_similarity" require_relative "local_flow" @@ -143,12 +144,40 @@ def canonical_json(detector, files, engine: "ruby", **options) JSON.generate(canonicalize(run(detector, files, engine: engine, **options))) << "\n" end + def run_fact_fixture(path, engine: "ruby") + fixture = JSON.parse(File.read(path.to_s)) + detector = fixture.fetch("detector") + + case engine.to_s + when "ruby" + documents = 
fact_documents(fixture.fetch("input").fetch("documents")) + options = symbolize_options(fixture.fetch("options", {})) + with_fact_documents(documents) do + run(detector, documents.map(&:file), engine: "ruby", **options) + end + when "rust" + JSON.parse(Native::Command.run("detector-facts", "--input", path.to_s)) + else + raise ArgumentError, "unsupported decomplex detector engine: #{engine}" + end + end + + def canonical_json_from_fact_fixture(path, engine: "ruby") + JSON.generate(canonicalize(run_fact_fixture(path, engine: engine))) << "\n" + end + def compare(detector, files, **options) ruby_json = canonical_json(detector, files, engine: "ruby", **options) rust_json = canonical_json(detector, files, engine: "rust", **options) [ruby_json == rust_json, ruby_json, rust_json] end + def compare_fact_fixture(path) + ruby_json = canonical_json_from_fact_fixture(path, engine: "ruby") + rust_json = canonical_json_from_fact_fixture(path, engine: "rust") + [ruby_json == rust_json, ruby_json, rust_json] + end + def detector_names DETECTORS.keys end @@ -165,6 +194,188 @@ def detector_names raise ArgumentError, "unsupported decomplex detector engine: #{engine}" end + private_class_method def self.symbolize_options(options) + options.each_with_object({}) { |(key, value), out| out[key.to_sym] = value } + end + + private_class_method def self.fact_documents(rows) + Array(rows).map { |row| FactDocument.new(row) } + end + + private_class_method def self.with_fact_documents(documents) + by_file = documents.to_h { |document| [document.file.to_s, document] } + original_parse = Syntax.method(:parse) + Syntax.define_singleton_method(:parse) do |file, **kwargs| + by_file.fetch(file.to_s) { original_parse.call(file, **kwargs) } + end + yield + ensure + Syntax.define_singleton_method(:parse, original_parse) + end + + class FactDocument + attr_reader :file, :language, :source, :lines + + FACT_ARRAYS = %w[ + branch_arms branch_decisions call_sites comparison_sites decision_sites + 
dispatch_sites function_defs local_methods owner_defs path_condition_sites + predicate_aliases predicate_defs semantic_effect_sites state_declarations + state_param_origins state_reads state_writes + ].freeze + + def initialize(row) + @row = row + @file = row.fetch("file") + @language = row.fetch("language", "ruby").to_sym + @source = row.fetch("source", "") + @lines = row.fetch("lines", @source.lines) + @immutable_struct_readers = object_hash(row.fetch("immutable_struct_readers", {})) + @immutable_struct_reader_types = object_hash(row.fetch("immutable_struct_reader_types", {})) + @type_aliases = object_hash(row.fetch("type_aliases", {})) + @local_complexity_scores = row.fetch("local_complexity_scores", {}).to_h do |id, score| + [id.to_s, symbolized_value(score)] + end + @local_contract_assignments = row.fetch("local_contract_assignments", {}) + + FACT_ARRAYS.each do |name| + instance_variable_set("@#{name}", fact_array(row.fetch(name, []))) + end + end + + FACT_ARRAYS.each do |name| + define_method(name) { instance_variable_get("@#{name}") } + end + + def local_methods + return @local_methods if @row.key?("local_methods") + + Syntax.language_profile(language).local_methods(self) + end + + def path_condition_sites + return @path_condition_sites if @row.key?("path_condition_sites") + + Syntax.language_profile(language).path_condition_sites(self) + end + + def branch_decisions(immutable_readers:, immutable_reader_types:, type_aliases:) + @branch_decisions + end + + def immutable_struct_readers + @immutable_struct_readers + end + + def immutable_struct_reader_types + @immutable_struct_reader_types + end + + def type_aliases + @type_aliases + end + + def local_complexity_scores + @local_complexity_scores + end + + def local_contract_assignments(method) + @local_contract_assignments.fetch(method.name.to_s, {}) + end + + def redundant_nil_guard_findings + Syntax::NilGuardAnalyzer.new(self).scan + end + + private + + def fact_array(value) + Array(value).map { |item| 
objectify(item) } + end + + def object_hash(value) + value.to_h { |key, child| [key.to_s, child] } + end + + def objectify(value) + case value + when Hash + if value.key?("kind") && value.key?("span") && value.key?("children") + return FactNode.new(value, method(:objectify_field)) + end + + OpenStruct.new(value.to_h { |key, child| [key.to_s, objectify_field(key.to_s, child)] }) + when Array + value.map { |child| objectify(child) } + else + value + end + end + + def objectify_field(key, value) + if key == "control" && %w[conditional iterates].include?(value.to_s) + return value.to_sym + end + if key == "visibility" && %w[public protected private].include?(value.to_s) + return value.to_sym + end + + objectify(value) + end + + def symbolized_value(value) + case value + when Hash + value.to_h { |key, child| [key.to_sym, symbolized_value(child)] } + when Array + value.map { |child| symbolized_value(child) } + else + value + end + end + end + + class FactPoint + attr_reader :row, :column + + def initialize(row, column) + @row = row + @column = column + end + end + + class FactNode + attr_reader :kind, :text, :span, :field_name, :children, :start_point, :end_point + attr_accessor :parent, :prev_sibling, :next_sibling + + def initialize(row, objectifier) + @kind = row.fetch("kind") + @text = row.fetch("text", "") + @span = row.fetch("span") + @field_name = row["field_name"] + @named = row.fetch("named", true) + @children = Array(row.fetch("children", [])).map { |child| objectifier.call("node", child) } + @children.each { |child| child.parent = self if child.respond_to?(:parent=) } + @children.each_cons(2) do |left, right| + left.next_sibling = right if left.respond_to?(:next_sibling=) + right.prev_sibling = left if right.respond_to?(:prev_sibling=) + end + @start_point = FactPoint.new(@span[0].to_i - 1, @span[1].to_i) + @end_point = FactPoint.new(@span[2].to_i - 1, @span[3].to_i) + end + + def named? 
+ @named + end + + def named_children + @children.select { |child| child.respond_to?(:named?) && child.named? } + end + + def child_by_field_name(name) + @children.find { |child| child.respond_to?(:field_name) && child.field_name.to_s == name.to_s } + end + end + private_class_method def self.co_update(files, engine:, jobs:) return Native::CoUpdate.scan(files, jobs: jobs) if engine.to_s == "rust" diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs index e1995e5c6..0cd95d487 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -1,6 +1,6 @@ use crate::decomplex::syntax::Language; use anyhow::{Context, Result}; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use std::collections::BTreeSet; use std::fs; use std::path::Path; @@ -34,7 +34,7 @@ const PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS: &[&str] = &[ "expression_statement", ]; -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] pub struct RawNode { pub kind: String, pub text: String, @@ -226,7 +226,7 @@ pub fn node_text<'a>(node: TreeSitterNode<'_>, source: &'a str) -> &'a str { node.utf8_text(source.as_bytes()).unwrap_or("") } -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] pub enum Child { Node(Box), Symbol(String), @@ -236,7 +236,7 @@ pub enum Child { Nil, } -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] pub struct Node { pub r#type: String, pub children: Vec, diff --git a/gems/decomplex/rust/src/decomplex/convergence.rs b/gems/decomplex/rust/src/decomplex/convergence.rs index e646028d3..93768835c 100644 --- a/gems/decomplex/rust/src/decomplex/convergence.rs +++ b/gems/decomplex/rust/src/decomplex/convergence.rs @@ -1,11 +1,12 @@ use crate::decomplex::report::ReportSection; use crate::decomplex::report_value as rv; +use serde::Serialize; 
use serde_json::Value; use std::collections::{BTreeMap, HashMap}; pub const TIER_WEIGHT: &[(i64, i64)] = &[(1, 3), (2, 2), (3, 1)]; -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct Unit { pub file: String, pub method: String, @@ -16,7 +17,7 @@ pub struct Unit { pub at: String, } -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct FileRollup { pub file: String, pub detectors: Vec, diff --git a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs index 8b9567825..0d84104a2 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs @@ -46,9 +46,15 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result Vec { + scan_documents_with_summaries(documents, local_flow::scan_documents(documents)) +} + +pub fn scan_documents_with_summaries( + documents: &[Document], + methods: Vec, +) -> Vec { let mut guard = Vec::new(); let mut dispatch = Vec::new(); - let methods = local_flow::scan_documents(documents); let assignment_maps = build_assignment_maps(&methods); let methods_by_file = methods_by_file(&methods); diff --git a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs index 2b7027905..3bbcb8551 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs @@ -34,7 +34,11 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result Vec { - let mut out = local_flow::scan_documents(documents) + scan_summaries(local_flow::scan_documents(documents)) +} + +pub fn scan_summaries(summaries: Vec) -> Vec { + let mut out = summaries .iter() .flat_map(|method| analyze_method(method)) .collect::>(); diff --git 
a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index c70dd4217..2c6647755 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -2,7 +2,7 @@ use crate::decomplex::ast::{self, Child, Node, RawNode, Span}; use crate::decomplex::syntax::adapters::{language_profile, LanguageProfile}; use crate::decomplex::syntax::{self, Document, FunctionDef, Language}; use anyhow::Result; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use std::collections::{BTreeMap, BTreeSet}; use std::path::{Path, PathBuf}; @@ -11,7 +11,7 @@ pub struct LocalFlowRow { pub summaries: Vec, } -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] pub struct MethodSummary { pub id: String, pub owner: String, @@ -19,15 +19,15 @@ pub struct MethodSummary { pub file: String, pub line: usize, pub span: Span, - #[serde(skip_serializing)] + #[serde(default = "empty_node", skip_serializing)] pub node: Node, - #[serde(skip_serializing)] + #[serde(default, skip_serializing)] pub raw_node: Option, pub statements: Vec, pub boundaries: Vec, } -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] pub struct Statement { pub index: usize, pub line: usize, @@ -40,7 +40,7 @@ pub struct Statement { pub co_uses: Vec<(String, String)>, } -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] pub struct Boundary { pub before_index: usize, pub after_index: usize, @@ -63,6 +63,18 @@ const STATEMENT_CONTAINER_TYPES: &[&str] = &[ "STATEMENTS", ]; +fn empty_node() -> Node { + Node { + r#type: "ROOT".to_string(), + children: Vec::new(), + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 0, + text: String::new(), + } +} + pub fn scan_files(files: &[PathBuf], 
language: Language) -> Result> { let documents = syntax::parse_files(files, language)?; Ok(scan_documents(&documents)) @@ -676,15 +688,6 @@ fn raw_declaration_name_in_tree( .any(|child| raw_declaration_name_in_tree(child, target, profile)) } -fn raw_local_declaration_name_node<'a>( - node: &'a RawNode, - profile: &dyn LanguageProfile, -) -> Option<&'a RawNode> { - raw_local_declaration_name_nodes(node, profile) - .into_iter() - .next() -} - fn raw_local_declaration_name_nodes<'a>( node: &'a RawNode, profile: &dyn LanguageProfile, diff --git a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs index 9148fbf7c..c4eb4865d 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs @@ -69,7 +69,7 @@ pub fn scan_summaries(summaries: Vec) -> Vec, complexity_scores: BTreeMap<(String, String), LocalComplexityScore>, ) -> Vec { diff --git a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs index deeab38f4..50ccb7455 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs @@ -48,12 +48,18 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result PathConditionReport { - let raw_sites = documents + let mut sites = documents .iter() - .flat_map(sites_from_raw_facts) + .flat_map(sites_from_document_facts) .collect::>(); - if !raw_sites.is_empty() { - return Report::new(raw_sites).findings(); + sites.extend( + documents + .iter() + .flat_map(sites_from_raw_facts) + .collect::>(), + ); + if !sites.is_empty() { + return Report::new(dedupe_sites(sites)).findings(); } let mut sites = Vec::new(); @@ -65,6 +71,37 @@ pub fn scan_documents(documents: &[Document]) -> PathConditionReport { Report::new(sites).findings() } +fn dedupe_sites(sites: Vec) -> Vec { + let 
mut seen = BTreeSet::new(); + sites + .into_iter() + .filter(|site| { + seen.insert(( + site.guards.clone(), + site.action.clone(), + site.file.clone(), + site.defn.clone(), + site.line, + )) + }) + .collect() +} + +fn sites_from_document_facts(document: &Document) -> Vec { + document + .path_condition_sites + .iter() + .map(|site| Site { + guards: site.guards.clone(), + action: site.action.clone(), + file: site.file.clone(), + defn: site.function.clone(), + line: site.line, + span: site.span, + }) + .collect() +} + fn sites_from_raw_facts(document: &Document) -> Vec { let profile = language_profile(document.language); let mut sites = Vec::new(); @@ -431,64 +468,6 @@ fn raw_comment_node(node: &RawNode) -> bool { node.kind.contains("comment") } -fn sites_from_mined_facts(document: &Document) -> Vec { - let mut sites = Vec::new(); - for decision in &document.decision_sites { - if decision.members.len() < 2 { - continue; - } - for call in &document.call_sites { - if call.function != decision.function - || !span_inside(call.span, decision.enclosing_span) - { - continue; - } - if span_inside(call.span, decision.span) { - continue; - } - if decision - .members - .iter() - .any(|member| member == &guard_call_text(call)) - { - continue; - } - sites.push(Site { - guards: decision.members.clone(), - action: action_text(call), - file: call.file.clone(), - defn: call.function.clone(), - line: call.line, - span: call.span, - }); - } - } - sites -} - -fn action_text(call: &syntax::CallSite) -> String { - let arguments = call.arguments.join(", "); - if call.receiver == "self" { - format!("{}({arguments})", call.message) - } else { - format!("{}.{}({arguments})", call.receiver, call.message) - } -} - -fn guard_call_text(call: &syntax::CallSite) -> String { - if call.receiver == "self" { - format!("{}()", call.message) - } else { - format!("{}.{}()", call.receiver, call.message) - } -} - -fn span_inside(inner: Span, outer: Span) -> bool { - let starts_after_or_at = inner[0] > 
outer[0] || (inner[0] == outer[0] && inner[1] >= outer[1]); - let ends_before_or_at = inner[2] < outer[2] || (inner[2] == outer[2] && inner[3] <= outer[3]); - starts_after_or_at && ends_before_or_at -} - struct PathCondition { file: String, lines: Vec, diff --git a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs index 59d512853..b25faf725 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs @@ -67,7 +67,7 @@ impl Finding { const TERMINATING_CALLS: &[&str] = &["raise", "fail", "abort", "exit", "exit!"]; const NIL_PREDICATE_MIDS: &[&str] = &["nil?", "isNull", "is_null", "nil", "is_none"]; -const NON_NIL_PREDICATE_MIDS: &[&str] = &["isSome", "is_some", "present"]; +const NON_NIL_PREDICATE_MIDS: &[&str] = &["isSome", "is_some", "present", "present?"]; pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { let documents = syntax::parse_files(files, language)?; @@ -311,29 +311,6 @@ impl RedundantNilGuard { } match node.r#type.as_str() { - "CALL" => { - let recv = node.children.get(0).and_then(ast::node)?; - let mid = match node.children.get(1)? { - Child::String(s) | Child::Symbol(s) => s, - _ => return None, - }; - let args = node.children.get(2); - if NIL_PREDICATE_MIDS.contains(&mid.as_str()) && self.no_call_arguments(args) { - let subject = self.subject_key(recv)?; - return Some(NilFact { - local: subject, - non_nil_when_true: false, - }); - } - if NON_NIL_PREDICATE_MIDS.contains(&mid.as_str()) && self.no_call_arguments(args) { - let subject = self.subject_key(recv)?; - return Some(NilFact { - local: subject, - non_nil_when_true: true, - }); - } - None - } "OPCALL" => { let recv = node.children.get(0).and_then(ast::node)?; let mid = match node.children.get(1)? 
{ @@ -600,17 +577,6 @@ impl RedundantNilGuard { !(mid.ends_with('=') || mid.ends_with('!') || mid == "[]") } - #[allow(dead_code)] - fn local_name(&self, node: &Node) -> Option { - if matches!(node.r#type.as_str(), "LVAR" | "DVAR") { - match node.children.first()? { - Child::String(s) | Child::Symbol(s) => return Some(s.clone()), - _ => {} - } - } - None - } - fn nil_arg(&self, args: Option<&Child>) -> bool { let Some(Child::Node(node)) = args else { return false; diff --git a/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs b/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs index 950b9e44f..12321f5c6 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs @@ -1,5 +1,4 @@ -use crate::decomplex::ast::{RawNode, Span}; -use crate::decomplex::syntax::adapters::language_profile; +use crate::decomplex::ast::Span; use crate::decomplex::syntax::{self, CallSite, Document, FunctionDef, Language}; use anyhow::Result; use serde::Serialize; @@ -266,56 +265,13 @@ fn file_owner(file: &str) -> String { } fn enclosed_by_matching_owner(document: &Document, owner: &str, span: Span) -> bool { - let profile = language_profile(document.language); - let mut nodes = Vec::new(); - document.root.walk(&mut nodes); - nodes.into_iter().any(|node| { - raw_owner_name(profile, node).as_deref() == Some(owner) && encloses(node.span, span) - }) -} - -fn raw_owner_name( - profile: &dyn crate::decomplex::syntax::adapters::LanguageProfile, - node: &RawNode, -) -> Option { - let owner_kind = profile - .class_owner_node_kinds() - .contains(&node.kind.as_str()) - || profile - .module_owner_node_kinds() - .contains(&node.kind.as_str()) - || profile - .generic_owner_node_kinds() - .contains(&node.kind.as_str()) - || profile - .struct_owner_node_kinds() - .contains(&node.kind.as_str()) - || profile - .impl_owner_node_kinds() - .contains(&node.kind.as_str()); - let 
hidden_ruby_owner = node.kind == "body_statement" - && node - .children - .first() - .map(|child| matches!(child.kind.as_str(), "class" | "module")) - .unwrap_or(false); - if !owner_kind && !hidden_ruby_owner { - return None; - } - - node.children + document + .owner_defs .iter() - .find(|child| { - child.named - && matches!( - child.kind.as_str(), - "identifier" | "constant" | "type_identifier" | "field_identifier" - ) - }) - .map(|child| child.text.clone()) + .any(|owner_def| owner_def.name == owner && span_encloses(owner_def.span, span)) } -fn encloses(outer: Span, inner: Span) -> bool { +fn span_encloses(outer: Span, inner: Span) -> bool { let starts_before = outer[0] < inner[0] || (outer[0] == inner[0] && outer[1] <= inner[1]); let ends_after = outer[2] > inner[2] || (outer[2] == inner[2] && outer[3] >= inner[3]); starts_before && ends_after diff --git a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs index 41b657e27..fd1ab44d5 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/weighted_inlined_cognitive_complexity.rs @@ -31,6 +31,13 @@ pub fn scan_files( } pub fn scan_documents(documents: &[Document]) -> Vec { + scan_documents_with_summaries(documents, local_flow::scan_documents(documents)) +} + +pub fn scan_documents_with_summaries( + documents: &[Document], + summaries: Vec, +) -> Vec { let topology_report = structural_topology::scan_documents(documents); let topology = structural_topology::Graph::new(topology_report.methods, topology_report.edges); let complexity_scores = documents @@ -44,7 +51,7 @@ pub fn scan_documents(documents: &[Document]) -> Vec>(); let mut scores = BTreeMap::new(); - for summary in local_flow::scan_documents(documents) { + for summary in summaries { let owner = if summary.owner == "(top-level)" { 
format!("(top-level:{})", summary.file) } else { diff --git a/gems/decomplex/rust/src/decomplex/report.rs b/gems/decomplex/rust/src/decomplex/report.rs index 651725d09..a9495d192 100644 --- a/gems/decomplex/rust/src/decomplex/report.rs +++ b/gems/decomplex/rust/src/decomplex/report.rs @@ -141,6 +141,14 @@ impl Report { serde_json::to_string_pretty(&self.to_sarif_value(true, true, None)).unwrap() } + pub fn convergence_value(&self) -> Value { + json!(self.convergence) + } + + pub fn root_clusters_value(&self) -> Value { + json!(self.root) + } + pub fn to_sarif_value( &self, include_snapshot: bool, diff --git a/gems/decomplex/rust/src/decomplex/root_cause.rs b/gems/decomplex/rust/src/decomplex/root_cause.rs index 66fe2de7a..834f03a2a 100644 --- a/gems/decomplex/rust/src/decomplex/root_cause.rs +++ b/gems/decomplex/rust/src/decomplex/root_cause.rs @@ -2,6 +2,7 @@ use crate::decomplex::convergence; use crate::decomplex::report::ReportSection; use crate::decomplex::report_value as rv; use regex::Regex; +use serde::Serialize; use serde_json::Value; use std::collections::{BTreeMap, HashMap, HashSet}; use std::sync::OnceLock; @@ -32,7 +33,7 @@ pub struct Entity { pub token: String, } -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct Cluster { pub kind: String, pub token: String, diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index 70228823d..ce82989a8 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -5,7 +5,7 @@ pub mod tree_sitter_adapter; use crate::decomplex::ast::{Node as NormalizedNode, RawNode, Span}; use crate::decomplex::parallel; use anyhow::{bail, Result}; -use serde::Serialize; +use serde::{Deserialize, Deserializer, Serialize}; use std::collections::BTreeMap; use std::path::PathBuf; @@ -92,29 +92,80 @@ impl Language { } } -#[derive(Clone, Debug)] +impl<'de> Deserialize<'de> for Language { + fn 
deserialize(deserializer: D) -> std::result::Result + where + D: Deserializer<'de>, + { + let value = String::deserialize(deserializer)?; + Self::parse(&value).map_err(serde::de::Error::custom) + } +} + +#[derive(Clone, Debug, Deserialize)] pub struct Document { pub file: String, pub language: Language, + #[serde(default)] pub source: String, + #[serde(default)] pub lines: Vec, + #[serde(default = "empty_raw_node")] pub root: RawNode, + #[serde(default = "empty_normalized_node")] pub normalized_root: NormalizedNode, + #[serde(default)] pub function_defs: Vec, + #[serde(default)] pub owner_defs: Vec, + #[serde(default)] pub call_sites: Vec, + #[serde(default)] pub state_reads: Vec, + #[serde(default)] pub state_writes: Vec, + #[serde(default)] pub decision_sites: Vec, + #[serde(default)] pub branch_decisions: Vec, + #[serde(default)] pub dispatch_sites: Vec, + #[serde(default)] pub semantic_effect_sites: Vec, + #[serde(default)] pub local_complexity_scores: BTreeMap, + #[serde(default)] pub predicate_aliases: Vec, + #[serde(default)] pub comparison_uses: Vec, + #[serde(default)] + pub path_condition_sites: Vec, } -#[derive(Clone, Debug)] +fn empty_raw_node() -> RawNode { + RawNode { + kind: "program".to_string(), + text: String::new(), + span: [1, 0, 1, 0], + named: true, + field_name: None, + children: Vec::new(), + } +} + +fn empty_normalized_node() -> NormalizedNode { + NormalizedNode { + r#type: "ROOT".to_string(), + children: Vec::new(), + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 0, + text: String::new(), + } +} + +#[derive(Clone, Debug, Deserialize)] pub struct FunctionDef { pub file: String, pub name: String, @@ -126,7 +177,7 @@ pub struct FunctionDef { pub params: Vec, } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Deserialize)] pub struct OwnerDef { pub file: String, pub name: String, @@ -135,7 +186,7 @@ pub struct OwnerDef { pub span: Span, } -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +#[derive(Clone, Debug, 
Deserialize, Eq, PartialEq, Serialize)] pub struct CallSite { pub receiver: String, pub message: String, @@ -151,7 +202,7 @@ pub struct CallSite { pub block: bool, } -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct StateWrite { pub field: String, pub receiver: String, @@ -162,7 +213,7 @@ pub struct StateWrite { pub owner: String, } -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct StateRead { pub field: String, pub receiver: String, @@ -173,7 +224,7 @@ pub struct StateRead { pub owner: String, } -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct PredicateAlias { pub name: String, pub body: String, @@ -183,7 +234,7 @@ pub struct PredicateAlias { pub span: Span, } -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct DecisionSite { pub kind: String, pub members: Vec, @@ -195,7 +246,7 @@ pub struct DecisionSite { pub enclosing_span: Span, } -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct BranchDecision { pub file: String, pub function: String, @@ -205,7 +256,7 @@ pub struct BranchDecision { pub state_refs: Vec, } -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct DispatchSite { pub variant_set: Vec, pub arm_members: BTreeMap>, @@ -216,7 +267,7 @@ pub struct DispatchSite { pub span: Span, } -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct SemanticEffectSite { pub kind: String, pub detail: String, @@ -226,13 +277,13 @@ pub struct SemanticEffectSite { pub span: Span, } -#[derive(Clone, Debug, PartialEq, Serialize)] +#[derive(Clone, Debug, Deserialize, 
PartialEq, Serialize)] pub struct LocalComplexityScore { pub score: f64, pub signals: BTreeMap, } -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct ComparisonUse { pub canon_source: String, pub raw: String, @@ -243,6 +294,16 @@ pub struct ComparisonUse { pub enclosing_span: Span, } +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct PathConditionSite { + pub guards: Vec, + pub action: String, + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, +} + #[derive(Clone, Debug)] pub(crate) struct CloneCandidate { pub(crate) file: String, diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 5f974063c..0f97c00ab 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -86,6 +86,7 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { local_complexity_scores, predicate_aliases, comparison_uses, + path_condition_sites: Vec::new(), }) } diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs index ce882fc3d..48e993fed 100644 --- a/gems/decomplex/rust/src/main.rs +++ b/gems/decomplex/rust/src/main.rs @@ -11,7 +11,9 @@ use decomplex::detectors::{ use decomplex::parallel; use decomplex::report::Report; use decomplex::report_facts::{self, Options as ReportFactsOptions, VcsFilter}; -use decomplex::syntax::Language; +use decomplex::syntax::{Document, Language, LocalComplexityScore}; +use serde::Deserialize; +use serde_json::{json, Value}; use std::io::Read; use std::path::PathBuf; @@ -266,6 +268,18 @@ fn run() -> Result<()> { let facts = read_facts(input.as_ref(), from_stdin)?; render_report(&facts, &format, output.as_ref())?; } + Command::DetectorFacts { input } => { + let fixture = read_facts(Some(&input), false)?; + let detector = 
fixture + .get("detector") + .and_then(Value::as_str) + .with_context(|| format!("{} missing detector", input.display()))?; + let input = detector_fact_input(&fixture).with_context(|| { + format!("failed to read detector facts from {}", input.display()) + })?; + let output = run_detector_on_fact_input(detector, &input, &fixture)?; + println!("{}", serde_json::to_string(&output)?); + } } Ok(()) } @@ -417,6 +431,9 @@ enum Command { format: String, output: Option, }, + DetectorFacts { + input: PathBuf, + }, } impl Command { @@ -449,7 +466,7 @@ impl Command { | Self::FatUnion { jobs, .. } | Self::Facts { jobs, .. } | Self::Report { jobs, .. } => *jobs, - Self::RenderReport { .. } => None, + Self::RenderReport { .. } | Self::DetectorFacts { .. } => None, } } } @@ -494,6 +511,10 @@ fn parse_args(args: Vec) -> Result { output: args.output, }) } + "detector-facts" => { + let input = parse_input_only_args(cursor.collect(), "detector-facts")?; + Ok(Command::DetectorFacts { input }) + } "state-writes" => { let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; if files.is_empty() { @@ -813,6 +834,198 @@ fn parse_args(args: Vec) -> Result { } } +fn parse_input_only_args(args: Vec, command: &str) -> Result { + let mut input = None; + let mut cursor = args.into_iter(); + while let Some(arg) = cursor.next() { + if arg == "--input" { + input = Some(PathBuf::from( + cursor.next().with_context(|| "--input requires a value")?, + )); + } else if let Some(value) = arg.strip_prefix("--input=") { + input = Some(PathBuf::from(value)); + } else { + bail!("unknown {command} argument: {arg}"); + } + } + input.with_context(|| format!("{command} requires --input=FILE")) +} + +#[derive(Deserialize)] +struct DetectorFactDocuments { + documents: Vec, +} + +struct DetectorFactInput { + documents: Vec, + local_methods: Vec, +} + +fn detector_fact_input(fixture: &Value) -> Result { + let input = fixture + .get("input") + .cloned() + .with_context(|| "detector fact fixture 
missing input")?; + let documents: DetectorFactDocuments = serde_json::from_value(input.clone())?; + let mut local_methods = Vec::new(); + + if let Some(methods) = input.get("local_methods") { + local_methods.extend(serde_json::from_value::>( + methods.clone(), + )?); + } + for document in input + .get("documents") + .and_then(Value::as_array) + .into_iter() + .flatten() + { + if let Some(methods) = document.get("local_methods") { + local_methods.extend(serde_json::from_value::>( + methods.clone(), + )?); + } + } + + Ok(DetectorFactInput { + documents: documents.documents, + local_methods, + }) +} + +fn run_detector_on_fact_input( + detector: &str, + input: &DetectorFactInput, + fixture: &Value, +) -> Result { + let documents = input.documents.as_slice(); + match detector { + "co-update" => Ok(json!(co_update::scan_documents(documents))), + "decision-pressure" => { + if input.local_methods.is_empty() { + Ok(json!(decision_pressure::scan_documents(documents))) + } else { + Ok(json!(decision_pressure::scan_documents_with_summaries( + documents, + input.local_methods.clone() + ))) + } + } + "predicate-alias" | "predicate-aliases" => { + Ok(json!(predicate_alias::scan_documents(documents))) + } + "miner" | "decision-miner" => Ok(json!(miner::scan_documents(documents))), + "semantic-alias" | "semantic-aliases" => { + Ok(json!(semantic_alias::scan_documents(documents))) + } + "flay-similarity" | "structural-similarity" => { + let options = fixture.get("options").unwrap_or(&Value::Null); + let mass = value_usize(options, "mass", 32)?; + let fuzzy = value_usize(options, "fuzzy", 1)?; + Ok(json!(flay_similarity::scan_documents( + documents, mass, fuzzy + ))) + } + "temporal-ordering-pressure" => { + Ok(json!(temporal_ordering_pressure::scan_documents(documents))) + } + "state-branch-density" => Ok(json!(state_branch_density::scan_documents(documents))), + "redundant-nil-guard" => Ok(json!(redundant_nil_guard::scan_documents(documents))), + "state-mesh" | "state-heatmap" => 
Ok(json!(state_mesh::scan_documents(documents))), + "inconsistent-rename-clone" => { + Ok(json!(inconsistent_rename_clone::scan_documents(documents))) + } + "derived-state" => { + if input.local_methods.is_empty() { + Ok(json!(derived_state::scan_documents(documents))) + } else { + Ok(json!(derived_state::scan_summaries( + input.local_methods.clone() + ))) + } + } + "implicit-control-flow" | "ordered-protocol-mine" => { + Ok(json!(implicit_control_flow::scan_documents(documents))) + } + "weighted-inlined-complexity" => { + if input.local_methods.is_empty() { + Ok(json!( + weighted_inlined_cognitive_complexity::scan_documents(documents) + )) + } else { + Ok(json!( + weighted_inlined_cognitive_complexity::scan_documents_with_summaries( + documents, + input.local_methods.clone() + ) + )) + } + } + "locality-drag" => { + if input.local_methods.is_empty() { + Ok(json!(locality_drag::scan_documents(documents))) + } else { + Ok(json!(locality_drag::scan_summaries_with_scores( + input.local_methods.clone(), + complexity_scores(documents) + ))) + } + } + "operational-discontinuity" => { + if input.local_methods.is_empty() { + Ok(json!(operational_discontinuity::scan_documents(documents))) + } else { + Ok(json!(operational_discontinuity::scan_summaries( + input.local_methods.clone() + ))) + } + } + "oversized-predicate" => Ok(json!(oversized_predicate::scan_documents(documents))), + "path-condition" => Ok(json!({ + "neglected": path_condition::scan_documents(documents).neglected, + })), + "sequence-mine" | "broken-protocol" => Ok(json!(sequence_mine::scan_documents(documents))), + "function-lcom" => { + if input.local_methods.is_empty() { + Ok(json!(function_lcom::scan_documents(documents))) + } else { + Ok(json!(function_lcom::scan_summaries( + input.local_methods.clone() + ))) + } + } + "false-simplicity" => Ok(json!(false_simplicity::scan_documents(documents))), + "fat-union" => Ok(json!(fat_union::scan_documents(documents))), + "local-flow" => 
Ok(json!(local_flow::scan_documents(documents))), + "structural-topology" => Ok(json!(structural_topology::scan_documents(documents))), + _ => bail!("unsupported detector fact fixture: {detector}"), + } +} + +fn complexity_scores( + documents: &[Document], +) -> std::collections::BTreeMap<(String, String), LocalComplexityScore> { + documents + .iter() + .flat_map(|document| { + document + .local_complexity_scores + .iter() + .map(|(id, score)| ((document.file.clone(), id.clone()), score.clone())) + }) + .collect() +} + +fn value_usize(options: &Value, key: &str, default: usize) -> Result { + match options.get(key) { + Some(value) => value + .as_u64() + .map(|value| value as usize) + .with_context(|| format!("option {key} must be an integer")), + None => Ok(default), + } +} + struct ReportFactsArgs { options: ReportFactsOptions, targets: Vec, diff --git a/gems/decomplex/rust/tests/examples_oracle.rs b/gems/decomplex/rust/tests/examples_oracle.rs index f3116e925..025d5d5e1 100644 --- a/gems/decomplex/rust/tests/examples_oracle.rs +++ b/gems/decomplex/rust/tests/examples_oracle.rs @@ -6,8 +6,9 @@ use decomplex_rust::decomplex::detectors::{ redundant_nil_guard, semantic_alias, sequence_mine, state_branch_density, state_mesh, structural_topology, temporal_ordering_pressure, weighted_inlined_cognitive_complexity, }; -use decomplex_rust::decomplex::syntax::Language; -use serde::Serialize; +use decomplex_rust::decomplex::report::Report; +use decomplex_rust::decomplex::syntax::{Document, Language, LocalComplexityScore}; +use serde::{Deserialize, Serialize}; use serde_json::{json, Map, Value}; use std::collections::BTreeSet; use std::fs; @@ -64,6 +65,85 @@ fn shared_examples_match_oracles() -> Result<()> { } } +#[test] +fn shared_detector_fact_examples_match_exact_oracles() -> Result<()> { + let examples_root = examples_root(); + let mut failures = Vec::new(); + + for fixture in detector_fact_fixture_paths(&examples_root)? 
{ + let fixture_value: Value = serde_json::from_str(&fs::read_to_string(&fixture)?)?; + let detector = fixture_value + .get("detector") + .and_then(Value::as_str) + .with_context(|| format!("{} missing detector", fixture.display()))?; + let expected = fixture_value + .get("expected") + .cloned() + .with_context(|| format!("{} missing expected", fixture.display()))?; + let input = detector_fact_input(&fixture_value) + .with_context(|| format!("{} input", fixture.display()))?; + let actual = run_detector_on_fact_input(detector, &input, &fixture_value) + .with_context(|| format!("{} {}", detector, fixture.display()))?; + + if actual != expected { + failures.push(format!( + "{} {}\nexpected: {}\nactual: {}", + detector, + fixture.display(), + expected, + actual + )); + } + } + + if failures.is_empty() { + Ok(()) + } else { + bail!( + "shared detector fact oracle failures:\n{}", + failures.join("\n\n") + ) + } +} + +#[test] +fn shared_report_fact_examples_match_postprocess_oracles() -> Result<()> { + let examples_root = examples_root(); + let mut failures = Vec::new(); + + for fixture in report_fact_fixture_paths(&examples_root)? 
{ + let fixture_value: Value = serde_json::from_str(&fs::read_to_string(&fixture)?)?; + let facts = fixture_value + .get("input") + .with_context(|| format!("{} missing input", fixture.display()))?; + let expected = fixture_value + .get("expected") + .cloned() + .with_context(|| format!("{} missing expected", fixture.display()))?; + let report = Report::from_facts(facts) + .with_context(|| format!("failed to build report from {}", fixture.display()))?; + let actual = project_report(&report); + + if actual != expected { + failures.push(format!( + "{}\nexpected: {}\nactual: {}", + fixture.display(), + expected, + actual + )); + } + } + + if failures.is_empty() { + Ok(()) + } else { + bail!( + "shared report fact oracle failures:\n{}", + failures.join("\n\n") + ) + } +} + fn examples_root() -> PathBuf { Path::new(env!("CARGO_MANIFEST_DIR")).join("../examples") } @@ -88,6 +168,75 @@ fn fixture_paths(examples_root: &Path) -> Result> { Ok(paths) } +fn report_fact_fixture_paths(examples_root: &Path) -> Result> { + let report_root = examples_root.join("facts").join("report"); + let mut paths = Vec::new(); + for entry in fs::read_dir(&report_root)? { + let path = entry?.path(); + if path.extension().and_then(|extension| extension.to_str()) == Some("json") { + paths.push(path); + } + } + paths.sort(); + Ok(paths) +} + +fn detector_fact_fixture_paths(examples_root: &Path) -> Result> { + let detector_root = examples_root.join("facts").join("detectors"); + let mut paths = Vec::new(); + for entry in fs::read_dir(&detector_root)? 
{ + let path = entry?.path(); + if path.extension().and_then(|extension| extension.to_str()) == Some("json") { + paths.push(path); + } + } + paths.sort(); + Ok(paths) +} + +#[derive(Deserialize)] +struct DetectorFactInput { + documents: Vec, + local_methods: Vec, +} + +fn detector_fact_input(fixture: &Value) -> Result { + let input = fixture + .get("input") + .cloned() + .with_context(|| "detector fact fixture missing input")?; + let documents = serde_json::from_value::(input.clone())?.documents; + let mut local_methods = Vec::new(); + + if let Some(methods) = input.get("local_methods") { + local_methods.extend(serde_json::from_value::>( + methods.clone(), + )?); + } + for document in input + .get("documents") + .and_then(Value::as_array) + .into_iter() + .flatten() + { + if let Some(methods) = document.get("local_methods") { + local_methods.extend(serde_json::from_value::>( + methods.clone(), + )?); + } + } + + Ok(DetectorFactInput { + documents, + local_methods, + }) +} + +#[derive(Deserialize)] +struct DetectorFactDocuments { + documents: Vec, +} + fn file_stem(path: &Path) -> Result { path.file_stem() .and_then(|stem| stem.to_str()) @@ -157,6 +306,152 @@ fn run_detector( } } +fn run_detector_on_fact_input( + detector: &str, + input: &DetectorFactInput, + fixture: &Value, +) -> Result { + let documents = input.documents.as_slice(); + match detector { + "co-update" => value(co_update::scan_documents(documents)), + "decision-pressure" => { + if input.local_methods.is_empty() { + value(decision_pressure::scan_documents(documents)) + } else { + value(decision_pressure::scan_documents_with_summaries( + documents, + input.local_methods.clone(), + )) + } + } + "predicate-alias" | "predicate-aliases" => { + value(predicate_alias::scan_documents(documents)) + } + "miner" | "decision-miner" => value(miner::scan_documents(documents)), + "semantic-alias" | "semantic-aliases" => value(semantic_alias::scan_documents(documents)), + "flay-similarity" | "structural-similarity" 
=> { + let options = fixture.get("options").cloned().unwrap_or_else(|| json!({})); + let mass = option_usize(&options, "mass", 32)?; + let fuzzy = option_usize(&options, "fuzzy", 1)?; + value(flay_similarity::scan_documents(documents, mass, fuzzy)) + } + "temporal-ordering-pressure" => { + value(temporal_ordering_pressure::scan_documents(documents)) + } + "state-branch-density" => value(state_branch_density::scan_documents(documents)), + "redundant-nil-guard" => value(redundant_nil_guard::scan_documents(documents)), + "state-mesh" | "state-heatmap" => value(state_mesh::scan_documents(documents)), + "inconsistent-rename-clone" => value(inconsistent_rename_clone::scan_documents(documents)), + "derived-state" => { + if input.local_methods.is_empty() { + value(derived_state::scan_documents(documents)) + } else { + value(derived_state::scan_summaries(input.local_methods.clone())) + } + } + "implicit-control-flow" | "ordered-protocol-mine" => { + value(implicit_control_flow::scan_documents(documents)) + } + "weighted-inlined-complexity" => { + if input.local_methods.is_empty() { + value(weighted_inlined_cognitive_complexity::scan_documents( + documents, + )) + } else { + value( + weighted_inlined_cognitive_complexity::scan_documents_with_summaries( + documents, + input.local_methods.clone(), + ), + ) + } + } + "locality-drag" => { + if input.local_methods.is_empty() { + value(locality_drag::scan_documents(documents)) + } else { + value(locality_drag::scan_summaries_with_scores( + input.local_methods.clone(), + complexity_scores(documents), + )) + } + } + "operational-discontinuity" => { + if input.local_methods.is_empty() { + value(operational_discontinuity::scan_documents(documents)) + } else { + value(operational_discontinuity::scan_summaries( + input.local_methods.clone(), + )) + } + } + "oversized-predicate" => value(oversized_predicate::scan_documents(documents)), + "path-condition" => { + let report = path_condition::scan_documents(documents); + value(json!({ 
"neglected": report.neglected })) + } + "sequence-mine" | "broken-protocol" => value(sequence_mine::scan_documents(documents)), + "function-lcom" => { + if input.local_methods.is_empty() { + value(function_lcom::scan_documents(documents)) + } else { + value(function_lcom::scan_summaries(input.local_methods.clone())) + } + } + "false-simplicity" => value(false_simplicity::scan_documents(documents)), + "fat-union" => value(fat_union::scan_documents(documents)), + "local-flow" => value(local_flow::scan_documents(documents)), + "structural-topology" => value(structural_topology::scan_documents(documents)), + _ => bail!("unsupported detector: {detector}"), + } +} + +fn complexity_scores( + documents: &[Document], +) -> std::collections::BTreeMap<(String, String), LocalComplexityScore> { + documents + .iter() + .flat_map(|document| { + document + .local_complexity_scores + .iter() + .map(|(id, score)| ((document.file.clone(), id.clone()), score.clone())) + }) + .collect() +} + +fn project_report(report: &Report) -> Value { + json!({ + "convergence": report.convergence_value(), + "root_clusters": report.root_clusters_value(), + "sarif": compact_sarif(&report.to_sarif_value(false, false, Some(8))), + }) +} + +fn compact_sarif(sarif: &Value) -> Value { + let run = field(sarif, "runs") + .as_array() + .and_then(|runs| runs.first()) + .unwrap_or(&Value::Null); + let results = array(field(run, "results")); + json!({ + "rule_count": array(field(field(field(run, "tool"), "driver"), "rules")).len(), + "result_count": results.len(), + "rule_ids": results.iter().map(|result| field(result, "ruleId").clone()).collect::>(), + "messages": results.iter().map(|result| field(field(result, "message"), "text").clone()).collect::>(), + "locations": results.iter().map(|result| { + let location = field( + array(field(result, "locations")).first().unwrap_or(&Value::Null), + "physicalLocation", + ); + json!({ + "uri": field(field(location, "artifactLocation"), "uri").clone(), + "startLine": 
field(field(location, "region"), "startLine").clone(), + }) + }).collect::>(), + }) +} + fn value(value: T) -> Result { Ok(serde_json::to_value(value)?) } diff --git a/gems/decomplex/test/examples_oracle_test.rb b/gems/decomplex/test/examples_oracle_test.rb index fc597b7ba..68f9b92a5 100644 --- a/gems/decomplex/test/examples_oracle_test.rb +++ b/gems/decomplex/test/examples_oracle_test.rb @@ -19,11 +19,16 @@ class ExamplesOracleTest < Minitest::Test .select { |path| SOURCE_EXTENSIONS.include?(File.extname(path)) } .sort .freeze + DETECTOR_FACT_PATHS = Dir[File.join(EXAMPLES_ROOT, "facts", "detectors", "*.json")].sort.freeze def test_shared_oracle_files_exist refute_empty ORACLE_PATHS end + def test_detector_fact_oracles_exist + refute_empty DETECTOR_FACT_PATHS + end + def test_shared_oracles_are_engine_agnostic pinned = ORACLE_PATHS.select { |path| JSON.parse(File.read(path)).key?("engine") } @@ -53,6 +58,15 @@ def test_each_detector_has_one_fixture_per_language end end + DETECTOR_FACT_PATHS.product(ENGINES).each_with_index do |(fixture_path, engine), index| + detector = File.basename(fixture_path, ".json") + method_name = "test_detector_fact_#{index}_#{engine}_#{detector.tr("-", "_")}_matches_exact_oracle" + + define_method(method_name) do + assert_detector_fact_fixture_matches_exact_oracle(fixture_path, engine) + end + end + private def assert_fixture_matches_shared_oracle(fixture_path, engine) @@ -78,6 +92,18 @@ def assert_fixture_matches_shared_oracle(fixture_path, engine) assert_equal expected, project_detector_output(detector, actual), "#{engine} #{fixture_path}" end + def assert_detector_fact_fixture_matches_exact_oracle(fixture_path, engine) + fixture = JSON.parse(File.read(fixture_path)) + expected = fixture.fetch("expected") + assert meaningful?(expected), "#{fixture_path} expected output is empty" + + actual = JSON.parse( + Decomplex::DetectorRunner.canonical_json_from_fact_fixture(fixture_path, engine: engine) + ) + + assert_equal expected, actual, 
"#{engine} #{fixture_path}" + end + def symbolize_options(options) options.each_with_object({}) { |(key, value), out| out[key.to_sym] = value } end diff --git a/gems/decomplex/test/report_facts_oracle_test.rb b/gems/decomplex/test/report_facts_oracle_test.rb new file mode 100644 index 000000000..eed7010e1 --- /dev/null +++ b/gems/decomplex/test/report_facts_oracle_test.rb @@ -0,0 +1,106 @@ +# frozen_string_literal: true + +require "json" +require "minitest/autorun" +require "tempfile" +require_relative "../lib/decomplex/report" +require_relative "../lib/decomplex/native/command" + +class ReportFactsOracleTest < Minitest::Test + EXAMPLES_ROOT = File.expand_path("../examples", __dir__) + REPORT_FACT_PATHS = Dir[File.join(EXAMPLES_ROOT, "facts", "report", "*.json")].sort.freeze + + def test_report_fact_oracles_exist + refute_empty REPORT_FACT_PATHS + end + + REPORT_FACT_PATHS.each_with_index do |fixture_path, index| + name = File.basename(fixture_path, ".json").tr("-", "_") + + define_method("test_report_fact_#{index}_#{name}_matches_ruby_and_rust") do + assert_report_fact_oracle(fixture_path) + end + end + + private + + def assert_report_fact_oracle(fixture_path) + fixture = JSON.parse(File.read(fixture_path)) + facts = fixture.fetch("input") + expected = fixture.fetch("expected") + + ruby_report = Decomplex::Report.from_facts(JSON.generate(facts)) + assert_equal expected, project_report(ruby_report), "ruby #{fixture_path}" + + skip "cargo is not available" unless rust_available? 
+ + Tempfile.create(["decomplex-report-facts-oracle", ".json"]) do |file| + file.write(JSON.pretty_generate(facts)) + file.flush + + rust_markdown = Decomplex::Native::Command.run( + "render-report", "--input", file.path, "--format", "markdown" + ) + rust_sarif = JSON.parse(Decomplex::Native::Command.run( + "render-report", "--input", file.path, "--format", "sarif" + )) + + assert_equal ruby_report.to_markdown.rstrip, rust_markdown.rstrip, "markdown #{fixture_path}" + assert_equal JSON.parse(ruby_report.to_sarif), rust_sarif, "sarif #{fixture_path}" + end + end + + def project_report(report) + { + "convergence" => json_safe(report.instance_variable_get(:@convergence)), + "root_clusters" => json_safe(report.root_clusters), + "sarif" => compact_sarif(report) + } + end + + def compact_sarif(report) + compact_sarif_hash(JSON.parse(report.to_sarif( + include_snapshot: false, + include_finding_payload: false, + max_results: 8 + ))) + end + + def compact_sarif_hash(sarif) + run = sarif.fetch("runs").first + results = run.fetch("results") + { + "rule_count" => run.dig("tool", "driver", "rules").size, + "result_count" => results.size, + "rule_ids" => results.map { |result| result.fetch("ruleId") }, + "messages" => results.map { |result| result.dig("message", "text") }, + "locations" => results.map do |result| + location = result.dig("locations", 0, "physicalLocation") + { + "uri" => location.dig("artifactLocation", "uri"), + "startLine" => location.dig("region", "startLine") + } + end + } + end + + def json_safe(value) + case value + when Hash + value.to_h { |key, child| [key.to_s, json_safe(child)] } + when Array + value.map { |child| json_safe(child) } + when Symbol + value.to_s + else + value + end + end + + def rust_available? + env = ENV["DECOMPLEX_RUST_BIN"] + return true if env && !env.empty? 
&& File.executable?(env) + + system("cargo", "--version", out: File::NULL, err: File::NULL) + end +end From 360fe243e5d656803fc5439e6353632cf6852a22 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 13:10:27 +0000 Subject: [PATCH 43/52] Add shared markdown report oracle --- .../examples/facts/report/postprocess.md | 222 ++++++++++++++++++ gems/decomplex/rust/src/bin/dump_ast.rs | 7 +- gems/decomplex/rust/src/decomplex/ast.rs | 171 +------------- .../rust/src/decomplex/ast/adapters/base.rs | 18 +- .../rust/src/decomplex/ast/adapters/lua.rs | 17 -- .../detectors/implicit_control_flow.rs | 2 - .../src/decomplex/detectors/local_flow.rs | 4 - .../src/decomplex/syntax/adapters/base.rs | 25 +- .../src/decomplex/syntax/adapters/csharp.rs | 4 - .../src/decomplex/syntax/adapters/ruby.rs | 133 ----------- gems/decomplex/rust/src/main.rs | 12 +- gems/decomplex/rust/tests/examples_oracle.rs | 13 + .../test/report_facts_oracle_test.rb | 11 +- 13 files changed, 265 insertions(+), 374 deletions(-) create mode 100644 gems/decomplex/examples/facts/report/postprocess.md diff --git a/gems/decomplex/examples/facts/report/postprocess.md b/gems/decomplex/examples/facts/report/postprocess.md new file mode 100644 index 000000000..ff44d4644 --- /dev/null +++ b/gems/decomplex/examples/facts/report/postprocess.md @@ -0,0 +1,222 @@ +# Decomplex Report + +> Decision-level duplication and neglected-condition analysis. +> Every entry is a ranked **candidate** (Engler's discipline), +> never a verdict -- *POSSIBLE* findings, triaged by a human. +> Sections are ordered by SIGNAL TIER (1 = lowest false +> positive), not by volume. Items within a section are +> frequency-ranked. Triage tier 1, top-of-list, first. 
+ +## Table of Contents +- [Project Prioritization](#project-prioritization) +- [Cross-Detector Convergence (1)](#cross-detector-convergence-1) +- [Root-Cause Clusters (6)](#root-cause-clusters-6) +- [Decision Pressure (1)](#decision-pressure-1) +- [Redundant Nil Guards (0)](#redundant-nil-guards-0) +- [State Heatmap (0)](#state-heatmap-0) +- [State-Based Branch Density (1)](#statebased-branch-density-1) +- [Temporal Ordering Pressure (0)](#temporal-ordering-pressure-0) +- [Missing Abstractions (1)](#missing-abstractions-1) +- [Reification Misses (1)](#reification-misses-1) +- [Semantic Predicate Aliases (1)](#semantic-predicate-aliases-1) +- [Exact Predicate Aliases (0)](#exact-predicate-aliases-0) +- [Inconsistent Rename Clones (0)](#inconsistent-rename-clones-0) +- [Structural Similarity (Type-2/3) (0)](#structural-similarity-type23-0) +- [Neglected Updates (1)](#neglected-updates-1) +- [Derived-State Staleness (1)](#derivedstate-staleness-1) +- [Neglected Conditions (1)](#neglected-conditions-1) +- [Neglected Path Conditions (1)](#neglected-path-conditions-1) +- [Oversized Predicates (0)](#oversized-predicates-0) +- [Broken Protocols (1)](#broken-protocols-1) +- [Implicit Control Flow (0)](#implicit-control-flow-0) +- [Weighted Inlined Cognitive Complexity (0)](#weighted-inlined-cognitive-complexity-0) +- [Locality Drag (0)](#locality-drag-0) +- [Operational Discontinuity (High Confidence) (0)](#operational-discontinuity-high-confidence-0) +- [Function LCOM (0)](#function-lcom-0) +- [Operational Discontinuity (0)](#operational-discontinuity-0) +- [False Simplicity (1)](#false-simplicity-1) +- [Fat Unions (0)](#fat-unions-0) +- [Run Summary](#run-summary) + +## Project Prioritization +_Ordered by signal tier (1 = highest signal / lowest FP), then by volume._ + +- **[tier 1]** [Decision Pressure (1)](#decision-pressure-1): ELIMINABLE guard-pressure per loose contract (nil/is_a?/respond_to?/safe-nav/rescue-nil) -> tighten the contract once / nil-kill: DELETE. 
essential dispatch + pure c-uses are split out, NEVER summed (Rapps-Weyuker p-use; McCabe) +- **[tier 1]** [State-Based Branch Density (1)](#statebased-branch-density-1): branch decisions over mutable/object state -- state + control-flow pressure +- **[tier 1]** [Missing Abstractions (1)](#missing-abstractions-1): guard tuple recomputed across >=2 decision units +- **[tier 1]** [Reification Misses (1)](#reification-misses-1): an existing predicate reinvented inline -- invariant #16 +- **[tier 1]** [Semantic Predicate Aliases (1)](#semantic-predicate-aliases-1): one decision, multiple names (receiver/polarity folded) +- **[tier 2]** [Neglected Updates (1)](#neglected-updates-1): co-written state, one write missing -- *POSSIBLE* redundant-state desync +- **[tier 2]** [Derived-State Staleness (1)](#derivedstate-staleness-1): b = f(a); a later reassigned, b not recomputed -- *POSSIBLE* bug +- **[tier 2]** [Neglected Conditions (1)](#neglected-conditions-1): dispatch/conjunction minus one element -- *POSSIBLE* bug +- **[tier 3]** [Neglected Path Conditions (1)](#neglected-path-conditions-1): nested-if/&& guard set minus one atom -- *POSSIBLE* bug (noisy) +- **[tier 3]** [Broken Protocols (1)](#broken-protocols-1): co-called pair, one site does A without B -- *POSSIBLE* bug (noisy) +- **[tier 3]** [False Simplicity (1)](#false-simplicity-1): looks simple, behaves non-locally: hidden dispatch/mutation/IO/context/reflection/reopen -- *POSSIBLE* (noisy) + +## Cross-Detector Convergence (1) +_(file, method) units flagged by >=2 INDEPENDENT detectors -- the strongest triage signal: agreement outranks any single detector's volume. Tier-weighted (1=3, 2=2, 3=1). 
**Start here.**_ + +- `facts/report.rb:18` (checkout) -- **11 detectors** [score 24, 15 findings]: Broken Protocols, Decision Pressure, Derived-State Staleness, False Simplicity, Missing Abstractions, Neglected Conditions, Neglected Path Conditions, Neglected Updates, Reification Misses, Semantic Predicate Aliases, State-Based Branch Density + +### By file +- `facts/report.rb` -- 11 detectors across 1 method(s): Broken Protocols, Decision Pressure, Derived-State Staleness, False Simplicity, Missing Abstractions, Neglected Conditions, Neglected Path Conditions, Neglected Updates, Reification Misses, Semantic Predicate Aliases, State-Based Branch Density + +## Root-Cause Clusters (6) +_Findings across >=2 INDEPENDENT detectors that name the SAME entity -- 'N findings are really one invariant'. Convergence says where to look; this says **what one fix collapses the cluster**. Ranked candidate, not a verdict._ + +- **[name]** `storage` -- **7 detectors** [score 17] across 1 unit(s), 7 findings: Decision Pressure, Derived-State Staleness, False Simplicity, Neglected Updates, Reification Misses, Semantic Predicate Aliases, State-Based Branch Density + - FIX: single-source this state (one stamp, or recompute on write) -- the invariant-#16 desync shape + - `facts/report.rb:18` (checkout) ; `facts/report.rb:20` (checkout) ; `facts/report.rb:14` (checkout) ; `facts/report.rb:13` (checkout) +- **[name]** `ready` -- **3 detectors** [score 9] across 1 unit(s), 3 findings: Reification Misses, Semantic Predicate Aliases, State-Based Branch Density + - FIX: reify ONE named predicate/decision and call it everywhere + - `facts/report.rb:20` (checkout) ; `facts/report.rb:14` (checkout) ; `facts/report.rb:13` (checkout) +- **[tuple]** `ready | valid` -- **3 detectors** [score 6] across 2 unit(s), 3 findings: Missing Abstractions, Neglected Conditions, Neglected Path Conditions + - FIX: reify ONE named predicate/decision and call it everywhere + - `facts/report.rb:10` (checkout) ; 
`facts/report.rb:30` (refund) ; `facts/report.rb:11` (checkout) ; `facts/report.rb:15` (checkout) +- **[name]** `READY` -- **2 detectors** [score 6] across 1 unit(s), 2 findings: Reification Misses, Semantic Predicate Aliases + - FIX: reify ONE named predicate/decision and call it everywhere + - `facts/report.rb:14` (checkout) ; `facts/report.rb:13` (checkout) +- **[name]** `provenance` -- **2 detectors** [score 4] across 1 unit(s), 2 findings: Derived-State Staleness, Neglected Updates + - FIX: single-source this state (one stamp, or recompute on write) -- the invariant-#16 desync shape + - `facts/report.rb:12` (checkout) ; `facts/report.rb:17` (checkout) +- **[name]** `valid` -- **2 detectors** [score 3] across 1 unit(s), 2 findings: Neglected Conditions, Neglected Path Conditions + - FIX: converging structural debt -- resolve once at the named entity + - `facts/report.rb:11` (checkout) ; `facts/report.rb:15` (checkout) + +## Decision Pressure (1) +_ELIMINABLE guard-pressure per loose contract (nil/is_a?/respond_to?/safe-nav/rescue-nil) -> tighten the contract once / nil-kill: DELETE. essential dispatch + pure c-uses are split out, NEVER summed (Rapps-Weyuker p-use; McCabe)_ + +- `.storage` -- ELIMINABLE guard-pressure **2** across 1 method(s) -> tighten contract / nil-kill: DELETE (+1 essential dispatch on this contract -- legitimate; leave unless Fat-Union/Missing-Abstractions says re-derived) + - `facts/report.rb:18` (checkout) + +## Redundant Nil Guards (0) +_nil checks / safe-nav dominated by an earlier non-nil proof -- delete repeated control flow or tighten the type_ + +None. + +## State Heatmap (0) +_state fields ranked by write/read/re-derivation scatter -- tangled mutable state should get one owner_ + +None. 
+ +## State-Based Branch Density (1) +_branch decisions over mutable/object state -- state + control-flow pressure_ + +- `facts/report.rb:20` (checkout) -- **1** state-based branch decision(s), refs=`storage` score=1 + - example predicate: `storage.ready?` + +## Temporal Ordering Pressure (0) +_public mutable lifecycle surfaces that create implicit state-machine ordering_ + +None. + +## Missing Abstractions (1) +_guard tuple recomputed across >=2 decision units_ + +- **[conjunction]** support=2 scatter=2 rank= + - tuple: `ready | valid` + - `facts/report.rb:10` (checkout) ; `facts/report.rb:30` (refund) + +## Reification Misses (1) +_an existing predicate reinvented inline -- invariant #16_ + +- predicate `ready?` reinvented inline at `facts/report.rb:14` (checkout) (`storage == READY`) + +## Semantic Predicate Aliases (1) +_one decision, multiple names (receiver/polarity folded)_ + +- `ready? = prepared?` == `storage == READY` + - `facts/report.rb:13` (checkout) + +## Exact Predicate Aliases (0) +_identical one-line predicate body under >=2 names_ + +None. + +## Inconsistent Rename Clones (0) +_pasted block with inconsistent identifier mapping -- *POSSIBLE* missed rename bug_ + +None. + +## Structural Similarity (Type-2/3) (0) +_Tree-sitter structural clone pressure: Type-2 renamed clones and Type-3 fuzzy clones -- refactor pressure, not a verdict_ + +None. 
+ +## Neglected Updates (1) +_co-written state, one write missing -- *POSSIBLE* redundant-state desync_ + +- *POSSIBLE* (support=3) `facts/report.rb:12` (checkout) writes `.storage` but NOT `.provenance` (recv `order`) + +## Derived-State Staleness (1) +_b = f(a); a later reassigned, b not recomputed -- *POSSIBLE* bug_ + +- *POSSIBLE* `facts/report.rb:17` (checkout): `storage` derived from `provenance` (line 17); `provenance` reassigned line 22, `storage` not recomputed + +## Neglected Conditions (1) +_dispatch/conjunction minus one element -- *POSSIBLE* bug_ + +- *POSSIBLE* (support=2) `facts/report.rb:11` (checkout) -- MISSING `valid` from `ready | valid` + +## Neglected Path Conditions (1) +_nested-if/&& guard set minus one atom -- *POSSIBLE* bug (noisy)_ + +- *POSSIBLE* (support=2) `facts/report.rb:15` (checkout) -- MISSING `valid` from `ready | valid` + +## Oversized Predicates (0) +_predicate with >3 condition atoms -- use an existing helper or extract a named predicate_ + +None. + +## Broken Protocols (1) +_co-called pair, one site does A without B -- *POSSIBLE* bug (noisy)_ + +- *POSSIBLE* conf=0.8 support=4 `facts/report.rb:16` (checkout) does `open` without `close` + +## Implicit Control Flow (0) +_state-dependent internal call order exists -- hidden lifecycle/control-flow pressure_ + +None. + +## Weighted Inlined Cognitive Complexity (0) +_same-owner helper chain hides cognitive load behind a low-looking orchestration method_ + +None. + +## Locality Drag (0) +_local initialized far before first use while unrelated work runs -- move setup closer or extract a private phase_ + +None. + +## Operational Discontinuity (High Confidence) (0) +_strong blank/comment phase boundary where local variable lifetimes reset -- likely implicit sub-function boundary_ + +None. + +## Function LCOM (0) +_independent local data-flow components inside one method -- *POSSIBLE* mixed concerns_ + +None. 
+ +## Operational Discontinuity (0) +_blank/comment phase boundary where local variable lifetimes reset -- *POSSIBLE* implicit sub-function boundary_ + +None. + +## False Simplicity (1) +_looks simple, behaves non-locally: hidden dispatch/mutation/IO/context/reflection/reopen -- *POSSIBLE* (noisy)_ + +- *POSSIBLE* [hidden_mutation] scatter=1 support=1 `storage=` -- `facts/report.rb:19` (checkout) + +## Fat Unions (0) +_case dispatch over class consts whose arms read mostly variant-invariant members -- product-vs-sum decomposition candidate (extraction -> nil-kill) -- *POSSIBLE*_ + +None. + +## Run Summary +- Files analyzed: 1 +- Detectors: 25 (all shipped, self-tested) +- Convergence: 1 unit(s) flagged by >=2 independent detectors +- Root-cause clusters: 6 (one fix collapses each) +- Total candidates: 11 +- Method: stdlib AST only, intra-procedural, zero deps, no CFG / no points-to; Type-2/3 similarity uses Tree-sitter structural fingerprints (see docs/agents/design.md) diff --git a/gems/decomplex/rust/src/bin/dump_ast.rs b/gems/decomplex/rust/src/bin/dump_ast.rs index abacb80d3..a0094175c 100644 --- a/gems/decomplex/rust/src/bin/dump_ast.rs +++ b/gems/decomplex/rust/src/bin/dump_ast.rs @@ -1,9 +1,6 @@ -#[path = "../decomplex/mod.rs"] -mod decomplex; - use anyhow::{bail, Result}; -use decomplex::ast::{self, Child, Node}; -use decomplex::syntax::Language; +use decomplex_rust::decomplex::ast::{self, Child, Node}; +use decomplex_rust::decomplex::syntax::Language; use serde_json::{json, Value}; use std::env; use std::fs; diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs index 0cd95d487..c9e549d54 100644 --- a/gems/decomplex/rust/src/decomplex/ast.rs +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -793,6 +793,7 @@ fn lua_keyed_table_target<'tree>( struct TreeSitterNormalizer<'source> { source: &'source str, + #[cfg(test)] language: Language, normalization_adapter: &'static dyn AstNormalizationAdapter, local_stack: Vec>, 
@@ -804,6 +805,7 @@ impl<'source> TreeSitterNormalizer<'source> { fn new(source: &'source str, language: Language) -> Self { Self { source, + #[cfg(test)] language, normalization_adapter: normalization_adapter(language), local_stack: Vec::new(), @@ -2498,40 +2500,6 @@ impl<'source> TreeSitterNormalizer<'source> { ) } - fn normalize_declaration(&mut self, node: TreeSitterNode<'_>) -> Option { - let mut assignments = Vec::new(); - for entry in self.declaration_entries(node) { - let Some(name) = self.declaration_name(entry) else { - continue; - }; - let right = self - .declaration_value(entry) - .and_then(|value| self.normalize_node(value)); - assignments.push(self.wrap( - "LASGN", - vec![Child::String(self.target_name(name)), optional_node(right)], - entry, - )); - } - - if assignments.is_empty() { - None - } else if assignments.len() == 1 { - assignments.into_iter().next() - } else { - Some( - self.wrap( - "BLOCK", - assignments - .into_iter() - .map(|assignment| Child::Node(Box::new(assignment))) - .collect(), - node, - ), - ) - } - } - fn normalize_call(&mut self, node: TreeSitterNode<'_>) -> Option { if self.zero_child_identifier_call(node) { return Some(self.normalize_zero_child_call(node)); @@ -2961,6 +2929,7 @@ impl<'source> TreeSitterNormalizer<'source> { self.normalize_body_nodes(self.named_children(node), node) } + #[cfg(test)] fn normalize_dotted_call_expression(&mut self, node: TreeSitterNode<'_>) -> Option { self.normalize_dotted_call_expression_with_source(node, None) } @@ -3984,6 +3953,7 @@ impl<'source> TreeSitterNormalizer<'source> { } } + #[cfg(test)] fn list(&self, children: Option>, source: TreeSitterNode<'_>) -> Option { let children = children?; if children.is_empty() { @@ -4071,24 +4041,6 @@ impl<'source> TreeSitterNormalizer<'source> { } } - fn wrap_from_span_text( - &self, - node_type: &str, - children: Vec, - node_span: Span, - text: &str, - ) -> Node { - Node { - r#type: node_type.to_string(), - children, - first_lineno: node_span[0], - 
first_column: node_span[1], - last_lineno: node_span[2], - last_column: node_span[3], - text: self.source_text(text), - } - } - fn with_ruby_scope( &mut self, node: TreeSitterNode<'_>, @@ -4200,20 +4152,6 @@ impl<'source> TreeSitterNormalizer<'source> { } } - fn collect_parameter_names(&self, node: TreeSitterNode<'_>, locals: &mut BTreeSet) { - if let Some(name) = self.named_field(node, "name") { - self.collect_identifier_names(name, locals); - return; - } - if let Some(name) = self - .named_children(node) - .into_iter() - .find_map(|child| self.identifier_text(child)) - { - locals.insert(name); - } - } - fn ruby_scope_boundary(&self, node: TreeSitterNode<'_>) -> bool { if matches!(node.kind(), "block" | "do_block") && node @@ -4416,11 +4354,6 @@ impl<'source> TreeSitterNormalizer<'source> { .single_assignment_block_child(node, self.source) } - fn single_assignment_statement(&self, node: TreeSitterNode<'_>) -> bool { - self.normalization_adapter - .single_assignment_statement(node, self.source) - } - fn has_assignment_operator_child(&self, node: TreeSitterNode<'_>) -> bool { node.children(&mut node.walk()).any(|child| { !child.is_named() && self.assignment_operator(node_text(child, self.source)) @@ -5708,6 +5641,7 @@ impl<'source> TreeSitterNormalizer<'source> { .any(|child| self.same_ts_node(child, node)) } + #[cfg(test)] fn node_key(&self, node: TreeSitterNode<'_>) -> (String, usize, usize) { (node.kind().to_string(), node.start_byte(), node.end_byte()) } @@ -5840,82 +5774,6 @@ impl<'source> TreeSitterNormalizer<'source> { )) } - fn declaration_entries<'tree>( - &self, - node: TreeSitterNode<'tree>, - ) -> Vec> { - if matches!(node.kind(), "local_variable_declaration") { - let entries = self - .named_children(node) - .into_iter() - .filter(|child| child.kind() == "variable_declarator") - .collect::>(); - if !entries.is_empty() { - return entries; - } - } - if matches!( - node.kind(), - "local_variable_declaration" - | "variable_declarator" - | 
"variable_declaration" - | "property_declaration" - ) { - vec![node] - } else { - Vec::new() - } - } - - fn declaration_name<'tree>( - &self, - node: TreeSitterNode<'tree>, - ) -> Option> { - if let Some(name) = self.named_field(node, "name") { - return Some(name); - } - - for child in self.named_children(node) { - if child.kind() == "variable_declaration" { - if let Some(name) = self.declaration_name(child) { - return Some(name); - } - } - if matches!(child.kind(), "identifier" | "simple_identifier" | "pattern") { - return Some(child); - } - } - None - } - - fn declaration_value<'tree>( - &self, - node: TreeSitterNode<'tree>, - ) -> Option> { - if node.kind() == "property_declaration" { - let mut after_target = false; - for child in self.named_children(node) { - if !after_target && matches!(child.kind(), "variable_declaration" | "pattern") { - after_target = true; - continue; - } - if after_target && !declaration_metadata_kind(child.kind()) { - return Some(child); - } - } - } - - self.named_field(node, "value").or_else(|| { - self.named_children(node).into_iter().find(|child| { - !declaration_metadata_kind(child.kind()) - && !matches!( - child.kind(), - "identifier" | "simple_identifier" | "pattern" | "variable_declaration" - ) - }) - }) - } - fn assignment_target( &mut self, left: TreeSitterNode<'_>, @@ -6138,14 +5996,17 @@ impl<'source> TreeSitterNormalizer<'source> { node.parent() } + #[cfg(test)] fn next_sibling<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { node.next_sibling() } + #[cfg(test)] fn prev_sibling<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { node.prev_sibling() } + #[cfg(test)] fn next_named_sibling<'tree>( &self, node: TreeSitterNode<'tree>, @@ -6692,21 +6553,6 @@ fn dynamic_scope(mut node: Node) -> Node { node } -fn declaration_metadata_kind(kind: &str) -> bool { - matches!( - kind, - "modifiers" - | "type" - | "nullable_type" - | "parenthesized_type" - | "user_type" - | "type_identifier" - | "integral_type" - | 
"floating_point_type" - | "void_type" - ) -} - fn kind_type(kind: &str) -> String { let mut result = String::new(); let mut in_separator = false; @@ -6722,6 +6568,7 @@ fn kind_type(kind: &str) -> String { result } +#[cfg(test)] fn ts_node(node: Option>) -> bool { node.is_some() } diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/base.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/base.rs index 2a44dc455..fc40580c0 100644 --- a/gems/decomplex/rust/src/decomplex/ast/adapters/base.rs +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/base.rs @@ -1,7 +1,7 @@ use super::super::{ - bare_identifier_text, bracketed, case_arm_descendant, concatenated_string_node, - concatenated_string_target, descendant, direct_binary_operator, element_reference_shape, - function_kind, identifier_kind_name, named_children, node_text, question_colon_ternary_parts, + bracketed, case_arm_descendant, concatenated_string_node, concatenated_string_target, + descendant, direct_binary_operator, element_reference_shape, function_kind, + identifier_kind_name, named_children, node_text, question_colon_ternary_parts, raw_named_children, ruby_exception_constant_text, statement_block_wrapper, TernaryParts, ARRAY_LITERAL_NODE_KINDS, ARRAY_LITERAL_WRAPPER_KINDS, BOOLEAN_EXPRESSION_KINDS, CASE_ARGUMENT_WHEN_KINDS, CASE_ELSE_KINDS, CASE_NODE_KINDS, COMPARISON_EXPRESSION_KINDS, @@ -706,10 +706,6 @@ pub(crate) trait AstNormalizationAdapter: Sync { ) } - fn identifier_text_node(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { - false - } - fn local_identifier_text(&self, _node: TreeSitterNode<'_>, _source: &str) -> Option { None } @@ -928,10 +924,6 @@ pub(crate) trait AstNormalizationAdapter: Sync { false } - fn single_assignment_statement(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { - false - } - fn member_read_excluded(&self, _node: TreeSitterNode<'_>) -> bool { false } @@ -1016,10 +1008,6 @@ pub(crate) trait AstNormalizationAdapter: Sync { .unwrap_or(false) } - fn 
bare_identifier_text(&self, text: &str) -> bool { - bare_identifier_text(text) - } - fn descendant<'tree>( &self, node: TreeSitterNode<'tree>, diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/lua.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/lua.rs index 44c5cfe9c..9e4b17cf3 100644 --- a/gems/decomplex/rust/src/decomplex/ast/adapters/lua.rs +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/lua.rs @@ -332,11 +332,6 @@ impl AstNormalizationAdapter for LuaAstAdapter { named_children(target) } - fn identifier_text_node(&self, node: TreeSitterNode<'_>, source: &str) -> bool { - matches!(node.kind(), "variable_list" | "expression_list") - && self.bare_identifier_text(node_text(node, source)) - } - fn member_assignment_target(&self, node: TreeSitterNode<'_>, source: &str) -> bool { if node.kind() != "variable_list" { return false; @@ -443,18 +438,6 @@ impl AstNormalizationAdapter for LuaAstAdapter { lua_single_assignment_block_child(node, source) } - fn single_assignment_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { - if node.kind() != "assignment_statement" { - return false; - } - let Some(parent) = node.parent() else { - return false; - }; - parent.kind() == "block" - && node_text(parent, source) == node_text(node, source) - && raw_named_children(parent).len() == 1 - } - fn member_read_excluded(&self, node: TreeSitterNode<'_>) -> bool { node.kind() == "field" } diff --git a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs index 6a3da82f2..d8c5e6be3 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs @@ -37,7 +37,6 @@ struct MethodEffect { #[derive(Clone, Debug)] struct Call { mid: String, - file: String, line: usize, span: Span, reads: Vec, @@ -94,7 +93,6 @@ fn sequences_for_document(document: &Document, effect_index: &EffectIndex) -> Ve 
let effect = effect_index.effect_for(&function_def.owner, &mid); Call { mid, - file: call.file.clone(), line: call.line, span: call.span, reads: effect.map(|e| e.reads.clone()).unwrap_or_default(), diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index 2c6647755..fee39a45e 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -105,7 +105,6 @@ fn normalized_local_methods(document: &Document) -> Vec { let mut detector = LocalFlow::new( document.file.clone(), document.lines.clone(), - document.language, method_metadata(document), ); detector.scan(&document.normalized_root) @@ -1154,7 +1153,6 @@ fn fallback_node_from_raw(raw: &RawNode) -> Node { struct LocalFlow { file: String, lines: Vec, - language: Language, methods_by_span: BTreeMap, } @@ -1162,13 +1160,11 @@ impl LocalFlow { fn new( file: String, lines: Vec, - language: Language, methods_by_span: BTreeMap, ) -> Self { Self { file, lines, - language, methods_by_span, } } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs index ddf27bacd..3beb79adf 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs @@ -5,7 +5,7 @@ use super::super::tree_sitter_adapter::{ }; use super::super::{CallSite, CloneCandidate, Document, FunctionDef, Language}; use crate::decomplex::ast::{node_text, normalize_text, span, RawNode}; -use std::collections::{BTreeMap, BTreeSet, HashSet}; +use std::collections::HashSet; use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) const EMPTY_NODE_KINDS: &[&str] = &[]; @@ -197,10 +197,6 @@ pub(crate) trait LanguageProfile { EMPTY_NODE_KINDS } - fn declarator_node_kinds(&self) -> &[&str] { - EMPTY_NODE_KINDS - } - fn local_identifier_wrapper_node_kinds(&self) -> 
&[&str] { EMPTY_NODE_KINDS } @@ -1044,25 +1040,6 @@ pub(crate) trait LanguageProfile { ) } - fn method_param_types(&self, _lines: &[String]) -> BTreeMap> { - BTreeMap::new() - } - - fn immutable_struct_readers(&self, _lines: &[String]) -> BTreeMap> { - BTreeMap::new() - } - - fn immutable_struct_reader_types( - &self, - _lines: &[String], - ) -> BTreeMap> { - BTreeMap::new() - } - - fn type_aliases(&self, _lines: &[String]) -> BTreeMap { - BTreeMap::new() - } - fn clone_candidates(&self, document: &Document) -> Vec { let mut out = Vec::new(); let mut seen = HashSet::new(); diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs index 51e1ae869..87a4946c7 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs @@ -77,10 +77,6 @@ impl LanguageProfile for CSharpProfile { &["variable_declaration"] } - fn declarator_node_kinds(&self) -> &[&str] { - &["variable_declaration", "variable_declarator"] - } - fn comparison_node_kinds(&self) -> &[&str] { &["binary_expression"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs index 694c09499..2ba33fe7d 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs @@ -6,7 +6,6 @@ use super::super::{CallSite, FunctionDef, Language}; use super::base::LanguageProfile; use crate::decomplex::ast::{node_text, normalize_text, span}; use regex::Regex; -use std::collections::{BTreeMap, BTreeSet}; use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct RubyProfile; @@ -294,25 +293,6 @@ impl LanguageProfile for RubyProfile { fn skip_state_write_target(&self, target: &Target) -> bool { target.field == "[]" || target.field.starts_with('$') } - - fn method_param_types(&self, lines: &[String]) -> BTreeMap> { - 
ruby_method_param_types(lines) - } - - fn immutable_struct_readers(&self, lines: &[String]) -> BTreeMap> { - ruby_immutable_struct_readers(lines) - } - - fn immutable_struct_reader_types( - &self, - lines: &[String], - ) -> BTreeMap> { - ruby_immutable_struct_reader_types(lines) - } - - fn type_aliases(&self, lines: &[String]) -> BTreeMap { - ruby_type_aliases(lines) - } } fn hidden_ruby_method_name(node: Node<'_>, source: &str) -> Option { @@ -909,116 +889,3 @@ fn ruby_case_pattern_texts(patterns: &[Node<'_>], source: &str) -> Vec { } out } - -fn ruby_immutable_struct_readers(lines: &[String]) -> BTreeMap> { - let mut readers = BTreeMap::new(); - let mut class_stack = Vec::new(); - let class_struct_re = - Regex::new(r"^\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b").unwrap(); - let const_re = Regex::new(r"^\s*const\s+:([A-Za-z_]\w*)\b").unwrap(); - let end_re = Regex::new(r"^\s*end\s*(?:#.*)?$").unwrap(); - - for line in lines { - if let Some(caps) = class_struct_re.captures(line) { - class_stack.push(caps[1].to_string()); - continue; - } - if !class_stack.is_empty() { - if let Some(caps) = const_re.captures(line) { - readers - .entry(class_stack.last().unwrap().clone()) - .or_insert_with(BTreeSet::new) - .insert(caps[1].to_string()); - continue; - } - } - if end_re.is_match(line) { - class_stack.pop(); - } - } - readers -} - -fn ruby_immutable_struct_reader_types( - lines: &[String], -) -> BTreeMap> { - let mut reader_types = BTreeMap::new(); - let mut class_stack = Vec::new(); - let class_struct_re = - Regex::new(r"^\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b").unwrap(); - let const_type_re = - Regex::new(r"^\s*const\s+:([A-Za-z_]\w*)\s*,\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\b") - .unwrap(); - let end_re = Regex::new(r"^\s*end\s*(?:#.*)?$").unwrap(); - - for line in lines { - if let Some(caps) = class_struct_re.captures(line) { - class_stack.push(caps[1].to_string()); - continue; - } - if !class_stack.is_empty() { - if let 
Some(caps) = const_type_re.captures(line) { - reader_types - .entry(class_stack.last().unwrap().clone()) - .or_insert_with(BTreeMap::new) - .insert(caps[1].to_string(), caps[2].to_string()); - continue; - } - } - if end_re.is_match(line) { - class_stack.pop(); - } - } - reader_types -} - -fn ruby_type_aliases(lines: &[String]) -> BTreeMap { - let mut aliases = BTreeMap::new(); - let type_alias_re = - Regex::new(r"^\s*([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*([A-Z]\w*(?:::[A-Z]\w*)*)\s*\}") - .unwrap(); - let const_alias_re = Regex::new(r"^\s*([A-Z]\w*)\s*=\s*([A-Z]\w*(?:::[A-Z]\w*)*)\b").unwrap(); - - for line in lines { - if let Some(caps) = type_alias_re.captures(line) { - aliases.insert(caps[1].to_string(), caps[2].to_string()); - } else if let Some(caps) = const_alias_re.captures(line) { - aliases.insert(caps[1].to_string(), caps[2].to_string()); - } - } - aliases -} - -fn ruby_method_param_types(lines: &[String]) -> BTreeMap> { - let mut types_by_method = BTreeMap::new(); - let mut pending_sig = String::new(); - let def_re = Regex::new(r"^\s*def\s+([A-Za-z_]\w*[!?=]?)(?:\s|\(|$)").unwrap(); - - for line in lines { - if ruby_pending_sig_active(line, &pending_sig) { - pending_sig.push_str(line); - } - if let Some(caps) = def_re.captures(line) { - types_by_method.insert(caps[1].to_string(), ruby_sig_param_types(&pending_sig)); - pending_sig.clear(); - } - } - types_by_method -} - -fn ruby_pending_sig_active(line: &str, pending_sig: &str) -> bool { - !pending_sig.is_empty() || line.trim().starts_with("sig") -} - -fn ruby_sig_param_types(sig_source: &str) -> BTreeMap { - let params_re = Regex::new(r"params\s*\((.*?)\)").unwrap(); - let param_pair_re = - Regex::new(r"([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)").unwrap(); - let mut params = BTreeMap::new(); - if let Some(p_caps) = params_re.captures(sig_source) { - for pair in param_pair_re.captures_iter(&p_caps[1]) { - params.insert(pair[1].to_string(), pair[2].to_string()); - } - } - params -} diff --git 
a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs index 48e993fed..cfc2d1ea9 100644 --- a/gems/decomplex/rust/src/main.rs +++ b/gems/decomplex/rust/src/main.rs @@ -1,17 +1,15 @@ -mod decomplex; - use anyhow::{bail, Context, Result}; -use decomplex::detectors::{ +use decomplex_rust::decomplex::detectors::{ co_update, decision_pressure, derived_state, false_simplicity, fat_union, flay_similarity, function_lcom, implicit_control_flow, inconsistent_rename_clone, local_flow, locality_drag, miner, operational_discontinuity, oversized_predicate, path_condition, predicate_alias, redundant_nil_guard, semantic_alias, sequence_mine, state_branch_density, state_mesh, structural_topology, temporal_ordering_pressure, weighted_inlined_cognitive_complexity, }; -use decomplex::parallel; -use decomplex::report::Report; -use decomplex::report_facts::{self, Options as ReportFactsOptions, VcsFilter}; -use decomplex::syntax::{Document, Language, LocalComplexityScore}; +use decomplex_rust::decomplex::parallel; +use decomplex_rust::decomplex::report::Report; +use decomplex_rust::decomplex::report_facts::{self, Options as ReportFactsOptions, VcsFilter}; +use decomplex_rust::decomplex::syntax::{Document, Language, LocalComplexityScore}; use serde::Deserialize; use serde_json::{json, Value}; use std::io::Read; diff --git a/gems/decomplex/rust/tests/examples_oracle.rs b/gems/decomplex/rust/tests/examples_oracle.rs index 025d5d5e1..0ca35cad4 100644 --- a/gems/decomplex/rust/tests/examples_oracle.rs +++ b/gems/decomplex/rust/tests/examples_oracle.rs @@ -120,6 +120,10 @@ fn shared_report_fact_examples_match_postprocess_oracles() -> Result<()> { .get("expected") .cloned() .with_context(|| format!("{} missing expected", fixture.display()))?; + let expected_markdown = fs::read_to_string(fixture.with_extension("md")) + .with_context(|| format!("{} missing markdown oracle", fixture.display()))? 
+ .trim_end() + .to_string(); let report = Report::from_facts(facts) .with_context(|| format!("failed to build report from {}", fixture.display()))?; let actual = project_report(&report); @@ -132,6 +136,15 @@ fn shared_report_fact_examples_match_postprocess_oracles() -> Result<()> { actual )); } + let markdown = report.to_markdown().trim_end().to_string(); + if markdown != expected_markdown { + failures.push(format!( + "{} markdown\nexpected: {}\nactual: {}", + fixture.display(), + expected_markdown, + markdown + )); + } } if failures.is_empty() { diff --git a/gems/decomplex/test/report_facts_oracle_test.rb b/gems/decomplex/test/report_facts_oracle_test.rb index eed7010e1..dfff2d61b 100644 --- a/gems/decomplex/test/report_facts_oracle_test.rb +++ b/gems/decomplex/test/report_facts_oracle_test.rb @@ -28,9 +28,11 @@ def assert_report_fact_oracle(fixture_path) fixture = JSON.parse(File.read(fixture_path)) facts = fixture.fetch("input") expected = fixture.fetch("expected") + expected_markdown = expected_markdown_for(fixture_path) ruby_report = Decomplex::Report.from_facts(JSON.generate(facts)) assert_equal expected, project_report(ruby_report), "ruby #{fixture_path}" + assert_equal expected_markdown, ruby_report.to_markdown.rstrip, "markdown ruby #{fixture_path}" skip "cargo is not available" unless rust_available? 
@@ -45,7 +47,7 @@ def assert_report_fact_oracle(fixture_path) "render-report", "--input", file.path, "--format", "sarif" )) - assert_equal ruby_report.to_markdown.rstrip, rust_markdown.rstrip, "markdown #{fixture_path}" + assert_equal expected_markdown, rust_markdown.rstrip, "markdown rust #{fixture_path}" assert_equal JSON.parse(ruby_report.to_sarif), rust_sarif, "sarif #{fixture_path}" end end @@ -58,6 +60,13 @@ def project_report(report) } end + def expected_markdown_for(fixture_path) + markdown_path = fixture_path.sub(/\.json\z/, ".md") + assert File.file?(markdown_path), "missing markdown oracle #{markdown_path}" + + File.read(markdown_path).rstrip + end + def compact_sarif(report) compact_sarif_hash(JSON.parse(report.to_sarif( include_snapshot: false, From 7797384a24a298452a0b1cca43decb41f42586be Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 13:12:20 +0000 Subject: [PATCH 44/52] Add CLEAR launch planning docs --- docs/agents/TODO.md | 46 +++++++++++++++++++++++++++ docs/agents/self-host.md | 67 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 docs/agents/TODO.md create mode 100644 docs/agents/self-host.md diff --git a/docs/agents/TODO.md b/docs/agents/TODO.md new file mode 100644 index 000000000..4c2fa743b --- /dev/null +++ b/docs/agents/TODO.md @@ -0,0 +1,46 @@ +# Launch Checklist: CLEAR v0.1 & Generalized Gems + +## Phase 1: The "Secret Sauce" Launch (Weeks 1-6) + +### 1. Lineage (The Backbone) +- [ ] Finalize Rust `lineage` crate with VCS Trait support (Git/JJ/Hg). +- [ ] Implement Sentry/Stack-trace ingestion with verification anchors. +- [ ] Implement Coverage-Delta ingestion (Aggregates only). +- [ ] Build the Local UI Server (Rust/Axum + React/Monaco) with gutters. + +### 2. Boobytrap & SlopCop (The Integrity Wall) +- [ ] Generalize SlopCop regexes into language-neutral providers. +- [ ] Implement Systems-Test Coverage detection (Atomics -> Loom, etc.) for Zig. 
+- [ ] Add `--format sarif` output for native GitHub Check Annotations. +- [ ] Update Boobytrap to use Lineage SQLite DB for function-level history. + +### 3. Nil-Kill & Auto-Type (The Repair Engine) +- [ ] Complete the extraction of `auto-type` from `nil-kill`. +- [ ] Implement the `auto-type` Provider Registry (Template/LLM/AST tiers). +- [ ] Abstract Nil-Kill Z3 evidence extraction into language providers. +- [ ] Launch "Hidden Enum Discovery" as a flagship AI-refactoring feature. + +## Phase 2: CLEAR v0.1 Architectural Preview (Weeks 7-8) + +### 4. Compiler Hardening +- [ ] **Must Build:** Promote the "Memory Brains" (`Type`, `CleanupClassifier`, `EscapeAnalysis`) to Hard-Gated mutation status. +- [ ] Ensure 100% of safety invariants in `CLAUDE.md` are killed by transpile-mutants. +- [ ] Conduct a final parity run: Tree-sitter-Ruby vs. Prism-Ruby facts. + +### 5. Launch Artifacts +- [ ] Finalize the "Language Tour" featuring the MiniVM (`_bc_runner.cht`) as proof of logic. +- [ ] Release the 3-week "Decomplex Expansion" narrative (Ruby -> Python -> Universal). + +## Phase 3: CLEAR v0.2 Self-Hosted Release (Weeks 9-10+) + +### 6. The Great Migration +- [ ] Execute the "Narrowing the Funnel" refactor: move Ruby source to "Spiritual CLEAR". +- [ ] Run the S2S Script (Ruby-to-CLEAR) on the Kernel (AST/Type/Annotator). +- [ ] Achieve first successful self-compiled "Hello World". +- [ ] Achieve full self-hosting of the MIR Lowering and Checker passes. + +## Phase 4: Enterprise & Scalability + +- [ ] Port the Lineage UI to a standalone **Tauri** Desktop Application. +- [ ] Design the "Cloud Fact-Store" for aggregate team risk (hosting-safe). +- [ ] Finalize per-seat license model for the "Systems Integrity Platform". 
diff --git a/docs/agents/self-host.md b/docs/agents/self-host.md new file mode 100644 index 000000000..1799a62b2 --- /dev/null +++ b/docs/agents/self-host.md @@ -0,0 +1,67 @@ +# Self-Hosting Plan: The "Boiling Frog" Transpilation + +This document outlines a phased bootstrapping approach to self-hosting the CLEAR compiler. Rather than a single "big bang" transpilation, we build the transpiler incrementally to handle the specific Ruby features used in each compiler pass. + +## Core Strategy + +1. **Surgical Ruby Refactoring (Phase 0)**: Eliminate dynamic hazards (e.g., `send`, `instance_variable_get`) in the Ruby source to simplify the transpiler and ensure idiomatic CLEAR output. +2. **Phase-Locked Development**: Build the transpiler logic required for Pass N, then transpile Pass N. +3. **Surgical Manual Intervention**: Complex Ruby idioms (e.g., dynamic regex generation in the Lexer, or complex metaprogramming in the Annotator) are manually converted during each pass. +4. **Fact-Driven Memory Safety**: Use `decomplex` to detect aliasing and ownership facts in the Ruby source to drive correct CLEAR capability selection (GIVE vs. Borrow vs. Shared). 
+ +## Phase Estimates + +| Phase | Component | Ruby LOC | Transpiler LOC (Cumulative) | Manual Work | +| :--- | :--- | :---: | :---: | :---: | +| **P0** | **Source Refactor** | N/A | 0 | 0% | +| **P1** | **Lexer** | 360 | 1,000 | 0% | +| **P2** | **Parser & AST** | 18,500 | 3,000 | 5% | +| **P3** | **Type Inference (Annotator)** | 11,000 | 5,000 | 10% | +| **P4** | **Cap & Effect Tracking** | 11,200 | 6,500 | 15% | +| **P5** | **Escape Analysis** | 4,100 | 7,500 | 15% | +| **P6** | **AST/MIR Re-writing** | 1,500 | 8,500 | 10% | +| **P7** | **Thunk Conversion** | 3,300 | 9,500 | 20% | +| **P8** | **FSM Conversion** | 7,500 | 10,500 | 25% | +| **P9** | **MIR Lowering** | 22,000 | 11,500 | 15% | +| **P10** | **MIR Safety Verification** | 3,000 | 12,000 | 5% | +| **P11** | **Zig Emission** | 4,200 | 12,500 | 5% | +| **P12** | **Test Suite (spec/)** | 100,000+ | 16,000 | 15% | + +### Phase Details + +#### P0: Surgical Refactor +- **Goal**: Remove dynamic Ruby features that are difficult to transpile. +- **Actions**: Replace `send` with explicit interfaces; replace `instance_variable_get` with getters; simplify RSpec mocks to use structural doubles. +- **Benefit**: Reduces transpiler complexity by ~2,000 LOC and ensures the output follows CLEAR "Fortress Architecture" principles. + +#### P1-P2: Frontend (Lexer & Parser) +- **Ruby Surface**: String scanning, recursive descent, large case/match blocks, AST node instantiation. +- **Transpiler Goal**: Map Ruby `StringScanner` to CLEAR `Scanner`, and `case` to CLEAR `MATCH`. +- **Manual Work**: Complex regex-driven tokenization rules that don't map 1:1 to Zig's regex engine. + +#### P3-P4: Semantic Analysis (Annotator) +- **Ruby Surface**: Symbol tables, recursive tree walks, Sorbet `sig` blocks, `T::Hash`, `T::Set`. +- **Transpiler Goal**: Robust mapping of Sorbet types to CLEAR types; mapping Ruby `Hash/Set` to CLEAR `@map/@set`. 
+- **Manual Work**: Deeply nested type-inference edge cases and circular dependency resolution in the declaration index. + +#### P5-P8: Middle-End (Semantic & Transforms) +- **Ruby Surface**: Flow-sensitive analysis, graph traversal, tree-to-tree transformations (Rewriters), closure/thunk generation. +- **Transpiler Goal**: Implementing a "Data-Flow Bridge" in `decomplex` to detect aliasing hazards. +- **Manual Work**: FSM conversion logic is the most complex Ruby in the codebase, requiring careful manual verification of the generated state machines. + +#### P9-P11: Backend (Lowering & Emission) +- **Ruby Surface**: Explicit memory decision logic, cleanup classification, Zig template strings. +- **Transpiler Goal**: High-fidelity mapping of Ruby logic to CLEAR's ownership markers (`Cleanup`, `MoveMark`). +- **Manual Work**: Very low; these passes are already designed with "CLEAR-like" semantics (mechanical and fact-driven). + +#### P12: Test Suite (spec/) +- **Ruby Surface**: RSpec DSL (`expect`, `it`, `describe`), dynamic doubles, `send` for white-box testing. +- **Transpiler Goal**: Map RSpec DSL to CLEAR `TEST` and `ASSERT` blocks. +- **Manual Work**: High (~15%) due to the highly dynamic nature of Ruby test mocking. + +## Success Criteria + +A phase is considered complete when: +1. The CLEAR-transpiled version of Pass N passes all unit tests when driven by the Ruby versions of Passes 0..(N-1). +2. Decomplex reports 0 "Encapsulation Breaches" in the generated CLEAR code. +3. The binary size and performance of the self-hosted pass are within 20% of the Ruby baseline (targeting 2-5x faster eventually). 
From 4dccb188bbb4351e17b32d69711184a1bdf2c9ff Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 13:38:50 +0000 Subject: [PATCH 45/52] Add Ruby syntax facts oracle --- .../syntax-facts/oracles/ruby-core.json | 1188 +++++++++++++++++ .../examples/syntax-facts/ruby/core.rb | 73 + gems/decomplex/lib/decomplex.rb | 1 + gems/decomplex/lib/decomplex/syntax.rb | 2 + gems/decomplex/lib/decomplex/syntax/ruby.rb | 15 + gems/decomplex/lib/decomplex/syntax_oracle.rb | 117 ++ gems/decomplex/rust/src/decomplex/mod.rs | 1 + .../rust/src/decomplex/syntax_oracle.rs | 121 ++ gems/decomplex/rust/src/main.rs | 28 +- gems/decomplex/test/syntax_oracle_test.rb | 45 + 10 files changed, 1590 insertions(+), 1 deletion(-) create mode 100644 gems/decomplex/examples/syntax-facts/oracles/ruby-core.json create mode 100644 gems/decomplex/examples/syntax-facts/ruby/core.rb create mode 100644 gems/decomplex/lib/decomplex/syntax_oracle.rb create mode 100644 gems/decomplex/rust/src/decomplex/syntax_oracle.rs create mode 100644 gems/decomplex/test/syntax_oracle_test.rb diff --git a/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json b/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json new file mode 100644 index 000000000..a77521f15 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json @@ -0,0 +1,1188 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/ruby/core.rb", + "language": "ruby", + "functions": [ + { + "name": "audit", + "owner": "RubySyntaxFactsCore", + "line": 57, + "span": [ + 57, + 2, + 62, + 5 + ], + "visibility": "private", + "params": [ + "name" + ] + }, + { + "name": "initialize", + "owner": "RubySyntaxFactsCore", + "line": 19, + "span": [ + 19, + 2, + 23, + 5 + ], + "visibility": "public", + "params": [ + "source" + ] + }, + { + "name": "inline_private", + "owner": "RubySyntaxFactsCore", + "line": 64, + "span": [ + 64, + 10, + 66, + 5 + ], + "visibility": "private", 
+ "params": [ + "value" + ] + }, + { + "name": "loaded?", + "owner": "RubySyntaxFactsCore", + "line": 72, + "span": [ + 72, + 2, + 72, + 33 + ], + "visibility": "private", + "params": [] + }, + { + "name": "process", + "owner": "RubySyntaxFactsCore", + "line": 26, + "span": [ + 26, + 2, + 53, + 5 + ], + "visibility": "public", + "params": [ + "user", + "items", + "callback" + ] + }, + { + "name": "ready?", + "owner": "RubySyntaxFactsCore", + "line": 68, + "span": [ + 68, + 2, + 70, + 5 + ], + "visibility": "private", + "params": [] + }, + { + "name": "self.build", + "owner": "RubySyntaxFactsCore", + "line": 14, + "span": [ + 14, + 2, + 16, + 5 + ], + "visibility": "public", + "params": [ + "source" + ] + } + ], + "owners": [ + { + "name": "Account", + "kind": "class", + "line": 3, + "span": [ + 3, + 0, + 6, + 3 + ] + }, + { + "name": "RubySyntaxFactsCore", + "kind": "class", + "line": 8, + "span": [ + 8, + 0, + 73, + 3 + ] + } + ], + "calls": [ + { + "receiver": "%w[owner admin]", + "message": "freeze", + "function": "(top-level)", + "owner": "RubySyntaxFactsCore", + "line": 9, + "span": [ + 9, + 16, + 9, + 38 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "Account", + "message": "new", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 28, + "span": [ + 28, + 14, + 28, + 59 + ], + "conditional": false, + "arguments": [ + "name: name", + "active: user.active?" 
+ ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "T", + "message": "let", + "function": "initialize", + "owner": "RubySyntaxFactsCore", + "line": 21, + "span": [ + 21, + 13, + 21, + 30 + ], + "conditional": false, + "arguments": [ + "0", + "Integer" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "T", + "message": "let", + "function": "initialize", + "owner": "RubySyntaxFactsCore", + "line": 22, + "span": [ + 22, + 14, + 22, + 34 + ], + "conditional": false, + "arguments": [ + ":idle", + "Status" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "T", + "message": "type_alias", + "function": "(top-level)", + "owner": "RubySyntaxFactsCore", + "line": 10, + "span": [ + 10, + 11, + 10, + 34 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": true + }, + { + "receiver": "callback", + "message": "call", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 30, + "span": [ + 30, + 4, + 30, + 22 + ], + "conditional": false, + "arguments": [ + "account" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "item", + "message": "children", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 49, + "span": [ + 49, + 6, + 49, + 19 + ], + "conditional": true, + "arguments": [], + "control": "iterates", + "safe_navigation": false, + "block": false + }, + { + "receiver": "items", + "message": "flat_map", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 48, + "span": [ + 48, + 4, + 50, + 7 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": true + }, + { + "receiver": "self", + "message": "attr_reader", + "function": "(top-level)", + "owner": "RubySyntaxFactsCore", + "line": 12, + "span": [ + 12, + 2, + 12, + 20 + ], + "conditional": 
false, + "arguments": [ + ":count" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "audit", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 29, + "span": [ + 29, + 4, + 29, + 15 + ], + "conditional": false, + "arguments": [ + "name" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "const", + "function": "(top-level)", + "owner": "Account", + "line": 4, + "span": [ + 4, + 2, + 4, + 21 + ], + "conditional": false, + "arguments": [ + ":name", + "String" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "default", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 38, + "span": [ + 38, + 6, + 38, + 19 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "escalate", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 34, + "span": [ + 34, + 6, + 34, + 20 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "fallback", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 36, + "span": [ + 36, + 6, + 36, + 20 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "helper", + "function": "inline_private", + "owner": "RubySyntaxFactsCore", + "line": 65, + "span": [ + 65, + 4, + 65, + 17 + ], + "conditional": false, + "arguments": [ + "value" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "new", + "function": "self.build", + "owner": "RubySyntaxFactsCore", + "line": 15, + 
"span": [ + 15, + 4, + 15, + 15 + ], + "conditional": false, + "arguments": [ + "source" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "params", + "function": "(top-level)", + "owner": "RubySyntaxFactsCore", + "line": 18, + "span": [ + 18, + 8, + 18, + 30 + ], + "conditional": false, + "arguments": [ + "source: Object" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "params", + "function": "(top-level)", + "owner": "RubySyntaxFactsCore", + "line": 25, + "span": [ + 25, + 8, + 25, + 58 + ], + "conditional": false, + "arguments": [ + "user: Object", + "items: Array", + "callback: Proc" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "private", + "function": "(top-level)", + "owner": "RubySyntaxFactsCore", + "line": 55, + "span": [ + 55, + 2, + 55, + 9 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "private", + "function": "(top-level)", + "owner": "RubySyntaxFactsCore", + "line": 64, + "span": [ + 64, + 2, + 66, + 5 + ], + "conditional": false, + "arguments": [ + "inline_private", + "(value)", + "helper(value)" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "prop", + "function": "(top-level)", + "owner": "Account", + "line": 5, + "span": [ + 5, + 2, + 5, + 26 + ], + "conditional": false, + "arguments": [ + ":active", + "T::Boolean" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "publish", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 43, + "span": [ + 43, + 6, + 43, + 21 + ], + "conditional": true, + "arguments": [ + ":ready" + ], + "control": "conditional", + "safe_navigation": false, + 
"block": false + }, + { + "receiver": "self", + "message": "puts", + "function": "audit", + "owner": "RubySyntaxFactsCore", + "line": 58, + "span": [ + 58, + 4, + 58, + 14 + ], + "conditional": false, + "arguments": [ + "name" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "send", + "function": "audit", + "owner": "RubySyntaxFactsCore", + "line": 59, + "span": [ + 59, + 4, + 59, + 23 + ], + "conditional": false, + "arguments": [ + ":record", + "name" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "sig", + "function": "(top-level)", + "owner": "RubySyntaxFactsCore", + "line": 18, + "span": [ + 18, + 2, + 18, + 37 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": true + }, + { + "receiver": "self", + "message": "sig", + "function": "(top-level)", + "owner": "RubySyntaxFactsCore", + "line": 25, + "span": [ + 25, + 2, + 25, + 76 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": true + }, + { + "receiver": "self", + "message": "warn", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 45, + "span": [ + 45, + 6, + 45, + 23 + ], + "conditional": true, + "arguments": [ + "\"not ready\"" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "user", + "message": "active?", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 28, + "span": [ + 28, + 46, + 28, + 58 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "user", + "message": "profile", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 27, + "span": [ + 27, + 11, + 27, + 24 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": true, + 
"block": false + }, + { + "receiver": "user", + "message": "ready?", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 41, + "span": [ + 41, + 27, + 41, + 38 + ], + "conditional": true, + "arguments": [], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "user", + "message": "role", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 32, + "span": [ + 32, + 9, + 32, + 18 + ], + "conditional": true, + "arguments": [], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "user&.profile", + "message": "name", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 27, + "span": [ + 27, + 11, + 27, + 30 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": true, + "block": false + } + ], + "state_reads": [ + { + "field": "$GLOBAL_STATE", + "receiver": "self", + "function": "audit", + "owner": "RubySyntaxFactsCore", + "line": 60, + "span": [ + 60, + 4, + 60, + 17 + ] + }, + { + "field": "@count", + "receiver": "self", + "function": "ready?", + "owner": "RubySyntaxFactsCore", + "line": 69, + "span": [ + 69, + 4, + 69, + 10 + ] + }, + { + "field": "@source", + "receiver": "self", + "function": "audit", + "owner": "RubySyntaxFactsCore", + "line": 61, + "span": [ + 61, + 4, + 61, + 11 + ] + }, + { + "field": "@status", + "receiver": "self", + "function": "loaded?", + "owner": "RubySyntaxFactsCore", + "line": 72, + "span": [ + 72, + 16, + 72, + 23 + ] + }, + { + "field": "@status", + "receiver": "self", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 41, + "span": [ + 41, + 7, + 41, + 14 + ] + }, + { + "field": "@status", + "receiver": "self", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 52, + "span": [ + 52, + 4, + 52, + 11 + ] + }, + { + "field": "active?", + "receiver": "user", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 28, + 
"span": [ + 28, + 46, + 28, + 58 + ] + }, + { + "field": "flat_map", + "receiver": "items", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 48, + "span": [ + 48, + 4, + 50, + 7 + ] + }, + { + "field": "freeze", + "receiver": "%w[owner admin]", + "function": "(top-level)", + "owner": "RubySyntaxFactsCore", + "line": 9, + "span": [ + 9, + 16, + 9, + 38 + ] + }, + { + "field": "name", + "receiver": "user&.profile", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 27, + "span": [ + 27, + 11, + 27, + 30 + ] + }, + { + "field": "profile", + "receiver": "user", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 27, + "span": [ + 27, + 11, + 27, + 24 + ] + }, + { + "field": "ready?", + "receiver": "user", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 41, + "span": [ + 41, + 27, + 41, + 38 + ] + }, + { + "field": "role", + "receiver": "user", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 32, + "span": [ + 32, + 9, + 32, + 18 + ] + } + ], + "state_writes": [ + { + "field": "@count", + "receiver": "self", + "function": "initialize", + "owner": "RubySyntaxFactsCore", + "line": 21, + "span": [ + 21, + 4, + 21, + 30 + ] + }, + { + "field": "@count", + "receiver": "self", + "function": "process", + "owner": "RubySyntaxFactsCore", + "line": 42, + "span": [ + 42, + 6, + 42, + 17 + ] + }, + { + "field": "@source", + "receiver": "self", + "function": "initialize", + "owner": "RubySyntaxFactsCore", + "line": 20, + "span": [ + 20, + 4, + 20, + 20 + ] + }, + { + "field": "@status", + "receiver": "self", + "function": "initialize", + "owner": "RubySyntaxFactsCore", + "line": 22, + "span": [ + 22, + 4, + 22, + 34 + ] + } + ], + "decisions": [ + { + "kind": "case_dispatch", + "members": [ + "\"owner\"", + "ADMIN_ROLES", + "nil" + ], + "function": "process", + "line": 32, + "span": [ + 32, + 4, + 39, + 7 + ], + "predicate": "user.role", + "enclosing_span": [ + 32, + 4, + 39, + 7 + ] + }, 
+ { + "kind": "conjunction", + "members": [ + "@status == :idle", + "user.ready?" + ], + "function": "process", + "line": 41, + "span": [ + 41, + 7, + 41, + 38 + ], + "predicate": "@status == :idle && user.ready?", + "enclosing_span": [ + 41, + 4, + 46, + 7 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 32, + "span": [ + 32, + 4, + 39, + 7 + ], + "predicate": "user.role", + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 41, + "span": [ + 41, + 4, + 46, + 7 + ], + "predicate": "@status == :idle && user.ready?", + "state_refs": [ + "@status", + "user.ready?" + ] + } + ], + "dispatch_sites": [], + "semantic_effects": [ + { + "kind": "context_dependency", + "detail": "$GLOBAL_STATE", + "function": "audit", + "line": 60, + "span": [ + 60, + 4, + 60, + 17 + ] + }, + { + "kind": "dynamic_dispatch", + "detail": "callback.call", + "function": "process", + "line": 30, + "span": [ + 30, + 4, + 30, + 22 + ] + }, + { + "kind": "dynamic_dispatch", + "detail": "send", + "function": "audit", + "line": 59, + "span": [ + 59, + 4, + 59, + 23 + ] + }, + { + "kind": "hidden_io", + "detail": "puts", + "function": "audit", + "line": 58, + "span": [ + 58, + 4, + 58, + 14 + ] + }, + { + "kind": "hidden_io", + "detail": "warn", + "function": "process", + "line": 45, + "span": [ + 45, + 6, + 45, + 23 + ] + } + ], + "predicate_bodies": [ + { + "name": "loaded?", + "owner": "RubySyntaxFactsCore", + "body": "@status == :ready", + "line": 72, + "span": [ + 72, + 2, + 72, + 33 + ] + } + ], + "local_complexity": [ + { + "id": "RubySyntaxFactsCore#audit", + "score": 0.0, + "signals": {} + }, + { + "id": "RubySyntaxFactsCore#initialize", + "score": 0.0, + "signals": {} + }, + { + "id": "RubySyntaxFactsCore#inline_private", + "score": 0.0, + "signals": {} + }, + { + "id": "RubySyntaxFactsCore#loaded?", + "score": 0.0, + "signals": {} + }, + { + "id": "RubySyntaxFactsCore#process", + "score": 4.0, + "signals": { + "boolean_ops": 2, + 
"branches": 1, + "cases": 2, + "loops": 1 + } + }, + { + "id": "RubySyntaxFactsCore#ready?", + "score": 0.0, + "signals": {} + }, + { + "id": "RubySyntaxFactsCore#self.build", + "score": 0.0, + "signals": {} + } + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/ruby/core.rb b/gems/decomplex/examples/syntax-facts/ruby/core.rb new file mode 100644 index 000000000..0115a0e2f --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/ruby/core.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +class Account < T::Struct + const :name, String + prop :active, T::Boolean +end + +class RubySyntaxFactsCore + ADMIN_ROLES = %w[owner admin].freeze + Status = T.type_alias { Symbol } + + attr_reader :count + + def self.build(source) + new(source) + end + + sig { params(source: Object).void } + def initialize(source) + @source = source + @count = T.let(0, Integer) + @status = T.let(:idle, Status) + end + + sig { params(user: Object, items: Array, callback: Proc).returns(Symbol) } + def process(user, items, callback) + name = user&.profile&.name + account = Account.new(name: name, active: user.active?) + audit(name) + callback.(account) + + case user.role + when "owner", *ADMIN_ROLES + escalate(user) + when nil + fallback(user) + else + default(user) + end + + if @status == :idle && user.ready? + @count += 1 + publish(:ready) + else + warn("not ready") + end + + items.flat_map do |item| + item.children + end + + @status + end + + private + + def audit(name) + puts(name) + send(:record, name) + $GLOBAL_STATE + @source + end + + private def inline_private(value) + helper(value) + end + + def ready? + @count > 0 + end + + def loaded? 
= @status == :ready +end diff --git a/gems/decomplex/lib/decomplex.rb b/gems/decomplex/lib/decomplex.rb index c993004a5..526377294 100644 --- a/gems/decomplex/lib/decomplex.rb +++ b/gems/decomplex/lib/decomplex.rb @@ -34,6 +34,7 @@ require_relative "decomplex/root_cause" require_relative "decomplex/delta" require_relative "decomplex/report_facts" +require_relative "decomplex/syntax_oracle" # Decomplex: decision-level duplication + neglected-condition detector. # See decomplex.gemspec for the rationale. v0 scope is exact-match diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 13259dc3d..eda609bfa 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -1383,6 +1383,8 @@ def state_write_source_node(node) end def record_state_read(document, node, stack, out) + return if assignment_lhs?(node) + target = state_read_target(node) return unless target target = normalize_target_receiver(target, stack) diff --git a/gems/decomplex/lib/decomplex/syntax/ruby.rb b/gems/decomplex/lib/decomplex/syntax/ruby.rb index 2cfdaf2a6..2292ea8b6 100644 --- a/gems/decomplex/lib/decomplex/syntax/ruby.rb +++ b/gems/decomplex/lib/decomplex/syntax/ruby.rb @@ -273,11 +273,26 @@ def hidden_ruby_method_signature(document, node) def ruby_single_expression_function_body(node) body = ruby_method_body_wrapper(node) + return ruby_endless_method_expression(node) unless body + return nil unless body ruby_single_expression_body_child(body) end + def ruby_endless_method_expression(node) + return nil unless ts_node?(node) + return nil unless %w[method singleton_method].include?(node.kind) + return nil if node.named_children.any? 
{ |child| child.kind == "body_statement" } + + node.named_children.reverse.find do |child| + !%w[ + identifier field_identifier property_identifier constant self + method_parameters superclass + ].include?(child.kind) + end + end + def ruby_method_body_wrapper(node) return nil unless ts_node?(node) diff --git a/gems/decomplex/lib/decomplex/syntax_oracle.rb b/gems/decomplex/lib/decomplex/syntax_oracle.rb new file mode 100644 index 000000000..3c4d0b23b --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax_oracle.rb @@ -0,0 +1,117 @@ +# frozen_string_literal: true + +require "json" +require_relative "syntax" +require_relative "native/command" + +module Decomplex + module SyntaxOracle + FORMAT = "decomplex.syntax-facts.v1" + + module_function + + def project(files, engine: "ruby", language: nil) + paths = Array(files).map(&:to_s) + case engine.to_s + when "ruby" + project_files(paths, language: language) + when "rust" + rust_project_files(paths, language: language) + else + raise ArgumentError, "unsupported syntax oracle engine: #{engine}" + end + end + + def canonical_json(files, engine: "ruby", language: nil) + JSON.pretty_generate(project(files, engine: engine, language: language)) << "\n" + end + + def project_files(files, language: nil) + { + "format" => FORMAT, + "documents" => Array(files).map do |file| + lang = (language || Syntax.language_for(file)).to_sym + project_document(Syntax.parse(file, language: lang)) + end + } + end + + def project_document(document) + { + "file" => logical_file(document.file), + "language" => document.language.to_s, + "functions" => rows(document.function_defs, %i[name owner line span visibility params]), + "owners" => rows(document.owner_defs, %i[name kind line span]), + "calls" => rows( + document.call_sites, + %i[receiver message function owner line span conditional arguments control safe_navigation block] + ), + "state_reads" => rows(document.state_reads, %i[field receiver function owner line span]), + "state_writes" => 
rows(document.state_writes, %i[field receiver function owner line span]), + "decisions" => rows(document.decision_sites, %i[kind members function line span predicate enclosing_span]), + "branch_decisions" => branch_decision_rows(document), + "dispatch_sites" => rows(document.dispatch_sites, %i[variant_set arm_members outside function line span]), + "semantic_effects" => rows(document.semantic_effect_sites, %i[kind detail function line span]), + "predicate_bodies" => rows(document.predicate_defs, %i[name owner body line span]), + "local_complexity" => local_complexity_rows(document) + } + end + + def rust_project_files(files, language:) + lang = language || Syntax.language_for(files.first).to_s + JSON.parse(Native::Command.run("syntax-facts", "--language", lang.to_s, *files)) + end + + def rows(items, keys) + Array(items).map do |item| + keys.each_with_object({}) do |key, out| + out[key.to_s] = normalize_value(item.public_send(key)) + end + end.sort_by { |row| JSON.generate(row) } + end + + def branch_decision_rows(document) + rows = document.branch_decisions( + immutable_readers: document.immutable_struct_readers, + immutable_reader_types: document.immutable_struct_reader_types, + type_aliases: document.type_aliases + ) + rows(rows, %i[function line span predicate state_refs]) + end + + def local_complexity_rows(document) + document.local_complexity_scores.map do |id, score| + { + "id" => id.to_s, + "score" => normalize_value(score.fetch(:score)), + "signals" => normalize_value(score.fetch(:signals)) + } + end.sort_by { |row| row.fetch("id") } + end + + def normalize_value(value) + case value + when Symbol + value.to_s + when Array + value.map { |item| normalize_value(item) } + when Hash + value.keys.map(&:to_s).sort.each_with_object({}) do |key, out| + raw_key = value.key?(key) ? 
key : key.to_sym + out[key] = normalize_value(value.fetch(raw_key)) + end + else + value + end + end + + def logical_file(file) + path = file.to_s.tr("\\", "/") + marker = "gems/decomplex/examples/" + index = path.index(marker) + return path[index..] if index + + path + end + end +end diff --git a/gems/decomplex/rust/src/decomplex/mod.rs b/gems/decomplex/rust/src/decomplex/mod.rs index 48dd07632..27df87ae5 100644 --- a/gems/decomplex/rust/src/decomplex/mod.rs +++ b/gems/decomplex/rust/src/decomplex/mod.rs @@ -12,3 +12,4 @@ pub mod report_value; pub mod root_cause; pub mod sarif; pub mod syntax; +pub mod syntax_oracle; diff --git a/gems/decomplex/rust/src/decomplex/syntax_oracle.rs b/gems/decomplex/rust/src/decomplex/syntax_oracle.rs new file mode 100644 index 000000000..598c05158 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax_oracle.rs @@ -0,0 +1,121 @@ +use crate::decomplex::syntax::{self, Document, Language}; +use anyhow::Result; +use serde_json::{json, Value}; +use std::path::PathBuf; + +pub const FORMAT: &str = "decomplex.syntax-facts.v1"; + +pub fn project_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(json!({ + "format": FORMAT, + "documents": documents.iter().map(project_document).collect::>(), + })) +} + +pub fn project_document(document: &Document) -> Value { + json!({ + "file": logical_file(&document.file), + "language": document.language.as_str(), + "functions": sorted(document.function_defs.iter().map(|function| json!({ + "name": function.name, + "owner": function.owner, + "line": function.line, + "span": function.span, + "visibility": function.visibility, + "params": function.params, + })).collect()), + "owners": sorted(document.owner_defs.iter().map(|owner| json!({ + "name": owner.name, + "kind": owner.kind, + "line": owner.line, + "span": owner.span, + })).collect()), + "calls": sorted(document.call_sites.iter().map(|call| json!({ + "receiver": call.receiver, + 
"message": call.message, + "function": call.function, + "owner": call.owner, + "line": call.line, + "span": call.span, + "conditional": call.conditional, + "arguments": call.arguments, + "control": call.control, + "safe_navigation": call.safe_navigation, + "block": call.block, + })).collect()), + "state_reads": sorted(document.state_reads.iter().map(|read| json!({ + "field": read.field, + "receiver": read.receiver, + "function": read.function, + "owner": read.owner, + "line": read.line, + "span": read.span, + })).collect()), + "state_writes": sorted(document.state_writes.iter().map(|write| json!({ + "field": write.field, + "receiver": write.receiver, + "function": write.function, + "owner": write.owner, + "line": write.line, + "span": write.span, + })).collect()), + "decisions": sorted(document.decision_sites.iter().map(|decision| json!({ + "kind": decision.kind, + "members": decision.members, + "function": decision.function, + "line": decision.line, + "span": decision.span, + "predicate": decision.predicate, + "enclosing_span": decision.enclosing_span, + })).collect()), + "branch_decisions": sorted(document.branch_decisions.iter().map(|decision| json!({ + "function": decision.function, + "line": decision.line, + "span": decision.span, + "predicate": decision.predicate, + "state_refs": decision.state_refs, + })).collect()), + "dispatch_sites": sorted(document.dispatch_sites.iter().map(|site| json!({ + "variant_set": site.variant_set, + "arm_members": site.arm_members, + "outside": site.outside, + "function": site.function, + "line": site.line, + "span": site.span, + })).collect()), + "semantic_effects": sorted(document.semantic_effect_sites.iter().map(|site| json!({ + "kind": site.kind, + "detail": site.detail, + "function": site.function, + "line": site.line, + "span": site.span, + })).collect()), + "predicate_bodies": sorted(document.predicate_aliases.iter().map(|predicate| json!({ + "name": predicate.name, + "owner": "", + "body": predicate.body, + "line": 
predicate.line, + "span": predicate.span, + })).collect()), + "local_complexity": document.local_complexity_scores.iter().map(|(id, score)| json!({ + "id": id, + "score": score.score, + "signals": score.signals, + })).collect::>(), + }) +} + +fn sorted(mut rows: Vec) -> Vec { + rows.sort_by_key(|row| row.to_string()); + rows +} + +fn logical_file(file: &str) -> String { + let path = file.replace('\\', "/"); + let marker = "gems/decomplex/examples/"; + if let Some(index) = path.find(marker) { + return path[index..].to_string(); + } + path +} diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs index cfc2d1ea9..b6e64ebb9 100644 --- a/gems/decomplex/rust/src/main.rs +++ b/gems/decomplex/rust/src/main.rs @@ -10,6 +10,7 @@ use decomplex_rust::decomplex::parallel; use decomplex_rust::decomplex::report::Report; use decomplex_rust::decomplex::report_facts::{self, Options as ReportFactsOptions, VcsFilter}; use decomplex_rust::decomplex::syntax::{Document, Language, LocalComplexityScore}; +use decomplex_rust::decomplex::syntax_oracle; use serde::Deserialize; use serde_json::{json, Value}; use std::io::Read; @@ -266,6 +267,14 @@ fn run() -> Result<()> { let facts = read_facts(input.as_ref(), from_stdin)?; render_report(&facts, &format, output.as_ref())?; } + Command::SyntaxFacts { + language, files, .. + } => { + let language = Language::parse(&language)?; + let facts = syntax_oracle::project_files(&files, language) + .with_context(|| "failed to collect syntax facts")?; + println!("{}", serde_json::to_string(&facts)?); + } Command::DetectorFacts { input } => { let fixture = read_facts(Some(&input), false)?; let detector = fixture @@ -429,6 +438,11 @@ enum Command { format: String, output: Option, }, + SyntaxFacts { + language: String, + files: Vec, + jobs: Option, + }, DetectorFacts { input: PathBuf, }, @@ -463,7 +477,8 @@ impl Command { | Self::FalseSimplicity { jobs, .. } | Self::FatUnion { jobs, .. } | Self::Facts { jobs, .. 
} - | Self::Report { jobs, .. } => *jobs, + | Self::Report { jobs, .. } + | Self::SyntaxFacts { jobs, .. } => *jobs, Self::RenderReport { .. } | Self::DetectorFacts { .. } => None, } } @@ -513,6 +528,17 @@ fn parse_args(args: Vec) -> Result { let input = parse_input_only_args(cursor.collect(), "detector-facts")?; Ok(Command::DetectorFacts { input }) } + "syntax-facts" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("syntax-facts requires at least one file"); + } + Ok(Command::SyntaxFacts { + language, + files, + jobs, + }) + } "state-writes" => { let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; if files.is_empty() { diff --git a/gems/decomplex/test/syntax_oracle_test.rb b/gems/decomplex/test/syntax_oracle_test.rb new file mode 100644 index 000000000..56401d0f4 --- /dev/null +++ b/gems/decomplex/test/syntax_oracle_test.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +require "json" +require "minitest/autorun" +require_relative "../lib/decomplex/syntax_oracle" + +class SyntaxOracleTest < Minitest::Test + EXAMPLES_ROOT = File.expand_path("../examples/syntax-facts", __dir__) + ORACLE_ROOT = File.join(EXAMPLES_ROOT, "oracles") + ENGINES = %w[ruby].freeze + + FIXTURES = Dir[File.join(EXAMPLES_ROOT, "*", "*")] + .select { |path| File.file?(path) && Decomplex::Syntax.supported_source?(path) } + .sort + .freeze + + def test_syntax_fact_fixtures_exist + refute_empty FIXTURES + end + + FIXTURES.product(ENGINES).each_with_index do |(fixture_path, engine), index| + language = File.basename(File.dirname(fixture_path)) + name = File.basename(fixture_path, File.extname(fixture_path)) + method_name = "test_#{index}_#{engine}_#{language}_#{name}_syntax_facts_match_oracle" + + define_method(method_name) do + assert_syntax_facts_match_oracle(fixture_path, engine) + end + end + + private + + def assert_syntax_facts_match_oracle(fixture_path, engine) + language = 
File.basename(File.dirname(fixture_path)) + name = File.basename(fixture_path, File.extname(fixture_path)) + oracle_path = File.join(ORACLE_ROOT, "#{language}-#{name}.json") + + assert File.file?(oracle_path), "missing syntax oracle #{oracle_path}" + + expected = JSON.parse(File.read(oracle_path)) + actual = Decomplex::SyntaxOracle.project([fixture_path], engine: engine, language: language) + + assert_equal expected, actual, "#{engine} #{fixture_path}" + end +end From 9ee6fed02da2cde8d3a3d1903769fbdb84a761bd Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 13:57:50 +0000 Subject: [PATCH 46/52] Expand syntax facts oracle with clone candidates --- .../syntax-facts/oracles/python-core.json | 1417 +++++++++++++++++ .../syntax-facts/oracles/ruby-core.json | 489 ++++++ .../examples/syntax-facts/python/core.py | 65 + gems/decomplex/lib/decomplex/syntax_oracle.rb | 6 +- .../rust/src/decomplex/syntax_oracle.rs | 13 + 5 files changed, 1989 insertions(+), 1 deletion(-) create mode 100644 gems/decomplex/examples/syntax-facts/oracles/python-core.json create mode 100644 gems/decomplex/examples/syntax-facts/python/core.py diff --git a/gems/decomplex/examples/syntax-facts/oracles/python-core.json b/gems/decomplex/examples/syntax-facts/oracles/python-core.json new file mode 100644 index 000000000..fa22f9927 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/python-core.json @@ -0,0 +1,1417 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/python/core.py", + "language": "python", + "functions": [ + { + "name": "__init__", + "owner": "PythonSyntaxFactsCore", + "line": 7, + "span": [ + 7, + 4, + 10, + 22 + ], + "visibility": "public", + "params": [ + "self", + "lock", + "resource" + ] + }, + { + "name": "_normalize", + "owner": "PythonSyntaxFactsCore", + "line": 54, + "span": [ + 54, + 4, + 56, + 22 + ], + "visibility": "private", + "params": [ + "self", + "value" + ] + }, + { + "name": 
"generator", + "owner": "PythonSyntaxFactsCore", + "line": 58, + "span": [ + 58, + 4, + 60, + 23 + ], + "visibility": "public", + "params": [ + "self", + "values" + ] + }, + { + "name": "process", + "owner": "PythonSyntaxFactsCore", + "line": 12, + "span": [ + 12, + 4, + 52, + 41 + ], + "visibility": "public", + "params": [ + "self", + "user", + "items", + "callback" + ] + }, + { + "name": "simple_with", + "owner": "PythonSyntaxFactsCore", + "line": 62, + "span": [ + 62, + 4, + 64, + 16 + ], + "visibility": "public", + "params": [ + "self", + "resource" + ] + } + ], + "owners": [ + { + "name": "PythonSyntaxFactsCore", + "kind": "class", + "line": 6, + "span": [ + 6, + 0, + 64, + 16 + ] + } + ], + "calls": [ + { + "receiver": "handle", + "message": "read", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 22, + "span": [ + 22, + 19, + 22, + 32 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "handle", + "message": "read", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 22, + "span": [ + 22, + 26, + 22, + 30 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "item", + "message": "startswith", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 28, + "span": [ + 28, + 30, + 28, + 50 + ], + "conditional": true, + "arguments": [ + "\"x\"" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "item", + "message": "startswith", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 28, + "span": [ + 28, + 35, + 28, + 45 + ], + "conditional": true, + "arguments": [ + "\"x\"" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "result", + "message": "append", + "function": "process", + "owner": "PythonSyntaxFactsCore", + 
"line": 37, + "span": [ + 37, + 12, + 37, + 31 + ], + "conditional": true, + "arguments": [ + "item" + ], + "control": "iterates", + "safe_navigation": false, + "block": false + }, + { + "receiver": "result", + "message": "append", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 37, + "span": [ + 37, + 19, + 37, + 25 + ], + "conditional": true, + "arguments": [ + "item" + ], + "control": "iterates", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "audit", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 45, + "span": [ + 45, + 16, + 45, + 41 + ], + "conditional": true, + "arguments": [ + "result[index]" + ], + "control": "iterates", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "audit", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 45, + "span": [ + 45, + 21, + 45, + 26 + ], + "conditional": true, + "arguments": [ + "result[index]" + ], + "control": "iterates", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "callback", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 29, + "span": [ + 29, + 16, + 29, + 30 + ], + "conditional": true, + "arguments": [ + "item" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "default", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 35, + "span": [ + 35, + 20, + 35, + 38 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "default", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 35, + "span": [ + 35, + 25, + 35, + 32 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + 
"message": "escalate", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 33, + "span": [ + 33, + 20, + 33, + 39 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "escalate", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 33, + "span": [ + 33, + 25, + 33, + 33 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "len", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 40, + "span": [ + 40, + 22, + 40, + 33 + ], + "conditional": true, + "arguments": [ + "result" + ], + "control": "iterates", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "open", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 21, + "span": [ + 21, + 13, + 21, + 28 + ], + "conditional": false, + "arguments": [ + "user.path" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "value", + "message": "strip", + "function": "_normalize", + "owner": "PythonSyntaxFactsCore", + "line": 55, + "span": [ + 55, + 18, + 55, + 31 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "value", + "message": "strip", + "function": "_normalize", + "owner": "PythonSyntaxFactsCore", + "line": 55, + "span": [ + 55, + 24, + 55, + 29 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": false + } + ], + "state_reads": [ + { + "field": "append", + "receiver": "result", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 37, + "span": [ + 37, + 12, + 37, + 25 + ] + }, + { + "field": "audit", + "receiver": "self", + "function": "process", + "owner": 
"PythonSyntaxFactsCore", + "line": 45, + "span": [ + 45, + 16, + 45, + 26 + ] + }, + { + "field": "default", + "receiver": "self", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 35, + "span": [ + 35, + 20, + 35, + 32 + ] + }, + { + "field": "escalate", + "receiver": "self", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 33, + "span": [ + 33, + 20, + 33, + 33 + ] + }, + { + "field": "name", + "receiver": "user.profile", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 13, + "span": [ + 13, + 20, + 13, + 37 + ] + }, + { + "field": "path", + "receiver": "user", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 21, + "span": [ + 21, + 18, + 21, + 27 + ] + }, + { + "field": "profile", + "receiver": "user", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 13, + "span": [ + 13, + 20, + 13, + 32 + ] + }, + { + "field": "read", + "receiver": "handle", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 22, + "span": [ + 22, + 19, + 22, + 30 + ] + }, + { + "field": "ready", + "receiver": "user", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 28, + "span": [ + 28, + 15, + 28, + 25 + ] + }, + { + "field": "startswith", + "receiver": "item", + "function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 28, + "span": [ + 28, + 30, + 28, + 45 + ] + }, + { + "field": "strip", + "receiver": "value", + "function": "_normalize", + "owner": "PythonSyntaxFactsCore", + "line": 55, + "span": [ + 55, + 18, + 55, + 29 + ] + } + ], + "state_writes": [ + { + "field": "_lock", + "receiver": "self", + "function": "__init__", + "owner": "PythonSyntaxFactsCore", + "line": 8, + "span": [ + 8, + 8, + 8, + 25 + ] + }, + { + "field": "count", + "receiver": "self", + "function": "__init__", + "owner": "PythonSyntaxFactsCore", + "line": 10, + "span": [ + 10, + 8, + 10, + 22 + ] + }, + { + "field": "count", + "receiver": "self", + 
"function": "process", + "owner": "PythonSyntaxFactsCore", + "line": 19, + "span": [ + 19, + 12, + 19, + 22 + ] + }, + { + "field": "resource", + "receiver": "self", + "function": "__init__", + "owner": "PythonSyntaxFactsCore", + "line": 9, + "span": [ + 9, + 8, + 9, + 32 + ] + } + ], + "decisions": [ + { + "kind": "conjunction", + "members": [ + "item.startswith(\"x\")", + "user.ready" + ], + "function": "process", + "line": 28, + "span": [ + 28, + 15, + 28, + 50 + ], + "predicate": "user.ready and item.startswith(\"x\")", + "enclosing_span": [ + 28, + 12, + 29, + 30 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 28, + "span": [ + 28, + 12, + 29, + 30 + ], + "predicate": "user.ready and item.startswith(\"x\")", + "state_refs": [ + "item.startswith", + "user.ready" + ] + } + ], + "dispatch_sites": [], + "semantic_effects": [ + { + "kind": "hidden_io", + "detail": "open", + "function": "process", + "line": 21, + "span": [ + 21, + 13, + 21, + 28 + ] + } + ], + "predicate_bodies": [], + "local_complexity": [ + { + "id": "PythonSyntaxFactsCore#__init__", + "score": 0.0, + "signals": {} + }, + { + "id": "PythonSyntaxFactsCore#_normalize", + "score": 0.0, + "signals": { + "early_exits": 2 + } + }, + { + "id": "PythonSyntaxFactsCore#generator", + "score": 3.2, + "signals": { + "loops": 2, + "nested": 1 + } + }, + { + "id": "PythonSyntaxFactsCore#process", + "score": 15.0, + "signals": { + "boolean_ops": 2, + "branches": 3, + "cases": 3, + "early_exits": 2, + "loops": 4, + "nested": 5 + } + }, + { + "id": "PythonSyntaxFactsCore#simple_with", + "score": 0.0, + "signals": {} + } + ], + "clone_candidates": [ + { + "method_name": "(top-level)", + "node_name": "class_definition", + "line": 6, + "span": [ + 6, + 0, + 64, + 16 + ], + "mass": 377, + "fingerprint": "class_definition(id id ::: block(function_definition(id id parameters((:( id ,:, id ,:, id ):)) ::: block(expression_statement(attribute(id .:. 
id) =:= id) expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= lit))) function_definition(id id parameters((:( id ,:, typed_parameter(id ::: type(string_start:\" lit string_end:\")) ,:, typed_parameter(id ::: type(id type_parameter([:[ id ]:]))) ,:, id ):)) ::: block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. 
id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id)))) function_definition(id id parameters((:( id ,:, typed_default_parameter(id ::: type(id |:| nil) =:= nil) ):)) ::: block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id id id ::: block(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id ::: id))))", + "child_fingerprints": [ + "function_definition(id id parameters((:( id ,:, id ,:, id ):)) ::: block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= lit)))", + "function_definition(id id parameters((:( id ,:, typed_parameter(id ::: type(string_start:\" lit string_end:\")) ,:, typed_parameter(id ::: type(id type_parameter([:[ id ]:]))) ,:, id ):)) ::: block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. 
id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id))))", + "function_definition(id id parameters((:( id ,:, typed_default_parameter(id ::: type(id |:| nil) =:= nil) ):)) ::: block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id)))", + "function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id id id ::: block(id id)))", + "function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id ::: id))" + ], + "child_masses": [ + 34, + 258, + 46, + 19, + 15 + ] + }, + { + "method_name": "(top-level)", + "node_name": "module", + "line": 1, + "span": [ + 1, + 0, + 66, + 0 + ], + "mass": 389, + "fingerprint": "module(future_import_statement(id id id id) import_statement(id dotted_name(id .:. id)) class_definition(id id ::: block(function_definition(id id parameters((:( id ,:, id ,:, id ):)) ::: block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. 
id) =:= id) expression_statement(attribute(id .:. id) =:= lit))) function_definition(id id parameters((:( id ,:, typed_parameter(id ::: type(string_start:\" lit string_end:\")) ,:, typed_parameter(id ::: type(id type_parameter([:[ id ]:]))) ,:, id ):)) ::: block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. 
id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id)))) function_definition(id id parameters((:( id ,:, typed_default_parameter(id ::: type(id |:| nil) =:= nil) ):)) ::: block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id id id ::: block(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id ::: id)))))", + "child_fingerprints": [ + "future_import_statement(id id id id)", + "import_statement(id dotted_name(id .:. id))", + "class_definition(id id ::: block(function_definition(id id parameters((:( id ,:, id ,:, id ):)) ::: block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= lit))) function_definition(id id parameters((:( id ,:, typed_parameter(id ::: type(string_start:\" lit string_end:\")) ,:, typed_parameter(id ::: type(id type_parameter([:[ id ]:]))) ,:, id ):)) ::: block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. 
id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id)))) function_definition(id id parameters((:( id ,:, typed_default_parameter(id ::: type(id |:| nil) =:= nil) ):)) ::: block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id id id ::: block(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id ::: id))))" + ], + "child_masses": [ + 5, + 6, + 377 + ] + }, + { + "method_name": "__init__", + "node_name": "block", + "line": 7, + "span": [ + 7, + 4, + 64, + 16 + ], + "mass": 373, + "fingerprint": "block(function_definition(id id parameters((:( id ,:, id ,:, id ):)) ::: block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. 
id) =:= lit))) function_definition(id id parameters((:( id ,:, typed_parameter(id ::: type(string_start:\" lit string_end:\")) ,:, typed_parameter(id ::: type(id type_parameter([:[ id ]:]))) ,:, id ):)) ::: block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. 
id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id)))) function_definition(id id parameters((:( id ,:, typed_default_parameter(id ::: type(id |:| nil) =:= nil) ):)) ::: block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id id id ::: block(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id ::: id)))", + "child_fingerprints": [ + "function_definition(id id parameters((:( id ,:, id ,:, id ):)) ::: block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= lit)))", + "function_definition(id id parameters((:( id ,:, typed_parameter(id ::: type(string_start:\" lit string_end:\")) ,:, typed_parameter(id ::: type(id type_parameter([:[ id ]:]))) ,:, id ):)) ::: block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. 
id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id))))", + "function_definition(id id parameters((:( id ,:, typed_default_parameter(id ::: type(id |:| nil) =:= nil) ):)) ::: block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id)))", + "function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id id id ::: block(id id)))", + "function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id ::: id))" + ], + "child_masses": [ + 34, + 258, + 46, + 19, + 15 + ] + }, + { + "method_name": "__init__", + "node_name": "block", + "line": 8, + "span": [ + 8, + 8, + 10, + 22 + ], + "mass": 22, + "fingerprint": "block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= lit))", + "child_fingerprints": [ + "expression_statement(attribute(id .:. id) =:= id)", + "expression_statement(attribute(id .:. 
id) =:= id)", + "expression_statement(attribute(id .:. id) =:= lit)" + ], + "child_masses": [ + 7, + 7, + 7 + ] + }, + { + "method_name": "__init__", + "node_name": "defn", + "line": 7, + "span": [ + 7, + 4, + 10, + 22 + ], + "mass": 34, + "fingerprint": "function_definition(id id parameters((:( id ,:, id ,:, id ):)) ::: block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= lit)))", + "child_fingerprints": [ + "expression_statement(attribute(id .:. id) =:= id)", + "expression_statement(attribute(id .:. id) =:= id)", + "expression_statement(attribute(id .:. id) =:= lit)" + ], + "child_masses": [ + 7, + 7, + 7 + ] + }, + { + "method_name": "_normalize", + "node_name": "block", + "line": 55, + "span": [ + 55, + 8, + 56, + 22 + ], + "mass": 28, + "fingerprint": "block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id))", + "child_fingerprints": [ + "expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\")))", + "return_statement(id id)" + ], + "child_masses": [ + 24, + 3 + ] + }, + { + "method_name": "_normalize", + "node_name": "defn", + "line": 54, + "span": [ + 54, + 4, + 56, + 22 + ], + "mass": 46, + "fingerprint": "function_definition(id id parameters((:( id ,:, typed_default_parameter(id ::: type(id |:| nil) =:= nil) ):)) ::: block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id)))", + "child_fingerprints": [ + "expression_statement(id =:= conditional_expression(call(attribute(id .:. 
id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\")))", + "return_statement(id id)" + ], + "child_masses": [ + 24, + 3 + ] + }, + { + "method_name": "generator", + "node_name": "block", + "line": 59, + "span": [ + 59, + 8, + 60, + 23 + ], + "mass": 9, + "fingerprint": "block(id id id id ::: block(id id))", + "child_fingerprints": [], + "child_masses": [] + }, + { + "method_name": "generator", + "node_name": "block", + "line": 60, + "span": [ + 60, + 12, + 60, + 23 + ], + "mass": 3, + "fingerprint": "block(id id)", + "child_fingerprints": [], + "child_masses": [] + }, + { + "method_name": "generator", + "node_name": "defn", + "line": 58, + "span": [ + 58, + 4, + 60, + 23 + ], + "mass": 19, + "fingerprint": "function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id id id ::: block(id id)))", + "child_fingerprints": [ + "block(id id)" + ], + "child_masses": [ + 3 + ] + }, + { + "method_name": "process", + "node_name": "block", + "line": 13, + "span": [ + 13, + 8, + 52, + 41 + ], + "mass": 230, + "fingerprint": "block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. 
id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id)))", + "child_fingerprints": [ + "expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id))", + "expression_statement(id ::: id)", + "expression_statement(id =:= list([:[ ]:]))", + "expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\"))", + "with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit))", + "with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):)))))", + "for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. 
id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):)))))", + "expression_statement(id =:= lit)", + "while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit)))", + "assert_statement(id id)", + "return_statement(id conditional_expression(id id id id id))" + ], + "child_masses": [ + 12, + 4, + 6, + 8, + 14, + 26, + 91, + 4, + 53, + 3, + 8 + ] + }, + { + "method_name": "process", + "node_name": "block", + "line": 19, + "span": [ + 19, + 12, + 19, + 27 + ], + "mass": 7, + "fingerprint": "block(attribute(id .:. id) +=:+= lit)", + "child_fingerprints": [ + "attribute(id .:. id)", + "lit" + ], + "child_masses": [ + 4, + 1 + ] + }, + { + "method_name": "process", + "node_name": "block", + "line": 22, + "span": [ + 22, + 12, + 22, + 32 + ], + "mass": 11, + "fingerprint": "block(id =:= call(attribute(id .:. id) argument_list((:( ):))))", + "child_fingerprints": [ + "call(attribute(id .:. id) argument_list((:( ):)))" + ], + "child_masses": [ + 8 + ] + }, + { + "method_name": "process", + "node_name": "block", + "line": 25, + "span": [ + 25, + 12, + 37, + 31 + ], + "mass": 85, + "fingerprint": "block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. 
id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))", + "child_fingerprints": [ + "if_statement(id comparison_operator(id id nil) ::: id)", + "if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):))))", + "match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):))))))", + "expression_statement(attribute(id .:. id) argument_list((:( id ):)))" + ], + "child_masses": [ + 8, + 27, + 40, + 9 + ] + }, + { + "method_name": "process", + "node_name": "block", + "line": 29, + "span": [ + 29, + 16, + 29, + 30 + ], + "mass": 6, + "fingerprint": "block(id argument_list((:( id ):)))", + "child_fingerprints": [], + "child_masses": [] + }, + { + "method_name": "process", + "node_name": "block", + "line": 31, + "span": [ + 31, + 23, + 35, + 38 + ], + "mass": 36, + "fingerprint": "block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))", + "child_fingerprints": [ + "case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):))))", + "case_clause(id id ::: block(attribute(id .:. 
id) argument_list((:( id ):))))" + ], + "child_masses": [ + 22, + 13 + ] + }, + { + "method_name": "process", + "node_name": "block", + "line": 33, + "span": [ + 33, + 20, + 33, + 39 + ], + "mass": 9, + "fingerprint": "block(attribute(id .:. id) argument_list((:( id ):)))", + "child_fingerprints": [ + "attribute(id .:. id)" + ], + "child_masses": [ + 4 + ] + }, + { + "method_name": "process", + "node_name": "block", + "line": 35, + "span": [ + 35, + 20, + 35, + 38 + ], + "mass": 9, + "fingerprint": "block(attribute(id .:. id) argument_list((:( id ):)))", + "child_fingerprints": [ + "attribute(id .:. id)" + ], + "child_masses": [ + 4 + ] + }, + { + "method_name": "process", + "node_name": "block", + "line": 41, + "span": [ + 41, + 12, + 49, + 22 + ], + "mass": 41, + "fingerprint": "block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))", + "child_fingerprints": [ + "if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id)", + "try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id))", + "expression_statement(id +=:+= lit)" + ], + "child_masses": [ + 15, + 21, + 4 + ] + }, + { + "method_name": "process", + "node_name": "block", + "line": 45, + "span": [ + 45, + 16, + 45, + 41 + ], + "mass": 13, + "fingerprint": "block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):)))", + "child_fingerprints": [ + "attribute(id .:. 
id)" + ], + "child_masses": [ + 4 + ] + }, + { + "method_name": "process", + "node_name": "case_clause", + "line": 32, + "span": [ + 32, + 16, + 33, + 39 + ], + "mass": 22, + "fingerprint": "case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):))))", + "child_fingerprints": [ + "attribute(id .:. id)" + ], + "child_masses": [ + 4 + ] + }, + { + "method_name": "process", + "node_name": "case_clause", + "line": 34, + "span": [ + 34, + 16, + 35, + 38 + ], + "mass": 13, + "fingerprint": "case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):))))", + "child_fingerprints": [ + "attribute(id .:. id)" + ], + "child_masses": [ + 4 + ] + }, + { + "method_name": "process", + "node_name": "defn", + "line": 12, + "span": [ + 12, + 4, + 52, + 41 + ], + "mass": 258, + "fingerprint": "function_definition(id id parameters((:( id ,:, typed_parameter(id ::: type(string_start:\" lit string_end:\")) ,:, typed_parameter(id ::: type(id type_parameter([:[ id ]:]))) ,:, id ):)) ::: block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. 
id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id))))", + "child_fingerprints": [ + "expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id))", + "expression_statement(id ::: id)", + "expression_statement(id =:= list([:[ ]:]))", + "expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\"))", + "with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit))", + "with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):)))))", + "for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. 
id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):)))))", + "expression_statement(id =:= lit)", + "while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit)))", + "assert_statement(id id)", + "return_statement(id conditional_expression(id id id id id))" + ], + "child_masses": [ + 12, + 4, + 6, + 8, + 14, + 26, + 91, + 4, + 53, + 3, + 8 + ] + }, + { + "method_name": "process", + "node_name": "for_statement", + "line": 24, + "span": [ + 24, + 8, + 37, + 31 + ], + "mass": 91, + "fingerprint": "for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):)))))", + "child_fingerprints": [ + "if_statement(id comparison_operator(id id nil) ::: id)", + "if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. 
id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):))))", + "match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):))))))", + "expression_statement(attribute(id .:. id) argument_list((:( id ):)))" + ], + "child_masses": [ + 8, + 27, + 40, + 9 + ] + }, + { + "method_name": "process", + "node_name": "if_statement", + "line": 25, + "span": [ + 25, + 12, + 26, + 24 + ], + "mass": 8, + "fingerprint": "if_statement(id comparison_operator(id id nil) ::: id)", + "child_fingerprints": [ + "comparison_operator(id id nil)", + "id" + ], + "child_masses": [ + 4, + 1 + ] + }, + { + "method_name": "process", + "node_name": "if_statement", + "line": 28, + "span": [ + 28, + 12, + 29, + 30 + ], + "mass": 27, + "fingerprint": "if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. 
id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):))))", + "child_fingerprints": [], + "child_masses": [] + }, + { + "method_name": "process", + "node_name": "if_statement", + "line": 41, + "span": [ + 41, + 12, + 42, + 21 + ], + "mass": 15, + "fingerprint": "if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id)", + "child_fingerprints": [ + "comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\"))", + "id" + ], + "child_masses": [ + 11, + 1 + ] + }, + { + "method_name": "process", + "node_name": "match_statement", + "line": 31, + "span": [ + 31, + 12, + 35, + 38 + ], + "mass": 40, + "fingerprint": "match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):))))))", + "child_fingerprints": [ + "case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):))))", + "case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):))))" + ], + "child_masses": [ + 22, + 13 + ] + }, + { + "method_name": "process", + "node_name": "while_statement", + "line": 40, + "span": [ + 40, + 8, + 49, + 22 + ], + "mass": 53, + "fingerprint": "while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. 
id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit)))", + "child_fingerprints": [ + "if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id)", + "try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id))", + "expression_statement(id +=:+= lit)" + ], + "child_masses": [ + 15, + 21, + 4 + ] + }, + { + "method_name": "simple_with", + "node_name": "block", + "line": 63, + "span": [ + 63, + 8, + 64, + 16 + ], + "mass": 5, + "fingerprint": "block(id id ::: id)", + "child_fingerprints": [ + "id", + "id" + ], + "child_masses": [ + 1, + 1 + ] + }, + { + "method_name": "simple_with", + "node_name": "defn", + "line": 62, + "span": [ + 62, + 4, + 64, + 16 + ], + "mass": 15, + "fingerprint": "function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id ::: id))", + "child_fingerprints": [ + "id", + "id" + ], + "child_masses": [ + 1, + 1 + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json b/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json index a77521f15..b756de0b4 100644 --- a/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json +++ b/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json @@ -1182,6 +1182,495 @@ "score": 0.0, "signals": {} } + ], + "clone_candidates": [ + { + "method_name": "(top-level)", + "node_name": "assignment", + "line": 10, + "span": [ + 10, + 2, + 10, + 34 + ], + "mass": 11, + "fingerprint": "assignment(id =:= call(id .:. id block({:{ id }:})))", + "child_fingerprints": [ + "call(id .:. id block({:{ id }:}))" + ], + "child_masses": [ + 8 + ] + }, + { + "method_name": "(top-level)", + "node_name": "assignment", + "line": 9, + "span": [ + 9, + 2, + 9, + 38 + ], + "mass": 11, + "fingerprint": "assignment(id =:= call(string_array(%w(:%w[ id id ):]) .:. 
id))", + "child_fingerprints": [ + "call(string_array(%w(:%w[ id id ):]) .:. id)" + ], + "child_masses": [ + 8 + ] + }, + { + "method_name": "(top-level)", + "node_name": "block", + "line": 10, + "span": [ + 10, + 24, + 10, + 34 + ], + "mass": 4, + "fingerprint": "block({:{ id }:})", + "child_fingerprints": [ + "id" + ], + "child_masses": [ + 1 + ] + }, + { + "method_name": "(top-level)", + "node_name": "block", + "line": 18, + "span": [ + 18, + 6, + 18, + 37 + ], + "mass": 15, + "fingerprint": "block({:{ block_body(call(id argument_list((:( pair(id ::: id) ):))) .:. id) }:})", + "child_fingerprints": [ + "call(id argument_list((:( pair(id ::: id) ):)))" + ], + "child_masses": [ + 9 + ] + }, + { + "method_name": "(top-level)", + "node_name": "block", + "line": 25, + "span": [ + 25, + 6, + 25, + 76 + ], + "mass": 29, + "fingerprint": "block({:{ block_body(call(id argument_list((:( pair(id ::: id) ,:, pair(id ::: id) ,:, pair(id ::: id) ):))) .:. id argument_list((:( id ):))) }:})", + "child_fingerprints": [ + "call(id argument_list((:( pair(id ::: id) ,:, pair(id ::: id) ,:, pair(id ::: id) ):)))" + ], + "child_masses": [ + 19 + ] + }, + { + "method_name": "(top-level)", + "node_name": "class", + "line": 8, + "span": [ + 8, + 0, + 73, + 3 + ], + "mass": 342, + "fingerprint": "class(id id body_statement(assignment(id =:= call(string_array(%w(:%w[ id id ):]) .:. id)) assignment(id =:= call(id .:. id block({:{ id }:}))) call(id lit) singleton_method(id id .:. id method_parameters((:( id ):)) body_statement(id argument_list((:( id ):))) id) call(id block({:{ block_body(call(id argument_list((:( pair(id ::: id) ):))) .:. id) }:})) method(id id method_parameters((:( id ):)) body_statement(assignment(instance_variable:@source =:= id) assignment(instance_variable:@count =:= call(id .:. id argument_list((:( lit ,:, id ):)))) assignment(instance_variable:@status =:= call(id .:. 
id argument_list((:( lit ,:, id ):))))) id) call(id block({:{ block_body(call(id argument_list((:( pair(id ::: id) ,:, pair(id ::: id) ,:, pair(id ::: id) ):))) .:. id argument_list((:( id ):))) }:})) method(id id method_parameters((:( id ,:, id ,:, id ):)) body_statement(assignment(id =:= call(call(id &.:&. id) &.:&. id)) assignment(id =:= call(id .:. id argument_list((:( pair(id ::: id) ,:, pair(id ::: call(id .:. id)) ):)))) call(id argument_list((:( id ):))) call(id .:. argument_list((:( id ):))) case(id call(id .:. id) when(id pattern(\":\" lit \":\") ,:, pattern(*:* id) then(call(id argument_list((:( id ):))))) when(id id then(call(id argument_list((:( id ):))))) else(id call(id argument_list((:( id ):)))) id) if(id binary(binary(instance_variable:@status ==:== lit) &&:&& call(id .:. id)) then(operator_assignment(instance_variable:@count +=:+= lit) call(id argument_list((:( lit ):)))) else(id call(id argument_list((:( string(\":\" lit \":\") ):)))) id) call(id .:. id do_block(id block_parameters(|:| id |:|) body_statement(id .:. id) id)) instance_variable:@status) id) id method(id id method_parameters((:( id ):)) body_statement(call(id argument_list((:( id ):))) call(id argument_list((:( lit ,:, id ):))) global_variable:$GLOBAL_STATE instance_variable:@source) id) call(id argument_list(id id method_parameters((:( id ):)) body_statement(id argument_list((:( id ):))) id)) method(id id body_statement(instance_variable:@count >:> lit) id) method(id id =:= binary(instance_variable:@status ==:== lit))) id)", + "child_fingerprints": [ + "assignment(id =:= call(string_array(%w(:%w[ id id ):]) .:. id))", + "assignment(id =:= call(id .:. id block({:{ id }:})))", + "call(id lit)", + "singleton_method(id id .:. id method_parameters((:( id ):)) body_statement(id argument_list((:( id ):))) id)", + "call(id block({:{ block_body(call(id argument_list((:( pair(id ::: id) ):))) .:. 
id) }:}))", + "method(id id method_parameters((:( id ):)) body_statement(assignment(instance_variable:@source =:= id) assignment(instance_variable:@count =:= call(id .:. id argument_list((:( lit ,:, id ):)))) assignment(instance_variable:@status =:= call(id .:. id argument_list((:( lit ,:, id ):))))) id)", + "call(id block({:{ block_body(call(id argument_list((:( pair(id ::: id) ,:, pair(id ::: id) ,:, pair(id ::: id) ):))) .:. id argument_list((:( id ):))) }:}))", + "method(id id method_parameters((:( id ,:, id ,:, id ):)) body_statement(assignment(id =:= call(call(id &.:&. id) &.:&. id)) assignment(id =:= call(id .:. id argument_list((:( pair(id ::: id) ,:, pair(id ::: call(id .:. id)) ):)))) call(id argument_list((:( id ):))) call(id .:. argument_list((:( id ):))) case(id call(id .:. id) when(id pattern(\":\" lit \":\") ,:, pattern(*:* id) then(call(id argument_list((:( id ):))))) when(id id then(call(id argument_list((:( id ):))))) else(id call(id argument_list((:( id ):)))) id) if(id binary(binary(instance_variable:@status ==:== lit) &&:&& call(id .:. id)) then(operator_assignment(instance_variable:@count +=:+= lit) call(id argument_list((:( lit ):)))) else(id call(id argument_list((:( string(\":\" lit \":\") ):)))) id) call(id .:. id do_block(id block_parameters(|:| id |:|) body_statement(id .:. 
id) id)) instance_variable:@status) id)", + "method(id id method_parameters((:( id ):)) body_statement(call(id argument_list((:( id ):))) call(id argument_list((:( lit ,:, id ):))) global_variable:$GLOBAL_STATE instance_variable:@source) id)", + "call(id argument_list(id id method_parameters((:( id ):)) body_statement(id argument_list((:( id ):))) id))", + "method(id id body_statement(instance_variable:@count >:> lit) id)", + "method(id id =:= binary(instance_variable:@status ==:== lit))" + ], + "child_masses": [ + 11, + 11, + 3, + 16, + 17, + 39, + 31, + 151, + 25, + 16, + 8, + 8 + ] + }, + { + "method_name": "audit", + "node_name": "defn", + "line": 57, + "span": [ + 57, + 2, + 62, + 5 + ], + "mass": 25, + "fingerprint": "method(id id method_parameters((:( id ):)) body_statement(call(id argument_list((:( id ):))) call(id argument_list((:( lit ,:, id ):))) global_variable:$GLOBAL_STATE instance_variable:@source) id)", + "child_fingerprints": [ + "call(id argument_list((:( id ):)))", + "call(id argument_list((:( lit ,:, id ):)))", + "global_variable:$GLOBAL_STATE", + "instance_variable:@source" + ], + "child_masses": [ + 6, + 8, + 1, + 1 + ] + }, + { + "method_name": "initialize", + "node_name": "assignment", + "line": 20, + "span": [ + 20, + 4, + 20, + 20 + ], + "mass": 4, + "fingerprint": "assignment(instance_variable:@source =:= id)", + "child_fingerprints": [ + "instance_variable:@source" + ], + "child_masses": [ + 1 + ] + }, + { + "method_name": "initialize", + "node_name": "assignment", + "line": 21, + "span": [ + 21, + 4, + 21, + 30 + ], + "mass": 13, + "fingerprint": "assignment(instance_variable:@count =:= call(id .:. id argument_list((:( lit ,:, id ):))))", + "child_fingerprints": [ + "instance_variable:@count", + "call(id .:. 
id argument_list((:( lit ,:, id ):)))" + ], + "child_masses": [ + 1, + 10 + ] + }, + { + "method_name": "initialize", + "node_name": "assignment", + "line": 22, + "span": [ + 22, + 4, + 22, + 34 + ], + "mass": 13, + "fingerprint": "assignment(instance_variable:@status =:= call(id .:. id argument_list((:( lit ,:, id ):))))", + "child_fingerprints": [ + "instance_variable:@status", + "call(id .:. id argument_list((:( lit ,:, id ):)))" + ], + "child_masses": [ + 1, + 10 + ] + }, + { + "method_name": "initialize", + "node_name": "defn", + "line": 19, + "span": [ + 19, + 2, + 23, + 5 + ], + "mass": 39, + "fingerprint": "method(id id method_parameters((:( id ):)) body_statement(assignment(instance_variable:@source =:= id) assignment(instance_variable:@count =:= call(id .:. id argument_list((:( lit ,:, id ):)))) assignment(instance_variable:@status =:= call(id .:. id argument_list((:( lit ,:, id ):))))) id)", + "child_fingerprints": [ + "assignment(instance_variable:@source =:= id)", + "assignment(instance_variable:@count =:= call(id .:. id argument_list((:( lit ,:, id ):))))", + "assignment(instance_variable:@status =:= call(id .:. 
id argument_list((:( lit ,:, id ):))))" + ], + "child_masses": [ + 4, + 13, + 13 + ] + }, + { + "method_name": "inline_private", + "node_name": "defn", + "line": 64, + "span": [ + 64, + 10, + 66, + 5 + ], + "mass": 14, + "fingerprint": "argument_list(id id method_parameters((:( id ):)) body_statement(id argument_list((:( id ):))) id)", + "child_fingerprints": [], + "child_masses": [] + }, + { + "method_name": "loaded?", + "node_name": "defn", + "line": 72, + "span": [ + 72, + 2, + 72, + 33 + ], + "mass": 8, + "fingerprint": "method(id id =:= binary(instance_variable:@status ==:== lit))", + "child_fingerprints": [ + "instance_variable:@status", + "lit" + ], + "child_masses": [ + 1, + 1 + ] + }, + { + "method_name": "process", + "node_name": "assignment", + "line": 27, + "span": [ + 27, + 4, + 27, + 30 + ], + "mass": 10, + "fingerprint": "assignment(id =:= call(call(id &.:&. id) &.:&. id))", + "child_fingerprints": [ + "call(call(id &.:&. id) &.:&. id)" + ], + "child_masses": [ + 7 + ] + }, + { + "method_name": "process", + "node_name": "assignment", + "line": 28, + "span": [ + 28, + 4, + 28, + 59 + ], + "mass": 22, + "fingerprint": "assignment(id =:= call(id .:. id argument_list((:( pair(id ::: id) ,:, pair(id ::: call(id .:. id)) ):))))", + "child_fingerprints": [ + "call(id .:. id argument_list((:( pair(id ::: id) ,:, pair(id ::: call(id .:. id)) ):)))" + ], + "child_masses": [ + 19 + ] + }, + { + "method_name": "process", + "node_name": "case", + "line": 32, + "span": [ + 32, + 4, + 39, + 7 + ], + "mass": 42, + "fingerprint": "case(id call(id .:. id) when(id pattern(\":\" lit \":\") ,:, pattern(*:* id) then(call(id argument_list((:( id ):))))) when(id id then(call(id argument_list((:( id ):))))) else(id call(id argument_list((:( id ):)))) id)", + "child_fingerprints": [ + "call(id .:. 
id)", + "when(id pattern(\":\" lit \":\") ,:, pattern(*:* id) then(call(id argument_list((:( id ):)))))", + "when(id id then(call(id argument_list((:( id ):)))))", + "else(id call(id argument_list((:( id ):))))" + ], + "child_masses": [ + 4, + 17, + 10, + 8 + ] + }, + { + "method_name": "process", + "node_name": "defn", + "line": 26, + "span": [ + 26, + 2, + 53, + 5 + ], + "mass": 151, + "fingerprint": "method(id id method_parameters((:( id ,:, id ,:, id ):)) body_statement(assignment(id =:= call(call(id &.:&. id) &.:&. id)) assignment(id =:= call(id .:. id argument_list((:( pair(id ::: id) ,:, pair(id ::: call(id .:. id)) ):)))) call(id argument_list((:( id ):))) call(id .:. argument_list((:( id ):))) case(id call(id .:. id) when(id pattern(\":\" lit \":\") ,:, pattern(*:* id) then(call(id argument_list((:( id ):))))) when(id id then(call(id argument_list((:( id ):))))) else(id call(id argument_list((:( id ):)))) id) if(id binary(binary(instance_variable:@status ==:== lit) &&:&& call(id .:. id)) then(operator_assignment(instance_variable:@count +=:+= lit) call(id argument_list((:( lit ):)))) else(id call(id argument_list((:( string(\":\" lit \":\") ):)))) id) call(id .:. id do_block(id block_parameters(|:| id |:|) body_statement(id .:. id) id)) instance_variable:@status) id)", + "child_fingerprints": [ + "assignment(id =:= call(call(id &.:&. id) &.:&. id))", + "assignment(id =:= call(id .:. id argument_list((:( pair(id ::: id) ,:, pair(id ::: call(id .:. id)) ):))))", + "call(id argument_list((:( id ):)))", + "call(id .:. argument_list((:( id ):)))", + "case(id call(id .:. id) when(id pattern(\":\" lit \":\") ,:, pattern(*:* id) then(call(id argument_list((:( id ):))))) when(id id then(call(id argument_list((:( id ):))))) else(id call(id argument_list((:( id ):)))) id)", + "if(id binary(binary(instance_variable:@status ==:== lit) &&:&& call(id .:. 
id)) then(operator_assignment(instance_variable:@count +=:+= lit) call(id argument_list((:( lit ):)))) else(id call(id argument_list((:( string(\":\" lit \":\") ):)))) id)", + "call(id .:. id do_block(id block_parameters(|:| id |:|) body_statement(id .:. id) id))", + "instance_variable:@status" + ], + "child_masses": [ + 10, + 22, + 6, + 7, + 42, + 35, + 15, + 1 + ] + }, + { + "method_name": "process", + "node_name": "do_block", + "line": 48, + "span": [ + 48, + 19, + 50, + 7 + ], + "mass": 11, + "fingerprint": "do_block(id block_parameters(|:| id |:|) body_statement(id .:. id) id)", + "child_fingerprints": [], + "child_masses": [] + }, + { + "method_name": "process", + "node_name": "if", + "line": 41, + "span": [ + 41, + 4, + 46, + 7 + ], + "mass": 35, + "fingerprint": "if(id binary(binary(instance_variable:@status ==:== lit) &&:&& call(id .:. id)) then(operator_assignment(instance_variable:@count +=:+= lit) call(id argument_list((:( lit ):)))) else(id call(id argument_list((:( string(\":\" lit \":\") ):)))) id)", + "child_fingerprints": [ + "binary(binary(instance_variable:@status ==:== lit) &&:&& call(id .:. 
id))", + "then(operator_assignment(instance_variable:@count +=:+= lit) call(id argument_list((:( lit ):))))", + "else(id call(id argument_list((:( string(\":\" lit \":\") ):))))" + ], + "child_masses": [ + 10, + 11, + 11 + ] + }, + { + "method_name": "process", + "node_name": "operator_assignment", + "line": 42, + "span": [ + 42, + 6, + 42, + 17 + ], + "mass": 4, + "fingerprint": "operator_assignment(instance_variable:@count +=:+= lit)", + "child_fingerprints": [ + "instance_variable:@count", + "lit" + ], + "child_masses": [ + 1, + 1 + ] + }, + { + "method_name": "ready?", + "node_name": "defn", + "line": 68, + "span": [ + 68, + 2, + 70, + 5 + ], + "mass": 8, + "fingerprint": "method(id id body_statement(instance_variable:@count >:> lit) id)", + "child_fingerprints": [ + "instance_variable:@count", + "lit" + ], + "child_masses": [ + 1, + 1 + ] + }, + { + "method_name": "self.build", + "node_name": "defn", + "line": 14, + "span": [ + 14, + 2, + 16, + 5 + ], + "mass": 16, + "fingerprint": "singleton_method(id id .:. id method_parameters((:( id ):)) body_statement(id argument_list((:( id ):))) id)", + "child_fingerprints": [], + "child_masses": [] + }, + { + "method_name": "self.build", + "node_name": "defs", + "line": 14, + "span": [ + 14, + 2, + 16, + 5 + ], + "mass": 16, + "fingerprint": "singleton_method(id id .:. 
id method_parameters((:( id ):)) body_statement(id argument_list((:( id ):))) id)", + "child_fingerprints": [], + "child_masses": [] + } ] } ] diff --git a/gems/decomplex/examples/syntax-facts/python/core.py b/gems/decomplex/examples/syntax-facts/python/core.py new file mode 100644 index 000000000..5ef1ef474 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/python/core.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import os.path + + +class PythonSyntaxFactsCore: + def __init__(self, lock, resource): + self._lock = lock + self.resource = resource + self.count = 0 + + def process(self, user: "User", items: list[str], callback): + name: str = user.profile.name + pending: str + result = [] + marker = "\\n" + + with self._lock: + self.count += 1 + + with open(user.path) as handle: + data = handle.read() + + for item in items: + if item is None: + continue + + if user.ready and item.startswith("x"): + callback(item) + + match item: + case "owner" | "admin": + self.escalate(user) + case _: + self.default(user) + + result.append(item) + + index = 0 + while index < len(result): + if result[index] == "stop": + break + + try: + self.audit(result[index]) + except ValueError: + continue + + index += 1 + + assert result + return data if result else marker + + def _normalize(self, value: str | None = None): + cleaned = value.strip() if value is not None else "missing" + return cleaned + + def generator(self, values): + for value in values: + yield value + + def simple_with(self, resource): + with resource: + pass + diff --git a/gems/decomplex/lib/decomplex/syntax_oracle.rb b/gems/decomplex/lib/decomplex/syntax_oracle.rb index 3c4d0b23b..3ba88cf7f 100644 --- a/gems/decomplex/lib/decomplex/syntax_oracle.rb +++ b/gems/decomplex/lib/decomplex/syntax_oracle.rb @@ -53,7 +53,11 @@ def project_document(document) "dispatch_sites" => rows(document.dispatch_sites, %i[variant_set arm_members outside function line span]), "semantic_effects" => 
rows(document.semantic_effect_sites, %i[kind detail function line span]), "predicate_bodies" => rows(document.predicate_defs, %i[name owner body line span]), - "local_complexity" => local_complexity_rows(document) + "local_complexity" => local_complexity_rows(document), + "clone_candidates" => rows( + document.clone_candidates, + %i[method_name node_name line span mass fingerprint child_fingerprints child_masses] + ) } end diff --git a/gems/decomplex/rust/src/decomplex/syntax_oracle.rs b/gems/decomplex/rust/src/decomplex/syntax_oracle.rs index 598c05158..03a534522 100644 --- a/gems/decomplex/rust/src/decomplex/syntax_oracle.rs +++ b/gems/decomplex/rust/src/decomplex/syntax_oracle.rs @@ -1,3 +1,4 @@ +use crate::decomplex::syntax::adapters::language_profile; use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde_json::{json, Value}; @@ -14,6 +15,8 @@ pub fn project_files(files: &[PathBuf], language: Language) -> Result { } pub fn project_document(document: &Document) -> Value { + let clone_candidates = language_profile(document.language).clone_candidates(document); + json!({ "file": logical_file(&document.file), "language": document.language.as_str(), @@ -103,6 +106,16 @@ pub fn project_document(document: &Document) -> Value { "score": score.score, "signals": score.signals, })).collect::>(), + "clone_candidates": sorted(clone_candidates.iter().map(|candidate| json!({ + "method_name": candidate.method_name, + "node_name": candidate.node_name, + "line": candidate.line, + "span": candidate.span, + "mass": candidate.mass, + "fingerprint": candidate.fingerprint, + "child_fingerprints": candidate.child_fingerprints, + "child_masses": candidate.child_masses, + })).collect()), }) } From 3d7209ff92eabbad7960cc2e08ac5600cbffcd1b Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 14:01:49 +0000 Subject: [PATCH 47/52] Add Rust Zig and Go syntax facts oracles --- .../examples/syntax-facts/go/core.go | 81 ++ 
.../syntax-facts/oracles/go-core.json | 1276 +++++++++++++++++ .../syntax-facts/oracles/rust-core.json | 1035 +++++++++++++ .../syntax-facts/oracles/zig-core.json | 961 +++++++++++++ .../examples/syntax-facts/rust/core.rs | 56 + .../examples/syntax-facts/zig/core.zig | 56 + 6 files changed, 3465 insertions(+) create mode 100644 gems/decomplex/examples/syntax-facts/go/core.go create mode 100644 gems/decomplex/examples/syntax-facts/oracles/go-core.json create mode 100644 gems/decomplex/examples/syntax-facts/oracles/rust-core.json create mode 100644 gems/decomplex/examples/syntax-facts/oracles/zig-core.json create mode 100644 gems/decomplex/examples/syntax-facts/rust/core.rs create mode 100644 gems/decomplex/examples/syntax-facts/zig/core.zig diff --git a/gems/decomplex/examples/syntax-facts/go/core.go b/gems/decomplex/examples/syntax-facts/go/core.go new file mode 100644 index 000000000..c33e2db33 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/go/core.go @@ -0,0 +1,81 @@ +package syntaxfacts + +type Status int + +const ( + Idle Status = iota + Busy +) + +type Profile struct { + Name string +} + +type User struct { + Role string + Ready bool + Active bool + Profile Profile +} + +type Account struct { + Name string + Active bool +} + +type GoSyntaxFactsCore struct { + status Status + count int + lookup map[string]int +} + +func NewGoSyntaxFactsCore(status Status) *GoSyntaxFactsCore { + return &GoSyntaxFactsCore{status: status, lookup: map[string]int{}} +} + +func (c *GoSyntaxFactsCore) Process(user User, items []string, callback func(Account)) string { + var first, second int = 1, 2 + _ = first + _ = second + + name := user.Profile.Name + account := Account{Name: name, Active: user.Active} + callback(account) + + switch user.Role { + case "owner", "admin": + c.escalate(user) + case "guest": + c.fallback(user) + default: + c.defaultCase(user) + } + + if c.status == Idle && user.Ready { + c.count += 1 + c.publish(Busy) + } else { + c.warn("not ready") + } + + 
for _, item := range items { + c.children(item) + } + + c.lookup[name] = c.count + go c.audit(name) + defer c.audit(name) + + return name +} + +func (c *GoSyntaxFactsCore) audit(name string) { + println(name) + c.send("record", name) + _ = c.status +} + +func (c GoSyntaxFactsCore) Ready() bool { + return c.count > 0 +} + diff --git a/gems/decomplex/examples/syntax-facts/oracles/go-core.json b/gems/decomplex/examples/syntax-facts/oracles/go-core.json new file mode 100644 index 000000000..5ac9c16e9 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/go-core.json @@ -0,0 +1,1276 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/go/core.go", + "language": "go", + "functions": [ + { + "name": "NewGoSyntaxFactsCore", + "owner": "core", + "line": 32, + "span": [ + 32, + 0, + 34, + 1 + ], + "visibility": "public", + "params": [ + "status" + ] + }, + { + "name": "Process", + "owner": "GoSyntaxFactsCore", + "line": 36, + "span": [ + 36, + 0, + 70, + 1 + ], + "visibility": "public", + "params": [ + "user", + "items", + "callback" + ] + }, + { + "name": "Ready", + "owner": "GoSyntaxFactsCore", + "line": 78, + "span": [ + 78, + 0, + 80, + 1 + ], + "visibility": "public", + "params": [] + }, + { + "name": "audit", + "owner": "GoSyntaxFactsCore", + "line": 72, + "span": [ + 72, + 0, + 76, + 1 + ], + "visibility": "private", + "params": [ + "name" + ] + } + ], + "owners": [ + { + "name": "Account", + "kind": "owner", + "line": 21, + "span": [ + 21, + 5, + 24, + 1 + ] + }, + { + "name": "GoSyntaxFactsCore", + "kind": "owner", + "line": 26, + "span": [ + 26, + 5, + 30, + 1 + ] + }, + { + "name": "Profile", + "kind": "owner", + "line": 10, + "span": [ + 10, + 5, + 12, + 1 + ] + }, + { + "name": "Status", + "kind": "owner", + "line": 3, + "span": [ + 3, + 5, + 3, + 15 + ] + }, + { + "name": "User", + "kind": "owner", + "line": 14, + "span": [ + 14, + 5, + 19, + 1 + ] + } + ], + "calls": [ + { + "receiver": 
"self", + "message": "audit", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 66, + "span": [ + 66, + 4, + 66, + 17 + ], + "conditional": false, + "arguments": [ + "name" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "audit", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 66, + "span": [ + 66, + 6, + 66, + 11 + ], + "conditional": false, + "arguments": [ + "name" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "audit", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 67, + "span": [ + 67, + 7, + 67, + 20 + ], + "conditional": false, + "arguments": [ + "name" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "audit", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 67, + "span": [ + 67, + 9, + 67, + 14 + ], + "conditional": false, + "arguments": [ + "name" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "callback", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 43, + "span": [ + 43, + 1, + 43, + 9 + ], + "conditional": false, + "arguments": [ + "account" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "children", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 62, + "span": [ + 62, + 2, + 62, + 12 + ], + "conditional": true, + "arguments": [ + "item" + ], + "control": "iterates", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "children", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 62, + "span": [ + 62, + 4, + 62, + 12 + ], + "conditional": true, + "arguments": [ + "item" + ], + "control": "iterates", + "safe_navigation": false, + "block": false + }, + 
{ + "receiver": "self", + "message": "defaultCase", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 51, + "span": [ + 51, + 2, + 51, + 15 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "defaultCase", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 51, + "span": [ + 51, + 4, + 51, + 15 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "escalate", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 47, + "span": [ + 47, + 2, + 47, + 12 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "escalate", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 47, + "span": [ + 47, + 4, + 47, + 12 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "fallback", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 49, + "span": [ + 49, + 2, + 49, + 12 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "fallback", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 49, + "span": [ + 49, + 4, + 49, + 12 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "println", + "function": "audit", + "owner": "GoSyntaxFactsCore", + "line": 73, + "span": [ + 73, + 1, + 73, + 8 + ], + "conditional": false, + "arguments": [ + "name" + ], + "control": "always", + 
"safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "publish", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 56, + "span": [ + 56, + 2, + 56, + 11 + ], + "conditional": true, + "arguments": [ + "Busy" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "publish", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 56, + "span": [ + 56, + 4, + 56, + 11 + ], + "conditional": true, + "arguments": [ + "Busy" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "send", + "function": "audit", + "owner": "GoSyntaxFactsCore", + "line": 74, + "span": [ + 74, + 1, + 74, + 7 + ], + "conditional": false, + "arguments": [ + "\"record\"", + "name" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "send", + "function": "audit", + "owner": "GoSyntaxFactsCore", + "line": 74, + "span": [ + 74, + 3, + 74, + 7 + ], + "conditional": false, + "arguments": [ + "\"record\"", + "name" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "warn", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 58, + "span": [ + 58, + 2, + 58, + 8 + ], + "conditional": true, + "arguments": [ + "\"not ready\"" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "warn", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 58, + "span": [ + 58, + 4, + 58, + 8 + ], + "conditional": true, + "arguments": [ + "\"not ready\"" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + } + ], + "state_reads": [ + { + "field": "Name", + "receiver": "user.Profile", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 41, + "span": [ + 41, + 9, + 41, + 
26 + ] + }, + { + "field": "Profile", + "receiver": "user", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 41, + "span": [ + 41, + 9, + 41, + 21 + ] + }, + { + "field": "Ready", + "receiver": "user", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 54, + "span": [ + 54, + 24, + 54, + 34 + ] + }, + { + "field": "Role", + "receiver": "user", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 45, + "span": [ + 45, + 8, + 45, + 17 + ] + }, + { + "field": "audit", + "receiver": "self", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 66, + "span": [ + 66, + 4, + 66, + 11 + ] + }, + { + "field": "audit", + "receiver": "self", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 67, + "span": [ + 67, + 7, + 67, + 14 + ] + }, + { + "field": "children", + "receiver": "self", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 62, + "span": [ + 62, + 2, + 62, + 12 + ] + }, + { + "field": "count", + "receiver": "self", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 65, + "span": [ + 65, + 18, + 65, + 25 + ] + }, + { + "field": "count", + "receiver": "self", + "function": "Ready", + "owner": "GoSyntaxFactsCore", + "line": 79, + "span": [ + 79, + 8, + 79, + 15 + ] + }, + { + "field": "defaultCase", + "receiver": "self", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 51, + "span": [ + 51, + 2, + 51, + 15 + ] + }, + { + "field": "escalate", + "receiver": "self", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 47, + "span": [ + 47, + 2, + 47, + 12 + ] + }, + { + "field": "fallback", + "receiver": "self", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 49, + "span": [ + 49, + 2, + 49, + 12 + ] + }, + { + "field": "lookup", + "receiver": "self", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 65, + "span": [ + 65, + 1, + 65, + 9 + ] + }, + { + "field": "publish", + "receiver": "self", + 
"function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 56, + "span": [ + 56, + 2, + 56, + 11 + ] + }, + { + "field": "send", + "receiver": "self", + "function": "audit", + "owner": "GoSyntaxFactsCore", + "line": 74, + "span": [ + 74, + 1, + 74, + 7 + ] + }, + { + "field": "status", + "receiver": "self", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 54, + "span": [ + 54, + 4, + 54, + 12 + ] + }, + { + "field": "status", + "receiver": "self", + "function": "audit", + "owner": "GoSyntaxFactsCore", + "line": 75, + "span": [ + 75, + 5, + 75, + 13 + ] + }, + { + "field": "warn", + "receiver": "self", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 58, + "span": [ + 58, + 2, + 58, + 8 + ] + } + ], + "state_writes": [ + { + "field": "count", + "receiver": "self", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 55, + "span": [ + 55, + 2, + 55, + 14 + ] + }, + { + "field": "lookup", + "receiver": "self", + "function": "Process", + "owner": "GoSyntaxFactsCore", + "line": 65, + "span": [ + 65, + 1, + 65, + 25 + ] + } + ], + "decisions": [ + { + "kind": "case_dispatch", + "members": [ + "\"guest\"", + "\"owner\", \"admin\"" + ], + "function": "Process", + "line": 45, + "span": [ + 45, + 1, + 52, + 2 + ], + "predicate": "user.Role", + "enclosing_span": [ + 45, + 1, + 52, + 2 + ] + }, + { + "kind": "conjunction", + "members": [ + "c.status == Idle", + "user.Ready" + ], + "function": "Process", + "line": 54, + "span": [ + 54, + 4, + 54, + 34 + ], + "predicate": "c.status == Idle && user.Ready", + "enclosing_span": [ + 54, + 1, + 59, + 2 + ] + } + ], + "branch_decisions": [ + { + "function": "Process", + "line": 45, + "span": [ + 45, + 1, + 52, + 2 + ], + "predicate": "user.Role", + "state_refs": [ + "user.Role" + ] + }, + { + "function": "Process", + "line": 54, + "span": [ + 54, + 1, + 59, + 2 + ], + "predicate": "c.status == Idle && user.Ready", + "state_refs": [ + "c.status", + "user.Ready" + ] + } + ], + 
"dispatch_sites": [], + "semantic_effects": [ + { + "kind": "hidden_io", + "detail": "println", + "function": "audit", + "line": 73, + "span": [ + 73, + 1, + 73, + 8 + ] + } + ], + "predicate_bodies": [], + "local_complexity": [ + { + "id": "(top-level)#NewGoSyntaxFactsCore", + "score": 0.0, + "signals": { + "early_exits": 2 + } + }, + { + "id": "GoSyntaxFactsCore#Process", + "score": 6.1, + "signals": { + "boolean_ops": 2, + "branches": 1, + "cases": 2, + "early_exits": 2, + "loops": 2, + "nested": 1 + } + }, + { + "id": "GoSyntaxFactsCore#Ready", + "score": 0.0, + "signals": { + "early_exits": 2 + } + }, + { + "id": "GoSyntaxFactsCore#audit", + "score": 0.0, + "signals": {} + } + ], + "clone_candidates": [ + { + "method_name": "NewGoSyntaxFactsCore", + "node_name": "block", + "line": 32, + "span": [ + 32, + 60, + 34, + 1 + ], + "mass": 31, + "fingerprint": "block({:{ statement_list(return_statement(id expression_list(&:& composite_literal(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(map_type(id [:[ id ]:] id) literal_value({:{ }:}))) }:}))))) }:})", + "child_fingerprints": [ + "statement_list(return_statement(id expression_list(&:& composite_literal(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(map_type(id [:[ id ]:] id) literal_value({:{ }:}))) }:})))))" + ], + "child_masses": [ + 28 + ] + }, + { + "method_name": "NewGoSyntaxFactsCore", + "node_name": "defn", + "line": 32, + "span": [ + 32, + 0, + 34, + 1 + ], + "mass": 43, + "fingerprint": "function_declaration(id id parameter_list((:( parameter_declaration(id id) ):)) pointer_type(*:* id) block({:{ statement_list(return_statement(id expression_list(&:& composite_literal(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(map_type(id [:[ id ]:] id) literal_value({:{ }:}))) }:}))))) }:}))", + "child_fingerprints": [ + "statement_list(return_statement(id expression_list(&:& composite_literal(id 
literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(map_type(id [:[ id ]:] id) literal_value({:{ }:}))) }:})))))" + ], + "child_masses": [ + 28 + ] + }, + { + "method_name": "Process", + "node_name": "assignment_statement", + "line": 38, + "span": [ + 38, + 1, + 38, + 10 + ], + "mass": 4, + "fingerprint": "assignment_statement(id =:= id)", + "child_fingerprints": [ + "id", + "id" + ], + "child_masses": [ + 1, + 1 + ] + }, + { + "method_name": "Process", + "node_name": "assignment_statement", + "line": 39, + "span": [ + 39, + 1, + 39, + 11 + ], + "mass": 4, + "fingerprint": "assignment_statement(id =:= id)", + "child_fingerprints": [ + "id", + "id" + ], + "child_masses": [ + 1, + 1 + ] + }, + { + "method_name": "Process", + "node_name": "assignment_statement", + "line": 55, + "span": [ + 55, + 2, + 55, + 14 + ], + "mass": 7, + "fingerprint": "assignment_statement(expression_list(id .:. id) +=:+= lit)", + "child_fingerprints": [ + "expression_list(id .:. id)", + "lit" + ], + "child_masses": [ + 4, + 1 + ] + }, + { + "method_name": "Process", + "node_name": "assignment_statement", + "line": 65, + "span": [ + 65, + 1, + 65, + 25 + ], + "mass": 14, + "fingerprint": "assignment_statement(expression_list(selector_expression(id .:. id) [:[ id ]:]) =:= expression_list(id .:. id))", + "child_fingerprints": [ + "expression_list(selector_expression(id .:. id) [:[ id ]:])", + "expression_list(id .:. id)" + ], + "child_masses": [ + 8, + 4 + ] + }, + { + "method_name": "Process", + "node_name": "block", + "line": 36, + "span": [ + 36, + 94, + 70, + 1 + ], + "mass": 235, + "fingerprint": "block({:{ statement_list(var_declaration(id var_spec(id ,:, id id =:= expression_list(lit ,:, lit))) assignment_statement(id =:= id) assignment_statement(id =:= id) short_var_declaration(id :=::= expression_list(selector_expression(id .:. id) .:. 
id)) short_var_declaration(id :=::= expression_list(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(id .:. id)) }:}))) expression_statement(id argument_list((:( id ):))) expression_switch_statement(id selector_expression(id .:. id) {:{ expression_case(id expression_list(interpreted_string_literal(\":\" id \":\") ,:, interpreted_string_literal(\":\" id \":\")) ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) expression_case(id expression_list(\":\" id \":\") ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) default_case(id ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) }:}) if_statement(id binary_expression(binary_expression(selector_expression(id .:. id) ==:== id) &&:&& selector_expression(id .:. id)) block({:{ statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:}) id block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" interpreted_string_literal_content:not ready \":\") ):)))) }:})) for_statement(id range_clause(expression_list(id ,:, id) :=::= id id) block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:})) assignment_statement(expression_list(selector_expression(id .:. id) [:[ id ]:]) =:= expression_list(id .:. id)) go_statement(id call_expression(selector_expression(id .:. id) argument_list((:( id ):)))) defer_statement(id call_expression(selector_expression(id .:. 
id) argument_list((:( id ):)))) return_statement(id id)) }:})", + "child_fingerprints": [ + "statement_list(var_declaration(id var_spec(id ,:, id id =:= expression_list(lit ,:, lit))) assignment_statement(id =:= id) assignment_statement(id =:= id) short_var_declaration(id :=::= expression_list(selector_expression(id .:. id) .:. id)) short_var_declaration(id :=::= expression_list(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(id .:. id)) }:}))) expression_statement(id argument_list((:( id ):))) expression_switch_statement(id selector_expression(id .:. id) {:{ expression_case(id expression_list(interpreted_string_literal(\":\" id \":\") ,:, interpreted_string_literal(\":\" id \":\")) ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) expression_case(id expression_list(\":\" id \":\") ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) default_case(id ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) }:}) if_statement(id binary_expression(binary_expression(selector_expression(id .:. id) ==:== id) &&:&& selector_expression(id .:. id)) block({:{ statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:}) id block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" interpreted_string_literal_content:not ready \":\") ):)))) }:})) for_statement(id range_clause(expression_list(id ,:, id) :=::= id id) block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:})) assignment_statement(expression_list(selector_expression(id .:. id) [:[ id ]:]) =:= expression_list(id .:. id)) go_statement(id call_expression(selector_expression(id .:. 
id) argument_list((:( id ):)))) defer_statement(id call_expression(selector_expression(id .:. id) argument_list((:( id ):)))) return_statement(id id))" + ], + "child_masses": [ + 232 + ] + }, + { + "method_name": "Process", + "node_name": "block", + "line": 54, + "span": [ + 54, + 35, + 57, + 2 + ], + "mass": 20, + "fingerprint": "block({:{ statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:})", + "child_fingerprints": [ + "statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))" + ], + "child_masses": [ + 17 + ] + }, + { + "method_name": "Process", + "node_name": "block", + "line": 57, + "span": [ + 57, + 8, + 59, + 2 + ], + "mass": 16, + "fingerprint": "block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" interpreted_string_literal_content:not ready \":\") ):)))) }:})", + "child_fingerprints": [ + "statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" interpreted_string_literal_content:not ready \":\") ):))))" + ], + "child_masses": [ + 13 + ] + }, + { + "method_name": "Process", + "node_name": "block", + "line": 61, + "span": [ + 61, + 28, + 63, + 2 + ], + "mass": 13, + "fingerprint": "block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:})", + "child_fingerprints": [ + "statement_list(expression_statement(selector_expression(id .:. 
id) argument_list((:( id ):))))" + ], + "child_masses": [ + 10 + ] + }, + { + "method_name": "Process", + "node_name": "defn", + "line": 36, + "span": [ + 36, + 0, + 70, + 1 + ], + "mass": 269, + "fingerprint": "method_declaration(id parameter_list((:( parameter_declaration(id pointer_type(*:* id)) ):)) id parameter_list((:( parameter_declaration(id id) ,:, parameter_declaration(id slice_type([:[ ]:] id)) ,:, parameter_declaration(id function_type(id parameter_list((:( id ):)))) ):)) id block({:{ statement_list(var_declaration(id var_spec(id ,:, id id =:= expression_list(lit ,:, lit))) assignment_statement(id =:= id) assignment_statement(id =:= id) short_var_declaration(id :=::= expression_list(selector_expression(id .:. id) .:. id)) short_var_declaration(id :=::= expression_list(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(id .:. id)) }:}))) expression_statement(id argument_list((:( id ):))) expression_switch_statement(id selector_expression(id .:. id) {:{ expression_case(id expression_list(interpreted_string_literal(\":\" id \":\") ,:, interpreted_string_literal(\":\" id \":\")) ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) expression_case(id expression_list(\":\" id \":\") ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) default_case(id ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) }:}) if_statement(id binary_expression(binary_expression(selector_expression(id .:. id) ==:== id) &&:&& selector_expression(id .:. id)) block({:{ statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:}) id block({:{ statement_list(expression_statement(selector_expression(id .:. 
id) argument_list((:( interpreted_string_literal(\":\" interpreted_string_literal_content:not ready \":\") ):)))) }:})) for_statement(id range_clause(expression_list(id ,:, id) :=::= id id) block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:})) assignment_statement(expression_list(selector_expression(id .:. id) [:[ id ]:]) =:= expression_list(id .:. id)) go_statement(id call_expression(selector_expression(id .:. id) argument_list((:( id ):)))) defer_statement(id call_expression(selector_expression(id .:. id) argument_list((:( id ):)))) return_statement(id id)) }:}))", + "child_fingerprints": [ + "statement_list(var_declaration(id var_spec(id ,:, id id =:= expression_list(lit ,:, lit))) assignment_statement(id =:= id) assignment_statement(id =:= id) short_var_declaration(id :=::= expression_list(selector_expression(id .:. id) .:. id)) short_var_declaration(id :=::= expression_list(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(id .:. id)) }:}))) expression_statement(id argument_list((:( id ):))) expression_switch_statement(id selector_expression(id .:. id) {:{ expression_case(id expression_list(interpreted_string_literal(\":\" id \":\") ,:, interpreted_string_literal(\":\" id \":\")) ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) expression_case(id expression_list(\":\" id \":\") ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) default_case(id ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) }:}) if_statement(id binary_expression(binary_expression(selector_expression(id .:. id) ==:== id) &&:&& selector_expression(id .:. id)) block({:{ statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. 
id) argument_list((:( id ):)))) }:}) id block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" interpreted_string_literal_content:not ready \":\") ):)))) }:})) for_statement(id range_clause(expression_list(id ,:, id) :=::= id id) block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:})) assignment_statement(expression_list(selector_expression(id .:. id) [:[ id ]:]) =:= expression_list(id .:. id)) go_statement(id call_expression(selector_expression(id .:. id) argument_list((:( id ):)))) defer_statement(id call_expression(selector_expression(id .:. id) argument_list((:( id ):)))) return_statement(id id))" + ], + "child_masses": [ + 232 + ] + }, + { + "method_name": "Process", + "node_name": "for_statement", + "line": 61, + "span": [ + 61, + 1, + 63, + 2 + ], + "mass": 23, + "fingerprint": "for_statement(id range_clause(expression_list(id ,:, id) :=::= id id) block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:}))", + "child_fingerprints": [ + "statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))" + ], + "child_masses": [ + 10 + ] + }, + { + "method_name": "Process", + "node_name": "if_statement", + "line": 54, + "span": [ + 54, + 1, + 59, + 2 + ], + "mass": 52, + "fingerprint": "if_statement(id binary_expression(binary_expression(selector_expression(id .:. id) ==:== id) &&:&& selector_expression(id .:. id)) block({:{ statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:}) id block({:{ statement_list(expression_statement(selector_expression(id .:. 
id) argument_list((:( interpreted_string_literal(\":\" interpreted_string_literal_content:not ready \":\") ):)))) }:}))", + "child_fingerprints": [ + "statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))" + ], + "child_masses": [ + 17 + ] + }, + { + "method_name": "Ready", + "node_name": "block", + "line": 78, + "span": [ + 78, + 40, + 80, + 1 + ], + "mass": 13, + "fingerprint": "block({:{ statement_list(return_statement(id expression_list(selector_expression(id .:. id) >:> lit))) }:})", + "child_fingerprints": [ + "statement_list(return_statement(id expression_list(selector_expression(id .:. id) >:> lit)))" + ], + "child_masses": [ + 10 + ] + }, + { + "method_name": "Ready", + "node_name": "defn", + "line": 78, + "span": [ + 78, + 0, + 80, + 1 + ], + "mass": 26, + "fingerprint": "method_declaration(id parameter_list((:( parameter_declaration(id id) ):)) id parameter_list((:( ):)) id block({:{ statement_list(return_statement(id expression_list(selector_expression(id .:. id) >:> lit))) }:}))", + "child_fingerprints": [ + "statement_list(return_statement(id expression_list(selector_expression(id .:. id) >:> lit)))" + ], + "child_masses": [ + 10 + ] + }, + { + "method_name": "audit", + "node_name": "assignment_statement", + "line": 75, + "span": [ + 75, + 1, + 75, + 13 + ], + "mass": 7, + "fingerprint": "assignment_statement(id =:= expression_list(id .:. id))", + "child_fingerprints": [ + "id", + "expression_list(id .:. id)" + ], + "child_masses": [ + 1, + 4 + ] + }, + { + "method_name": "audit", + "node_name": "block", + "line": 72, + "span": [ + 72, + 47, + 76, + 1 + ], + "mass": 31, + "fingerprint": "block({:{ statement_list(expression_statement(id argument_list((:( id ):))) expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" id \":\") ,:, id ):))) assignment_statement(id =:= expression_list(id .:. 
id))) }:})", + "child_fingerprints": [ + "statement_list(expression_statement(id argument_list((:( id ):))) expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" id \":\") ,:, id ):))) assignment_statement(id =:= expression_list(id .:. id)))" + ], + "child_masses": [ + 28 + ] + }, + { + "method_name": "audit", + "node_name": "defn", + "line": 72, + "span": [ + 72, + 0, + 76, + 1 + ], + "mass": 48, + "fingerprint": "method_declaration(id parameter_list((:( parameter_declaration(id pointer_type(*:* id)) ):)) id parameter_list((:( parameter_declaration(id id) ):)) block({:{ statement_list(expression_statement(id argument_list((:( id ):))) expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" id \":\") ,:, id ):))) assignment_statement(id =:= expression_list(id .:. id))) }:}))", + "child_fingerprints": [ + "statement_list(expression_statement(id argument_list((:( id ):))) expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" id \":\") ,:, id ):))) assignment_statement(id =:= expression_list(id .:. 
id)))" + ], + "child_masses": [ + 28 + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/rust-core.json b/gems/decomplex/examples/syntax-facts/oracles/rust-core.json new file mode 100644 index 000000000..3eb8d14cb --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/rust-core.json @@ -0,0 +1,1035 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/rust/core.rs", + "language": "rust", + "functions": [ + { + "name": "audit", + "owner": "RustSyntaxFactsCore", + "line": 46, + "span": [ + 46, + 4, + 50, + 5 + ], + "visibility": "private", + "params": [ + "&self", + "name" + ] + }, + { + "name": "new", + "owner": "RustSyntaxFactsCore", + "line": 12, + "span": [ + 12, + 4, + 14, + 5 + ], + "visibility": "public", + "params": [ + "status" + ] + }, + { + "name": "process", + "owner": "RustSyntaxFactsCore", + "line": 16, + "span": [ + 16, + 4, + 44, + 5 + ], + "visibility": "public", + "params": [ + "&mut self", + "user", + "items", + "callback" + ] + }, + { + "name": "ready", + "owner": "RustSyntaxFactsCore", + "line": 52, + "span": [ + 52, + 4, + 54, + 5 + ], + "visibility": "private", + "params": [ + "&self" + ] + } + ], + "owners": [ + { + "name": "RustSyntaxFactsCore", + "kind": "impl", + "line": 11, + "span": [ + 11, + 0, + 55, + 1 + ] + }, + { + "name": "RustSyntaxFactsCore", + "kind": "struct", + "line": 1, + "span": [ + 1, + 0, + 4, + 1 + ] + } + ], + "calls": [ + { + "receiver": "Account", + "message": "new", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 23, + "span": [ + 23, + 22, + 23, + 63 + ], + "conditional": false, + "arguments": [ + "name.clone()", + "user.active()" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "item", + "message": "children", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 40, + "span": [ + 40, + 12, + 40, + 27 + ], + "conditional": true, + 
"arguments": [], + "control": "iterates", + "safe_navigation": false, + "block": false + }, + { + "receiver": "name", + "message": "clone", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 23, + "span": [ + 23, + 35, + 23, + 47 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "Some", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 43, + "span": [ + 43, + 8, + 43, + 18 + ], + "conditional": false, + "arguments": [ + "name" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "callback", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 24, + "span": [ + 24, + 8, + 24, + 26 + ], + "conditional": false, + "arguments": [ + "&account" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "default_case", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 29, + "span": [ + 29, + 17, + 29, + 40 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "escalate", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 27, + "span": [ + 27, + 41, + 27, + 60 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "fallback", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 28, + "span": [ + 28, + 27, + 28, + 46 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "publish", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 34, + "span": [ + 34, + 12, + 34, + 
38 + ], + "conditional": true, + "arguments": [ + "Status::Busy" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "send", + "function": "audit", + "owner": "RustSyntaxFactsCore", + "line": 48, + "span": [ + 48, + 8, + 48, + 33 + ], + "conditional": false, + "arguments": [ + "\"record\"", + "name" + ], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "status", + "function": "audit", + "owner": "RustSyntaxFactsCore", + "line": 49, + "span": [ + 49, + 8, + 49, + 21 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "warn", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 36, + "span": [ + 36, + 12, + 36, + 34 + ], + "conditional": true, + "arguments": [ + "\"not ready\"" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "user", + "message": "active", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 23, + "span": [ + 23, + 49, + 23, + 62 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "user", + "message": "profile", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 22, + "span": [ + 22, + 19, + 22, + 33 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "user", + "message": "ready", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 32, + "span": [ + 32, + 50, + 32, + 62 + ], + "conditional": true, + "arguments": [], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "user", + "message": "role", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 26, + "span": [ + 
26, + 14, + 26, + 25 + ], + "conditional": true, + "arguments": [], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "user.profile()", + "message": "name", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 22, + "span": [ + 22, + 19, + 22, + 40 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": false + }, + { + "receiver": "user.profile().name()", + "message": "to_string", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 22, + "span": [ + 22, + 19, + 22, + 52 + ], + "conditional": false, + "arguments": [], + "control": "always", + "safe_navigation": false, + "block": false + } + ], + "state_reads": [ + { + "field": "active", + "receiver": "user", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 23, + "span": [ + 23, + 49, + 23, + 60 + ] + }, + { + "field": "children", + "receiver": "item", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 40, + "span": [ + 40, + 12, + 40, + 25 + ] + }, + { + "field": "clone", + "receiver": "name", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 23, + "span": [ + 23, + 35, + 23, + 45 + ] + }, + { + "field": "count", + "receiver": "self", + "function": "ready", + "owner": "RustSyntaxFactsCore", + "line": 53, + "span": [ + 53, + 8, + 53, + 18 + ] + }, + { + "field": "default_case", + "receiver": "self", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 29, + "span": [ + 29, + 17, + 29, + 34 + ] + }, + { + "field": "escalate", + "receiver": "self", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 27, + "span": [ + 27, + 41, + 27, + 54 + ] + }, + { + "field": "fallback", + "receiver": "self", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 28, + "span": [ + 28, + 27, + 28, + 40 + ] + }, + { + "field": "name", + "receiver": "user.profile()", + "function": "process", + "owner": 
"RustSyntaxFactsCore", + "line": 22, + "span": [ + 22, + 19, + 22, + 38 + ] + }, + { + "field": "profile", + "receiver": "user", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 22, + "span": [ + 22, + 19, + 22, + 31 + ] + }, + { + "field": "publish", + "receiver": "self", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 34, + "span": [ + 34, + 12, + 34, + 24 + ] + }, + { + "field": "ready", + "receiver": "user", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 32, + "span": [ + 32, + 50, + 32, + 60 + ] + }, + { + "field": "role", + "receiver": "user", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 26, + "span": [ + 26, + 14, + 26, + 23 + ] + }, + { + "field": "send", + "receiver": "self", + "function": "audit", + "owner": "RustSyntaxFactsCore", + "line": 48, + "span": [ + 48, + 8, + 48, + 17 + ] + }, + { + "field": "status", + "receiver": "self", + "function": "audit", + "owner": "RustSyntaxFactsCore", + "line": 49, + "span": [ + 49, + 8, + 49, + 19 + ] + }, + { + "field": "to_string", + "receiver": "user.profile().name()", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 22, + "span": [ + 22, + 19, + 22, + 50 + ] + }, + { + "field": "warn", + "receiver": "self", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 36, + "span": [ + 36, + 12, + 36, + 21 + ] + } + ], + "state_writes": [ + { + "field": "count", + "receiver": "self", + "function": "process", + "owner": "RustSyntaxFactsCore", + "line": 33, + "span": [ + 33, + 12, + 33, + 27 + ] + } + ], + "decisions": [ + { + "kind": "case_dispatch", + "members": [ + "Role::Guest", + "Role::Owner | Role::Admin" + ], + "function": "process", + "line": 26, + "span": [ + 26, + 8, + 30, + 9 + ], + "predicate": "user.role()", + "enclosing_span": [ + 26, + 8, + 30, + 9 + ] + }, + { + "kind": "conjunction", + "members": [ + "matches!(self.status, Status::Idle)", + "user.ready()" + ], + "function": "process", 
+ "line": 32, + "span": [ + 32, + 11, + 32, + 62 + ], + "predicate": "matches!(self.status, Status::Idle) && user.ready()", + "enclosing_span": [ + 32, + 8, + 37, + 9 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 26, + "span": [ + 26, + 8, + 30, + 9 + ], + "predicate": "user.role()", + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 32, + "span": [ + 32, + 8, + 37, + 9 + ], + "predicate": "matches!(self.status, Status::Idle) && user.ready()", + "state_refs": [ + "user.ready" + ] + } + ], + "dispatch_sites": [], + "semantic_effects": [ + { + "kind": "callback_inversion", + "detail": "callback", + "function": "process", + "line": 24, + "span": [ + 24, + 8, + 24, + 26 + ] + } + ], + "predicate_bodies": [], + "local_complexity": [ + { + "id": "RustSyntaxFactsCore#audit", + "score": 0.0, + "signals": {} + }, + { + "id": "RustSyntaxFactsCore#new", + "score": 0.0, + "signals": {} + }, + { + "id": "RustSyntaxFactsCore#process", + "score": 5.6, + "signals": { + "boolean_ops": 2, + "branches": 1, + "cases": 1, + "loops": 2, + "nested": 1 + } + }, + { + "id": "RustSyntaxFactsCore#ready", + "score": 0.0, + "signals": {} + } + ], + "clone_candidates": [ + { + "method_name": "audit", + "node_name": "block", + "line": 46, + "span": [ + 46, + 32, + 50, + 5 + ], + "mass": 43, + "fingerprint": "block({:{ expression_statement(macro_invocation(id !:! token_tree((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;) }:})", + "child_fingerprints": [ + "expression_statement(macro_invocation(id !:! token_tree((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;)", + "expression_statement(call_expression(field_expression(id .:. 
id) arguments((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;)", + "expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;)" + ], + "child_masses": [ + 14, + 16, + 10 + ] + }, + { + "method_name": "audit", + "node_name": "defn", + "line": 46, + "span": [ + 46, + 4, + 50, + 5 + ], + "mass": 59, + "fingerprint": "function_item(id id parameters((:( self_parameter(&:& id) ,:, parameter(id ::: reference_type(&:& id)) ):)) block({:{ expression_statement(macro_invocation(id !:! token_tree((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;) }:}))", + "child_fingerprints": [ + "expression_statement(macro_invocation(id !:! token_tree((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;)", + "expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;)", + "expression_statement(call_expression(field_expression(id .:. 
id) arguments((:( ):))) ;:;)" + ], + "child_masses": [ + 14, + 16, + 10 + ] + }, + { + "method_name": "new", + "node_name": "block", + "line": 12, + "span": [ + 12, + 39, + 14, + 5 + ], + "mass": 14, + "fingerprint": "block({:{ struct_expression(id field_initializer_list({:{ id ,:, field_initializer(id ::: lit) }:})) }:})", + "child_fingerprints": [ + "struct_expression(id field_initializer_list({:{ id ,:, field_initializer(id ::: lit) }:}))" + ], + "child_masses": [ + 11 + ] + }, + { + "method_name": "new", + "node_name": "defn", + "line": 12, + "span": [ + 12, + 4, + 14, + 5 + ], + "mass": 27, + "fingerprint": "function_item(id id id parameters((:( parameter(id ::: id) ):)) ->:-> id block({:{ struct_expression(id field_initializer_list({:{ id ,:, field_initializer(id ::: lit) }:})) }:}))", + "child_fingerprints": [ + "struct_expression(id field_initializer_list({:{ id ,:, field_initializer(id ::: lit) }:}))" + ], + "child_masses": [ + 11 + ] + }, + { + "method_name": "process", + "node_name": "block", + "line": 21, + "span": [ + 21, + 24, + 44, + 5 + ], + "mass": 228, + "fingerprint": "block({:{ let_declaration(id id =:= call_expression(field_expression(call_expression(field_expression(call_expression(field_expression(id .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) ;:;) let_declaration(id id =:= call_expression(scoped_identifier(id ::::: id) arguments((:( call_expression(field_expression(id .:. id) arguments((:( ):))) ,:, call_expression(field_expression(id .:. id) arguments((:( ):))) ):))) ;:;) expression_statement(call_expression(id arguments((:( reference_expression(&:& id) ):))) ;:;) expression_statement(id call_expression(field_expression(id .:. id) arguments((:( ):))) match_block({:{ match_arm(match_pattern(scoped_identifier(id ::::: id) |:| scoped_identifier(id ::::: id)) =>:=> call_expression(field_expression(id .:. 
id) arguments((:( id ):))) ,:,) match_arm(match_pattern(id ::::: id) =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) match_arm(id =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) }:})) expression_statement(id binary_expression(macro_invocation(id !:! token_tree((:( id .:. id ,:, id ::::: id ):))) &&:&& call_expression(field_expression(id .:. id) arguments((:( ):)))) block({:{ expression_statement(compound_assignment_expr(field_expression(id .:. id) +=:+= lit) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( scoped_identifier(id ::::: id) ):))) ;:;) }:}) else_clause(id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ):))) ;:;) }:}))) expression_statement(id id id id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;) }:})) call_expression(id arguments((:( id ):))) }:})", + "child_fingerprints": [ + "let_declaration(id id =:= call_expression(field_expression(call_expression(field_expression(call_expression(field_expression(id .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) ;:;)", + "let_declaration(id id =:= call_expression(scoped_identifier(id ::::: id) arguments((:( call_expression(field_expression(id .:. id) arguments((:( ):))) ,:, call_expression(field_expression(id .:. id) arguments((:( ):))) ):))) ;:;)", + "expression_statement(call_expression(id arguments((:( reference_expression(&:& id) ):))) ;:;)", + "expression_statement(id call_expression(field_expression(id .:. id) arguments((:( ):))) match_block({:{ match_arm(match_pattern(scoped_identifier(id ::::: id) |:| scoped_identifier(id ::::: id)) =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) match_arm(match_pattern(id ::::: id) =>:=> call_expression(field_expression(id .:. 
id) arguments((:( id ):))) ,:,) match_arm(id =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) }:}))", + "expression_statement(id binary_expression(macro_invocation(id !:! token_tree((:( id .:. id ,:, id ::::: id ):))) &&:&& call_expression(field_expression(id .:. id) arguments((:( ):)))) block({:{ expression_statement(compound_assignment_expr(field_expression(id .:. id) +=:+= lit) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( scoped_identifier(id ::::: id) ):))) ;:;) }:}) else_clause(id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ):))) ;:;) }:})))", + "expression_statement(id id id id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;) }:}))", + "call_expression(id arguments((:( id ):)))" + ], + "child_masses": [ + 27, + 30, + 10, + 64, + 70, + 18, + 6 + ] + }, + { + "method_name": "process", + "node_name": "block", + "line": 32, + "span": [ + 32, + 63, + 35, + 9 + ], + "mass": 26, + "fingerprint": "block({:{ expression_statement(compound_assignment_expr(field_expression(id .:. id) +=:+= lit) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( scoped_identifier(id ::::: id) ):))) ;:;) }:})", + "child_fingerprints": [ + "expression_statement(compound_assignment_expr(field_expression(id .:. id) +=:+= lit) ;:;)", + "expression_statement(call_expression(field_expression(id .:. id) arguments((:( scoped_identifier(id ::::: id) ):))) ;:;)" + ], + "child_masses": [ + 9, + 14 + ] + }, + { + "method_name": "process", + "node_name": "block", + "line": 35, + "span": [ + 35, + 15, + 37, + 9 + ], + "mass": 17, + "fingerprint": "block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ):))) ;:;) }:})", + "child_fingerprints": [ + "expression_statement(call_expression(field_expression(id .:. 
id) arguments((:( string_literal(\":\" lit \":\") ):))) ;:;)" + ], + "child_masses": [ + 14 + ] + }, + { + "method_name": "process", + "node_name": "block", + "line": 39, + "span": [ + 39, + 26, + 41, + 9 + ], + "mass": 13, + "fingerprint": "block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;) }:})", + "child_fingerprints": [ + "expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;)" + ], + "child_masses": [ + 10 + ] + }, + { + "method_name": "process", + "node_name": "defn", + "line": 16, + "span": [ + 16, + 4, + 44, + 5 + ], + "mass": 276, + "fingerprint": "function_item(id id id parameters((:( self_parameter(&:& id id) ,:, parameter(id ::: reference_type(&:& id)) ,:, parameter(id ::: generic_type(id type_arguments(<:< id >:>))) ,:, parameter(id ::: function_type(id parameters((:( reference_type(&:& id) ):)))) ,:, ):)) ->:-> generic_type(id type_arguments(<:< id >:>)) block({:{ let_declaration(id id =:= call_expression(field_expression(call_expression(field_expression(call_expression(field_expression(id .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) ;:;) let_declaration(id id =:= call_expression(scoped_identifier(id ::::: id) arguments((:( call_expression(field_expression(id .:. id) arguments((:( ):))) ,:, call_expression(field_expression(id .:. id) arguments((:( ):))) ):))) ;:;) expression_statement(call_expression(id arguments((:( reference_expression(&:& id) ):))) ;:;) expression_statement(id call_expression(field_expression(id .:. id) arguments((:( ):))) match_block({:{ match_arm(match_pattern(scoped_identifier(id ::::: id) |:| scoped_identifier(id ::::: id)) =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) match_arm(match_pattern(id ::::: id) =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) match_arm(id =>:=> call_expression(field_expression(id .:. 
id) arguments((:( id ):))) ,:,) }:})) expression_statement(id binary_expression(macro_invocation(id !:! token_tree((:( id .:. id ,:, id ::::: id ):))) &&:&& call_expression(field_expression(id .:. id) arguments((:( ):)))) block({:{ expression_statement(compound_assignment_expr(field_expression(id .:. id) +=:+= lit) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( scoped_identifier(id ::::: id) ):))) ;:;) }:}) else_clause(id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ):))) ;:;) }:}))) expression_statement(id id id id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;) }:})) call_expression(id arguments((:( id ):))) }:}))", + "child_fingerprints": [ + "let_declaration(id id =:= call_expression(field_expression(call_expression(field_expression(call_expression(field_expression(id .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) ;:;)", + "let_declaration(id id =:= call_expression(scoped_identifier(id ::::: id) arguments((:( call_expression(field_expression(id .:. id) arguments((:( ):))) ,:, call_expression(field_expression(id .:. id) arguments((:( ):))) ):))) ;:;)", + "expression_statement(call_expression(id arguments((:( reference_expression(&:& id) ):))) ;:;)", + "expression_statement(id call_expression(field_expression(id .:. id) arguments((:( ):))) match_block({:{ match_arm(match_pattern(scoped_identifier(id ::::: id) |:| scoped_identifier(id ::::: id)) =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) match_arm(match_pattern(id ::::: id) =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) match_arm(id =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) }:}))", + "expression_statement(id binary_expression(macro_invocation(id !:! token_tree((:( id .:. 
id ,:, id ::::: id ):))) &&:&& call_expression(field_expression(id .:. id) arguments((:( ):)))) block({:{ expression_statement(compound_assignment_expr(field_expression(id .:. id) +=:+= lit) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( scoped_identifier(id ::::: id) ):))) ;:;) }:}) else_clause(id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ):))) ;:;) }:})))", + "expression_statement(id id id id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;) }:}))", + "call_expression(id arguments((:( id ):)))" + ], + "child_masses": [ + 27, + 30, + 10, + 64, + 70, + 18, + 6 + ] + }, + { + "method_name": "ready", + "node_name": "block", + "line": 52, + "span": [ + 52, + 28, + 54, + 5 + ], + "mass": 10, + "fingerprint": "block({:{ binary_expression(field_expression(id .:. id) >:> lit) }:})", + "child_fingerprints": [ + "binary_expression(field_expression(id .:. id) >:> lit)" + ], + "child_masses": [ + 7 + ] + }, + { + "method_name": "ready", + "node_name": "defn", + "line": 52, + "span": [ + 52, + 4, + 54, + 5 + ], + "mass": 21, + "fingerprint": "function_item(id id parameters((:( self_parameter(&:& id) ):)) ->:-> id block({:{ binary_expression(field_expression(id .:. id) >:> lit) }:}))", + "child_fingerprints": [ + "binary_expression(field_expression(id .:. 
id) >:> lit)" + ], + "child_masses": [ + 7 + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/zig-core.json b/gems/decomplex/examples/syntax-facts/oracles/zig-core.json new file mode 100644 index 000000000..936f2ff2b --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/zig-core.json @@ -0,0 +1,961 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/zig/core.zig", + "language": "zig", + "functions": [ + { + "name": "audit", + "owner": "ZigSyntaxFactsCore", + "line": 38, + "span": [ + 38, + 4, + 41, + 5 + ], + "visibility": "private", + "params": [ + "self", + "name" + ] + }, + { + "name": "init", + "owner": "ZigSyntaxFactsCore", + "line": 7, + "span": [ + 7, + 4, + 9, + 5 + ], + "visibility": "public", + "params": [ + "status" + ] + }, + { + "name": "process", + "owner": "ZigSyntaxFactsCore", + "line": 11, + "span": [ + 11, + 4, + 36, + 5 + ], + "visibility": "public", + "params": [ + "self", + "user", + "items", + "callback" + ] + }, + { + "name": "ready", + "owner": "ZigSyntaxFactsCore", + "line": 43, + "span": [ + 43, + 4, + 45, + 5 + ], + "visibility": "private", + "params": [ + "self" + ] + } + ], + "owners": [ + { + "name": "Item", + "kind": "struct", + "line": 53, + "span": [ + 53, + 13, + 55, + 1 + ] + }, + { + "name": "ZigSyntaxFactsCore", + "kind": "struct", + "line": 3, + "span": [ + 3, + 31, + 46, + 1 + ] + } + ], + "calls": [ + { + "receiver": "item", + "message": "children", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 31, + "span": [ + 31, + 16, + 31, + 31 + ], + "conditional": true, + "arguments": [], + "control": "iterates", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "callback", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 15, + "span": [ + 15, + 8, + 15, + 22 + ], + "conditional": false, + "arguments": [ + "user" + ], + "control": "always", + 
"safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "defaultCase", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 20, + "span": [ + 20, + 20, + 20, + 42 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "escalate", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 18, + "span": [ + 18, + 30, + 18, + 49 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "fallback", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 19, + "span": [ + 19, + 22, + 19, + 41 + ], + "conditional": true, + "arguments": [ + "user" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "publish", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 25, + "span": [ + 25, + 12, + 25, + 31 + ], + "conditional": true, + "arguments": [ + ".busy" + ], + "control": "conditional", + "safe_navigation": false, + "block": false + } + ], + "state_reads": [ + { + "field": "admin", + "receiver": ".literal", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 18, + "span": [ + 18, + 20, + 18, + 26 + ] + }, + { + "field": "busy", + "receiver": ".literal", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 25, + "span": [ + 25, + 25, + 25, + 30 + ] + }, + { + "field": "children", + "receiver": "item", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 31, + "span": [ + 31, + 16, + 31, + 29 + ] + }, + { + "field": "count", + "receiver": "self", + "function": "ready", + "owner": "ZigSyntaxFactsCore", + "line": 44, + "span": [ + 44, + 15, + 44, + 25 + ] + }, + { + "field": "defaultCase", + "receiver": "self", + "function": "process", + "owner": 
"ZigSyntaxFactsCore", + "line": 20, + "span": [ + 20, + 20, + 20, + 36 + ] + }, + { + "field": "escalate", + "receiver": "self", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 18, + "span": [ + 18, + 30, + 18, + 43 + ] + }, + { + "field": "fallback", + "receiver": "self", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 19, + "span": [ + 19, + 22, + 19, + 35 + ] + }, + { + "field": "guest", + "receiver": ".literal", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 19, + "span": [ + 19, + 12, + 19, + 18 + ] + }, + { + "field": "idle", + "receiver": ".literal", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 23, + "span": [ + 23, + 27, + 23, + 32 + ] + }, + { + "field": "name", + "receiver": "user.profile", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 12, + "span": [ + 12, + 21, + 12, + 38 + ] + }, + { + "field": "owner", + "receiver": ".literal", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 18, + "span": [ + 18, + 12, + 18, + 18 + ] + }, + { + "field": "profile", + "receiver": "user", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 12, + "span": [ + 12, + 21, + 12, + 33 + ] + }, + { + "field": "publish", + "receiver": "self", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 25, + "span": [ + 25, + 12, + 25, + 24 + ] + }, + { + "field": "ready", + "receiver": "user", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 23, + "span": [ + 23, + 37, + 23, + 47 + ] + }, + { + "field": "role", + "receiver": "user", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 17, + "span": [ + 17, + 16, + 17, + 25 + ] + }, + { + "field": "status", + "receiver": "self", + "function": "audit", + "owner": "ZigSyntaxFactsCore", + "line": 40, + "span": [ + 40, + 12, + 40, + 23 + ] + }, + { + "field": "status", + "receiver": "self", + "function": "process", + "owner": "ZigSyntaxFactsCore", + 
"line": 23, + "span": [ + 23, + 12, + 23, + 23 + ] + } + ], + "state_writes": [ + { + "field": "count", + "receiver": ".literal", + "function": "init", + "owner": "ZigSyntaxFactsCore", + "line": 8, + "span": [ + 8, + 53, + 8, + 63 + ] + }, + { + "field": "count", + "receiver": "self", + "function": "process", + "owner": "ZigSyntaxFactsCore", + "line": 24, + "span": [ + 24, + 12, + 24, + 22 + ] + }, + { + "field": "status", + "receiver": ".literal", + "function": "init", + "owner": "ZigSyntaxFactsCore", + "line": 8, + "span": [ + 8, + 35, + 8, + 51 + ] + } + ], + "decisions": [ + { + "kind": "case_dispatch", + "members": [ + ".guest", + ".owner" + ], + "function": "process", + "line": 17, + "span": [ + 17, + 8, + 21, + 9 + ], + "predicate": "user.role", + "enclosing_span": [ + 17, + 8, + 21, + 9 + ] + }, + { + "kind": "conjunction", + "members": [ + "self.status == .idle", + "user.ready" + ], + "function": "process", + "line": 23, + "span": [ + 23, + 12, + 23, + 47 + ], + "predicate": "self.status == .idle and user.ready", + "enclosing_span": [ + 23, + 8, + 28, + 9 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 17, + "span": [ + 17, + 8, + 21, + 9 + ], + "predicate": "user.role", + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 23, + "span": [ + 23, + 8, + 28, + 9 + ], + "predicate": "self.status == .idle and user.ready", + "state_refs": [ + ".literal.idle", + "status", + "user.ready" + ] + } + ], + "dispatch_sites": [], + "semantic_effects": [], + "predicate_bodies": [], + "local_complexity": [ + { + "id": "ZigSyntaxFactsCore#audit", + "score": 0.0, + "signals": {} + }, + { + "id": "ZigSyntaxFactsCore#init", + "score": 0.0, + "signals": { + "early_exits": 1 + } + }, + { + "id": "ZigSyntaxFactsCore#process", + "score": 5.6, + "signals": { + "boolean_ops": 2, + "branches": 1, + "cases": 1, + "early_exits": 1, + "loops": 2, + "nested": 1 + } + }, + { + "id": "ZigSyntaxFactsCore#ready", + "score": 0.0, + 
"signals": { + "early_exits": 1 + } + } + ], + "clone_candidates": [ + { + "method_name": "(top-level)", + "node_name": "enum_declaration", + "line": 48, + "span": [ + 48, + 15, + 51, + 1 + ], + "mass": 8, + "fingerprint": "enum_declaration(id {:{ id ,:, id ,:, }:})", + "child_fingerprints": [ + "id", + "id" + ], + "child_masses": [ + 1, + 1 + ] + }, + { + "method_name": "(top-level)", + "node_name": "struct_declaration", + "line": 3, + "span": [ + 3, + 31, + 46, + 1 + ], + "mass": 365, + "fingerprint": "struct_declaration(id {:{ container_field(id ::: id) ,:, container_field(id ::: id) ,:, function_declaration(id id id parameters((:( parameter(id ::: id) ):)) id block({:{ expression_statement(return_expression(id struct_initializer(id initializer_list({:{ assignment_expression(field_expression(.:. id) =:= id) ,:, assignment_expression(field_expression(.:. id) =:= lit) }:}))) ;:;) }:})) function_declaration(id id id parameters((:( parameter(id ::: pointer_type(*:* id)) ,:, parameter(id ::: id) ,:, parameter(id ::: slice_type([:[ ]:] id id)) ,:, parameter(id ::: id) ):)) nullable_type(?:? slice_type([:[ ]:] id id)) block({:{ variable_declaration(id id =:= field_expression(field_expression(id .:. id) .:. id) ;:;) variable_declaration(id id ::: nullable_type(?:? slice_type([:[ ]:] id id)) =:= nil ;:;) expression_statement(call_expression(id (:( id ):)) ;:;) switch_expression(id (:( field_expression(id .:. id) ):) {:{ switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, }:}) if_statement(id (:( binary_expression(binary_expression(field_expression(id .:. id) ==:== field_expression(.:. id)) id field_expression(id .:. id)) ):) block_expression({:{ variable_declaration(field_expression(id .:. 
id) +=:+= lit ;:;) expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;) }:}) else_clause(id labeled_statement({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ }:})) ):)) ;:;) }:}))) labeled_statement(id (:( id ):) payload(|:| id |:|) block_expression({:{ variable_declaration(id =:= call_expression(field_expression(id .:. id) (:( ):)) ;:;) }:})) variable_declaration(id =:= id ;:;) expression_statement(return_expression(id id) ;:;) }:})) function_declaration(id id parameters((:( parameter(id ::: pointer_type(*:* id)) ,:, parameter(id ::: slice_type([:[ ]:] id id)) ):)) id block({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ id }:})) ):)) ;:;) variable_declaration(id =:= field_expression(id .:. id) ;:;) }:})) function_declaration(id id parameters((:( parameter(id ::: pointer_type(*:* id)) ):)) id block({:{ expression_statement(return_expression(id binary_expression(field_expression(id .:. id) >:> lit)) ;:;) }:})) }:})", + "child_fingerprints": [ + "container_field(id ::: id)", + "container_field(id ::: id)", + "function_declaration(id id id parameters((:( parameter(id ::: id) ):)) id block({:{ expression_statement(return_expression(id struct_initializer(id initializer_list({:{ assignment_expression(field_expression(.:. id) =:= id) ,:, assignment_expression(field_expression(.:. id) =:= lit) }:}))) ;:;) }:}))", + "function_declaration(id id id parameters((:( parameter(id ::: pointer_type(*:* id)) ,:, parameter(id ::: id) ,:, parameter(id ::: slice_type([:[ ]:] id id)) ,:, parameter(id ::: id) ):)) nullable_type(?:? slice_type([:[ ]:] id id)) block({:{ variable_declaration(id id =:= field_expression(field_expression(id .:. id) .:. 
id) ;:;) variable_declaration(id id ::: nullable_type(?:? slice_type([:[ ]:] id id)) =:= nil ;:;) expression_statement(call_expression(id (:( id ):)) ;:;) switch_expression(id (:( field_expression(id .:. id) ):) {:{ switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, }:}) if_statement(id (:( binary_expression(binary_expression(field_expression(id .:. id) ==:== field_expression(.:. id)) id field_expression(id .:. id)) ):) block_expression({:{ variable_declaration(field_expression(id .:. id) +=:+= lit ;:;) expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;) }:}) else_clause(id labeled_statement({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ }:})) ):)) ;:;) }:}))) labeled_statement(id (:( id ):) payload(|:| id |:|) block_expression({:{ variable_declaration(id =:= call_expression(field_expression(id .:. id) (:( ):)) ;:;) }:})) variable_declaration(id =:= id ;:;) expression_statement(return_expression(id id) ;:;) }:}))", + "function_declaration(id id parameters((:( parameter(id ::: pointer_type(*:* id)) ,:, parameter(id ::: slice_type([:[ ]:] id id)) ):)) id block({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ id }:})) ):)) ;:;) variable_declaration(id =:= field_expression(id .:. id) ;:;) }:}))", + "function_declaration(id id parameters((:( parameter(id ::: pointer_type(*:* id)) ):)) id block({:{ expression_statement(return_expression(id binary_expression(field_expression(id .:. 
id) >:> lit)) ;:;) }:}))" + ], + "child_masses": [ + 4, + 4, + 37, + 231, + 56, + 27 + ] + }, + { + "method_name": "(top-level)", + "node_name": "struct_declaration", + "line": 53, + "span": [ + 53, + 13, + 55, + 1 + ], + "mass": 13, + "fingerprint": "struct_declaration(id {:{ container_field(id ::: slice_type([:[ ]:] id id)) ,:, }:})", + "child_fingerprints": [ + "container_field(id ::: slice_type([:[ ]:] id id))" + ], + "child_masses": [ + 8 + ] + }, + { + "method_name": "audit", + "node_name": "block", + "line": 38, + "span": [ + 38, + 63, + 41, + 5 + ], + "mass": 34, + "fingerprint": "block({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ id }:})) ):)) ;:;) variable_declaration(id =:= field_expression(id .:. id) ;:;) }:})", + "child_fingerprints": [ + "expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ id }:})) ):)) ;:;)", + "variable_declaration(id =:= field_expression(id .:. id) ;:;)" + ], + "child_masses": [ + 23, + 8 + ] + }, + { + "method_name": "audit", + "node_name": "defn", + "line": 38, + "span": [ + 38, + 4, + 41, + 5 + ], + "mass": 56, + "fingerprint": "function_declaration(id id parameters((:( parameter(id ::: pointer_type(*:* id)) ,:, parameter(id ::: slice_type([:[ ]:] id id)) ):)) id block({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ id }:})) ):)) ;:;) variable_declaration(id =:= field_expression(id .:. id) ;:;) }:}))", + "child_fingerprints": [ + "expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. 
initializer_list({:{ id }:})) ):)) ;:;)", + "variable_declaration(id =:= field_expression(id .:. id) ;:;)" + ], + "child_masses": [ + 23, + 8 + ] + }, + { + "method_name": "init", + "node_name": "block", + "line": 7, + "span": [ + 7, + 51, + 9, + 5 + ], + "mass": 25, + "fingerprint": "block({:{ expression_statement(return_expression(id struct_initializer(id initializer_list({:{ assignment_expression(field_expression(.:. id) =:= id) ,:, assignment_expression(field_expression(.:. id) =:= lit) }:}))) ;:;) }:})", + "child_fingerprints": [ + "expression_statement(return_expression(id struct_initializer(id initializer_list({:{ assignment_expression(field_expression(.:. id) =:= id) ,:, assignment_expression(field_expression(.:. id) =:= lit) }:}))) ;:;)" + ], + "child_masses": [ + 22 + ] + }, + { + "method_name": "init", + "node_name": "defn", + "line": 7, + "span": [ + 7, + 4, + 9, + 5 + ], + "mass": 37, + "fingerprint": "function_declaration(id id id parameters((:( parameter(id ::: id) ):)) id block({:{ expression_statement(return_expression(id struct_initializer(id initializer_list({:{ assignment_expression(field_expression(.:. id) =:= id) ,:, assignment_expression(field_expression(.:. id) =:= lit) }:}))) ;:;) }:}))", + "child_fingerprints": [ + "expression_statement(return_expression(id struct_initializer(id initializer_list({:{ assignment_expression(field_expression(.:. id) =:= id) ,:, assignment_expression(field_expression(.:. id) =:= lit) }:}))) ;:;)" + ], + "child_masses": [ + 22 + ] + }, + { + "method_name": "process", + "node_name": "block", + "line": 11, + "span": [ + 11, + 113, + 36, + 5 + ], + "mass": 192, + "fingerprint": "block({:{ variable_declaration(id id =:= field_expression(field_expression(id .:. id) .:. id) ;:;) variable_declaration(id id ::: nullable_type(?:? slice_type([:[ ]:] id id)) =:= nil ;:;) expression_statement(call_expression(id (:( id ):)) ;:;) switch_expression(id (:( field_expression(id .:. id) ):) {:{ switch_case(field_expression(.:. 
id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, }:}) if_statement(id (:( binary_expression(binary_expression(field_expression(id .:. id) ==:== field_expression(.:. id)) id field_expression(id .:. id)) ):) block_expression({:{ variable_declaration(field_expression(id .:. id) +=:+= lit ;:;) expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;) }:}) else_clause(id labeled_statement({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ }:})) ):)) ;:;) }:}))) labeled_statement(id (:( id ):) payload(|:| id |:|) block_expression({:{ variable_declaration(id =:= call_expression(field_expression(id .:. id) (:( ):)) ;:;) }:})) variable_declaration(id =:= id ;:;) expression_statement(return_expression(id id) ;:;) }:})", + "child_fingerprints": [ + "variable_declaration(id id =:= field_expression(field_expression(id .:. id) .:. id) ;:;)", + "variable_declaration(id id ::: nullable_type(?:? slice_type([:[ ]:] id id)) =:= nil ;:;)", + "expression_statement(call_expression(id (:( id ):)) ;:;)", + "switch_expression(id (:( field_expression(id .:. id) ):) {:{ switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, }:})", + "if_statement(id (:( binary_expression(binary_expression(field_expression(id .:. id) ==:== field_expression(.:. id)) id field_expression(id .:. 
id)) ):) block_expression({:{ variable_declaration(field_expression(id .:. id) +=:+= lit ;:;) expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;) }:}) else_clause(id labeled_statement({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ }:})) ):)) ;:;) }:})))", + "labeled_statement(id (:( id ):) payload(|:| id |:|) block_expression({:{ variable_declaration(id =:= call_expression(field_expression(id .:. id) (:( ):)) ;:;) }:}))", + "variable_declaration(id =:= id ;:;)", + "expression_statement(return_expression(id id) ;:;)" + ], + "child_masses": [ + 12, + 14, + 7, + 54, + 69, + 23, + 5, + 5 + ] + }, + { + "method_name": "process", + "node_name": "defn", + "line": 11, + "span": [ + 11, + 4, + 36, + 5 + ], + "mass": 231, + "fingerprint": "function_declaration(id id id parameters((:( parameter(id ::: pointer_type(*:* id)) ,:, parameter(id ::: id) ,:, parameter(id ::: slice_type([:[ ]:] id id)) ,:, parameter(id ::: id) ):)) nullable_type(?:? slice_type([:[ ]:] id id)) block({:{ variable_declaration(id id =:= field_expression(field_expression(id .:. id) .:. id) ;:;) variable_declaration(id id ::: nullable_type(?:? slice_type([:[ ]:] id id)) =:= nil ;:;) expression_statement(call_expression(id (:( id ):)) ;:;) switch_expression(id (:( field_expression(id .:. id) ):) {:{ switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, }:}) if_statement(id (:( binary_expression(binary_expression(field_expression(id .:. id) ==:== field_expression(.:. id)) id field_expression(id .:. 
id)) ):) block_expression({:{ variable_declaration(field_expression(id .:. id) +=:+= lit ;:;) expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;) }:}) else_clause(id labeled_statement({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ }:})) ):)) ;:;) }:}))) labeled_statement(id (:( id ):) payload(|:| id |:|) block_expression({:{ variable_declaration(id =:= call_expression(field_expression(id .:. id) (:( ):)) ;:;) }:})) variable_declaration(id =:= id ;:;) expression_statement(return_expression(id id) ;:;) }:}))", + "child_fingerprints": [ + "variable_declaration(id id =:= field_expression(field_expression(id .:. id) .:. id) ;:;)", + "variable_declaration(id id ::: nullable_type(?:? slice_type([:[ ]:] id id)) =:= nil ;:;)", + "expression_statement(call_expression(id (:( id ):)) ;:;)", + "switch_expression(id (:( field_expression(id .:. id) ):) {:{ switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, }:})", + "if_statement(id (:( binary_expression(binary_expression(field_expression(id .:. id) ==:== field_expression(.:. id)) id field_expression(id .:. id)) ):) block_expression({:{ variable_declaration(field_expression(id .:. id) +=:+= lit ;:;) expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;) }:}) else_clause(id labeled_statement({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. 
initializer_list({:{ }:})) ):)) ;:;) }:})))", + "labeled_statement(id (:( id ):) payload(|:| id |:|) block_expression({:{ variable_declaration(id =:= call_expression(field_expression(id .:. id) (:( ):)) ;:;) }:}))", + "variable_declaration(id =:= id ;:;)", + "expression_statement(return_expression(id id) ;:;)" + ], + "child_masses": [ + 12, + 14, + 7, + 54, + 69, + 23, + 5, + 5 + ] + }, + { + "method_name": "process", + "node_name": "if_statement", + "line": 23, + "span": [ + 23, + 8, + 28, + 9 + ], + "mass": 69, + "fingerprint": "if_statement(id (:( binary_expression(binary_expression(field_expression(id .:. id) ==:== field_expression(.:. id)) id field_expression(id .:. id)) ):) block_expression({:{ variable_declaration(field_expression(id .:. id) +=:+= lit ;:;) expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;) }:}) else_clause(id labeled_statement({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ }:})) ):)) ;:;) }:})))", + "child_fingerprints": [ + "variable_declaration(field_expression(id .:. id) +=:+= lit ;:;)", + "expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;)" + ], + "child_masses": [ + 8, + 12 + ] + }, + { + "method_name": "process", + "node_name": "switch_case", + "line": 18, + "span": [ + 18, + 12, + 18, + 49 + ], + "mass": 17, + "fingerprint": "switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):)))", + "child_fingerprints": [ + "field_expression(.:. id)", + "field_expression(.:. id)", + "call_expression(field_expression(id .:. 
id) (:( id ):))" + ], + "child_masses": [ + 3, + 3, + 8 + ] + }, + { + "method_name": "process", + "node_name": "switch_case", + "line": 19, + "span": [ + 19, + 12, + 19, + 41 + ], + "mass": 13, + "fingerprint": "switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):)))", + "child_fingerprints": [ + "field_expression(.:. id)", + "call_expression(field_expression(id .:. id) (:( id ):))" + ], + "child_masses": [ + 3, + 8 + ] + }, + { + "method_name": "process", + "node_name": "switch_case", + "line": 20, + "span": [ + 20, + 12, + 20, + 42 + ], + "mass": 11, + "fingerprint": "switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):)))", + "child_fingerprints": [ + "call_expression(field_expression(id .:. id) (:( id ):))" + ], + "child_masses": [ + 8 + ] + }, + { + "method_name": "process", + "node_name": "switch_expression", + "line": 17, + "span": [ + 17, + 8, + 21, + 9 + ], + "mass": 54, + "fingerprint": "switch_expression(id (:( field_expression(id .:. id) ):) {:{ switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, }:})", + "child_fingerprints": [ + "field_expression(id .:. id)", + "switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):)))", + "switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):)))", + "switch_case(id =>:=> call_expression(field_expression(id .:. 
id) (:( id ):)))" + ], + "child_masses": [ + 4, + 17, + 13, + 11 + ] + }, + { + "method_name": "ready", + "node_name": "block", + "line": 43, + "span": [ + 43, + 45, + 45, + 5 + ], + "mass": 14, + "fingerprint": "block({:{ expression_statement(return_expression(id binary_expression(field_expression(id .:. id) >:> lit)) ;:;) }:})", + "child_fingerprints": [ + "expression_statement(return_expression(id binary_expression(field_expression(id .:. id) >:> lit)) ;:;)" + ], + "child_masses": [ + 11 + ] + }, + { + "method_name": "ready", + "node_name": "defn", + "line": 43, + "span": [ + 43, + 4, + 45, + 5 + ], + "mass": 27, + "fingerprint": "function_declaration(id id parameters((:( parameter(id ::: pointer_type(*:* id)) ):)) id block({:{ expression_statement(return_expression(id binary_expression(field_expression(id .:. id) >:> lit)) ;:;) }:}))", + "child_fingerprints": [ + "expression_statement(return_expression(id binary_expression(field_expression(id .:. id) >:> lit)) ;:;)" + ], + "child_masses": [ + 11 + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/rust/core.rs b/gems/decomplex/examples/syntax-facts/rust/core.rs new file mode 100644 index 000000000..461089b93 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/rust/core.rs @@ -0,0 +1,56 @@ +pub struct RustSyntaxFactsCore { + status: Status, + count: usize, +} + +pub enum Status { + Idle, + Busy, +} + +impl RustSyntaxFactsCore { + pub fn new(status: Status) -> Self { + Self { status, count: 0 } + } + + pub fn process( + &mut self, + user: &User, + items: Vec, + callback: fn(&Account), + ) -> Option { + let name = user.profile().name().to_string(); + let account = Account::new(name.clone(), user.active()); + callback(&account); + + match user.role() { + Role::Owner | Role::Admin => self.escalate(user), + Role::Guest => self.fallback(user), + _ => self.default_case(user), + } + + if matches!(self.status, Status::Idle) && user.ready() { + self.count += 1; + self.publish(Status::Busy); + } 
else { + self.warn("not ready"); + } + + for item in items { + item.children(); + } + + Some(name) + } + + fn audit(&self, name: &str) { + println!("{}", name); + self.send("record", name); + self.status(); + } + + fn ready(&self) -> bool { + self.count > 0 + } +} + diff --git a/gems/decomplex/examples/syntax-facts/zig/core.zig b/gems/decomplex/examples/syntax-facts/zig/core.zig new file mode 100644 index 000000000..af8fc2b74 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/zig/core.zig @@ -0,0 +1,56 @@ +const std = @import("std"); + +pub const ZigSyntaxFactsCore = struct { + status: Status, + count: usize, + + pub fn init(status: Status) ZigSyntaxFactsCore { + return ZigSyntaxFactsCore{ .status = status, .count = 0 }; + } + + pub fn process(self: *ZigSyntaxFactsCore, user: anytype, items: []const Item, callback: anytype) ?[]const u8 { + const name = user.profile.name; + var result: ?[]const u8 = null; + + callback(user); + + switch (user.role) { + .owner, .admin => self.escalate(user), + .guest => self.fallback(user), + else => self.defaultCase(user), + } + + if (self.status == .idle and user.ready) { + self.count += 1; + self.publish(.busy); + } else { + std.debug.print("not ready", .{}); + } + + for (items) |item| { + _ = item.children(); + } + + result = name; + return result; + } + + fn audit(self: *ZigSyntaxFactsCore, name: []const u8) void { + std.debug.print("{s}", .{name}); + _ = self.status; + } + + fn ready(self: *ZigSyntaxFactsCore) bool { + return self.count > 0; + } +}; + +const Status = enum { + idle, + busy, +}; + +const Item = struct { + value: []const u8, +}; + From 23baef8e41dabc68d199a62a444091f3fd48a857 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 15:51:34 +0000 Subject: [PATCH 48/52] Enforce shared syntax oracle across Ruby and Rust --- gems/decomplex/examples/syntax-facts/c/core.c | 51 + .../examples/syntax-facts/cpp/core.cpp | 61 + .../examples/syntax-facts/csharp/core.cs | 67 + 
.../examples/syntax-facts/java/core.java | 53 + .../examples/syntax-facts/javascript/core.js | 55 + .../examples/syntax-facts/kotlin/core.kt | 46 + .../examples/syntax-facts/lua/core.lua | 51 + .../examples/syntax-facts/oracles/c-core.json | 555 +++++ .../syntax-facts/oracles/cpp-core.json | 637 ++++++ .../syntax-facts/oracles/csharp-core.json | 563 +++++ .../syntax-facts/oracles/go-core.json | 1073 +++------ .../syntax-facts/oracles/java-core.json | 563 +++++ .../syntax-facts/oracles/javascript-core.json | 695 ++++++ .../syntax-facts/oracles/kotlin-core.json | 491 ++++ .../syntax-facts/oracles/lua-core.json | 597 +++++ .../syntax-facts/oracles/php-core.json | 629 ++++++ .../syntax-facts/oracles/python-core.json | 1286 ++--------- .../syntax-facts/oracles/ruby-core.json | 1991 ++++++----------- .../syntax-facts/oracles/rust-core.json | 909 +++----- .../syntax-facts/oracles/swift-core.json | 595 +++++ .../syntax-facts/oracles/typescript-core.json | 646 ++++++ .../syntax-facts/oracles/zig-core.json | 755 ++----- .../examples/syntax-facts/php/core.php | 60 + .../examples/syntax-facts/swift/core.swift | 54 + .../examples/syntax-facts/typescript/core.ts | 72 + gems/decomplex/lib/decomplex/syntax.rb | 33 +- .../lib/decomplex/syntax/adapters.rb | 24 +- gems/decomplex/lib/decomplex/syntax/cpp.rb | 10 + gems/decomplex/lib/decomplex/syntax/csharp.rb | 23 + gems/decomplex/lib/decomplex/syntax/go.rb | 41 +- gems/decomplex/lib/decomplex/syntax/kotlin.rb | 18 + gems/decomplex/lib/decomplex/syntax/lua.rb | 53 +- gems/decomplex/lib/decomplex/syntax/php.rb | 43 +- gems/decomplex/lib/decomplex/syntax/python.rb | 9 + gems/decomplex/lib/decomplex/syntax_oracle.rb | 48 +- gems/decomplex/rust/src/decomplex/syntax.rs | 14 + .../src/decomplex/syntax/adapters/base.rs | 203 +- .../rust/src/decomplex/syntax/adapters/c.rs | 9 + .../rust/src/decomplex/syntax/adapters/cpp.rs | 47 +- .../src/decomplex/syntax/adapters/csharp.rs | 41 +- .../rust/src/decomplex/syntax/adapters/go.rs | 67 +- 
.../src/decomplex/syntax/adapters/java.rs | 19 + .../decomplex/syntax/adapters/javascript.rs | 26 +- .../src/decomplex/syntax/adapters/kotlin.rs | 59 +- .../rust/src/decomplex/syntax/adapters/lua.rs | 106 +- .../rust/src/decomplex/syntax/adapters/php.rs | 44 +- .../src/decomplex/syntax/adapters/python.rs | 10 +- .../src/decomplex/syntax/adapters/ruby.rs | 130 +- .../src/decomplex/syntax/adapters/rust.rs | 11 +- .../src/decomplex/syntax/adapters/swift.rs | 104 + .../decomplex/syntax/adapters/typescript.rs | 26 +- .../rust/src/decomplex/syntax/adapters/zig.rs | 56 +- .../rust/src/decomplex/syntax/complexity.rs | 88 +- .../decomplex/syntax/tree_sitter_adapter.rs | 488 +++- .../rust/src/decomplex/syntax_oracle.rs | 20 +- gems/decomplex/test/syntax_oracle_test.rb | 2 +- 56 files changed, 9998 insertions(+), 4429 deletions(-) create mode 100644 gems/decomplex/examples/syntax-facts/c/core.c create mode 100644 gems/decomplex/examples/syntax-facts/cpp/core.cpp create mode 100644 gems/decomplex/examples/syntax-facts/csharp/core.cs create mode 100644 gems/decomplex/examples/syntax-facts/java/core.java create mode 100644 gems/decomplex/examples/syntax-facts/javascript/core.js create mode 100644 gems/decomplex/examples/syntax-facts/kotlin/core.kt create mode 100644 gems/decomplex/examples/syntax-facts/lua/core.lua create mode 100644 gems/decomplex/examples/syntax-facts/oracles/c-core.json create mode 100644 gems/decomplex/examples/syntax-facts/oracles/cpp-core.json create mode 100644 gems/decomplex/examples/syntax-facts/oracles/csharp-core.json create mode 100644 gems/decomplex/examples/syntax-facts/oracles/java-core.json create mode 100644 gems/decomplex/examples/syntax-facts/oracles/javascript-core.json create mode 100644 gems/decomplex/examples/syntax-facts/oracles/kotlin-core.json create mode 100644 gems/decomplex/examples/syntax-facts/oracles/lua-core.json create mode 100644 gems/decomplex/examples/syntax-facts/oracles/php-core.json create mode 100644 
gems/decomplex/examples/syntax-facts/oracles/swift-core.json create mode 100644 gems/decomplex/examples/syntax-facts/oracles/typescript-core.json create mode 100644 gems/decomplex/examples/syntax-facts/php/core.php create mode 100644 gems/decomplex/examples/syntax-facts/swift/core.swift create mode 100644 gems/decomplex/examples/syntax-facts/typescript/core.ts diff --git a/gems/decomplex/examples/syntax-facts/c/core.c b/gems/decomplex/examples/syntax-facts/c/core.c new file mode 100644 index 000000000..cde7856d5 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/c/core.c @@ -0,0 +1,51 @@ +typedef enum { + STATUS_IDLE, + STATUS_BUSY +} Status; + +typedef struct CSyntaxFactsCore { + Status status; + int count; + Sink *sink; +} CSyntaxFactsCore; + +void CSyntaxFactsCore_process(CSyntaxFactsCore *self, User *user, Item **items, int item_count, Callback callback) { + const char *name = user->profile->name; + Account account = make_account(name, user->active); + callback(&account); + + switch (user->role) { + case ROLE_OWNER: + case ROLE_ADMIN: + escalate(self, user); + break; + case ROLE_GUEST: + fallback(self, user); + break; + default: + default_case(self, user); + break; + } + + if (self->status == STATUS_IDLE && user->ready) { + self->count += 1; + publish(self, STATUS_BUSY); + } else { + warn("not ready"); + } + + for (int i = 0; i < item_count; i++) { + item_children(items[i]); + } +} + +static Status CSyntaxFactsCore_audit(CSyntaxFactsCore *self, const char *name) { + puts(name); + sink_send(self->sink, "record", name); + return self->status; +} + +int CSyntaxFactsCore_ready(CSyntaxFactsCore *self) { + return self->count > 0; +} + diff --git a/gems/decomplex/examples/syntax-facts/cpp/core.cpp b/gems/decomplex/examples/syntax-facts/cpp/core.cpp new file mode 100644 index 000000000..965fa11b4 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/cpp/core.cpp @@ -0,0 +1,61 @@ +#include +#include + +enum class Status { + Idle, + Busy +}; + +class 
CppSyntaxFactsCore { + Status status; + int count; + Sink *sink; + +public: + explicit CppSyntaxFactsCore(Status status, Sink *sink) + : status(status), count(0), sink(sink) {} + + std::string process(User &user, std::vector &items, Callback callback) { + std::string name = user.profile().name(); + Account account{name, user.active()}; + callback(account); + + switch (user.role()) { + case Role::Owner: + case Role::Admin: + escalate(user); + break; + case Role::Guest: + fallback(user); + break; + default: + defaultCase(user); + break; + } + + if (status == Status::Idle && user.ready()) { + count += 1; + publish(Status::Busy); + } else { + warn("not ready"); + } + + for (auto &item : items) { + item.children(); + } + + return name; + } + +private: + Status audit(const std::string &name) { + std::cout << name; + sink->send("record", name); + return status; + } + + bool ready() const { + return count > 0; + } +}; + diff --git a/gems/decomplex/examples/syntax-facts/csharp/core.cs b/gems/decomplex/examples/syntax-facts/csharp/core.cs new file mode 100644 index 000000000..168611ffe --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/csharp/core.cs @@ -0,0 +1,67 @@ +using System; +using System.Collections.Generic; + +class CSharpSyntaxFactsCore +{ + private Status status; + private int count; + private Sink sink; + + public CSharpSyntaxFactsCore(Status status, Sink sink) + { + this.status = status; + this.count = 0; + this.sink = sink; + } + + public string Process(User user, IEnumerable items, Action callback) + { + var name = user.Profile.Name; + var account = new Account(name, user.Active); + callback(account); + + switch (user.Role) + { + case "owner": + case "admin": + Escalate(user); + break; + case "guest": + Fallback(user); + break; + default: + DefaultCase(user); + break; + } + + if (this.status == Status.Idle && user.Ready) + { + this.count += 1; + Publish(Status.Busy); + } + else + { + Console.WriteLine("not ready"); + } + + foreach (var item in items) + { 
+ item.Children(); + } + + return name; + } + + private Status Audit(string name) + { + Console.WriteLine(name); + sink.Send("record", name); + return status; + } + + private bool Ready() + { + return count > 0; + } +} + diff --git a/gems/decomplex/examples/syntax-facts/java/core.java b/gems/decomplex/examples/syntax-facts/java/core.java new file mode 100644 index 000000000..d3b220bfc --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/java/core.java @@ -0,0 +1,53 @@ +package syntaxfacts; + +class JavaSyntaxFactsCore { + private Status status; + private int count; + + public JavaSyntaxFactsCore(Status status) { + this.status = status; + this.count = 0; + } + + public String process(User user, Iterable items, Callback callback) { + String name = user.profile().name(); + Account account = new Account(name, user.active()); + callback.call(account); + + switch (user.role()) { + case "owner": + case "admin": + this.escalate(user); + break; + case "guest": + this.fallback(user); + break; + default: + this.defaultCase(user); + } + + if (this.status == Status.IDLE && user.ready()) { + this.count += 1; + this.publish(Status.BUSY); + } else { + System.err.println("not ready"); + } + + for (Item item : items) { + item.children(); + } + + return name; + } + + private void audit(String name) { + System.out.println(name); + this.send("record", name); + this.status.name(); + } + + boolean ready() { + return this.count > 0; + } +} + diff --git a/gems/decomplex/examples/syntax-facts/javascript/core.js b/gems/decomplex/examples/syntax-facts/javascript/core.js new file mode 100644 index 000000000..8f06a5326 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/javascript/core.js @@ -0,0 +1,55 @@ +export class JavaScriptSyntaxFactsCore { + #status; + + constructor(status, sink) { + this.#status = status; + this.count = 0; + this.sink = sink; + } + + process(user, items, callback) { + const name = user?.profile?.name; + const account = { name, active: user.active }; + 
callback(account); + + switch (user.role) { + case "owner": + case "admin": + this.escalate(user); + break; + case "guest": + this.fallback(user); + break; + default: + this.defaultCase(user); + } + + if (this.#status === "idle" && user.ready) { + this.count += 1; + this.publish("busy"); + } else { + console.warn("not ready"); + } + + for (const index in items) { + this.#audit(items[index]); + } + + return name ?? null; + } + + #audit(name) { + console.log(name); + this.sink.send("record", name); + return this.#status; + } + + ready() { + return this.count > 0; + } +} + +export function normalizeValue(input) { + return input ?? null; +} + diff --git a/gems/decomplex/examples/syntax-facts/kotlin/core.kt b/gems/decomplex/examples/syntax-facts/kotlin/core.kt new file mode 100644 index 000000000..e02085ff8 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/kotlin/core.kt @@ -0,0 +1,46 @@ +package syntaxfacts + +class KotlinSyntaxFactsCore(private var status: Status, private val sink: Sink) { + private var count = 0 + + fun process(user: User, items: List, callback: (Account) -> Unit): String? 
{ + val name = user.profile?.name + val account = Account(name, user.active) + callback(account) + + when (user.role) { + "owner", "admin" -> escalate(user) + "guest" -> fallback(user) + else -> defaultCase(user) + } + + if (status == Status.IDLE && user.ready) { + count += 1 + publish(Status.BUSY) + } else { + println("not ready") + } + + for (item in items) { + item.children() + } + + return name ?: "missing" + } + + private fun audit(name: String): Status { + println(name) + sink.send("record", name) + return status + } + + fun ready(): Boolean { + return count > 0 + } +} + +enum class Status { + IDLE, + BUSY +} + diff --git a/gems/decomplex/examples/syntax-facts/lua/core.lua b/gems/decomplex/examples/syntax-facts/lua/core.lua new file mode 100644 index 000000000..84ef82b3e --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/lua/core.lua @@ -0,0 +1,51 @@ +local LuaSyntaxFactsCore = {} +LuaSyntaxFactsCore.__index = LuaSyntaxFactsCore + +function LuaSyntaxFactsCore.new(status, sink) + local instance = { + status = status, + count = 0, + sink = sink + } + return setmetatable(instance, LuaSyntaxFactsCore) +end + +function LuaSyntaxFactsCore:process(user, items, callback) + local name = user.profile.name + local account = { name = name, active = user.active } + callback(account) + + if user.role == "owner" or user.role == "admin" then + self:escalate(user) + elseif user.role == "guest" then + self:fallback(user) + else + self:default_case(user) + end + + if self.status == "idle" and user.ready then + self.count = self.count + 1 + self:publish("busy") + else + print("not ready") + end + + for _, item in ipairs(items) do + item:children() + end + + return name or "missing" +end + +function LuaSyntaxFactsCore:audit(name) + print(name) + self.sink:send("record", name) + return self.status +end + +function LuaSyntaxFactsCore:ready() + return self.count > 0 +end + +return LuaSyntaxFactsCore + diff --git a/gems/decomplex/examples/syntax-facts/oracles/c-core.json 
b/gems/decomplex/examples/syntax-facts/oracles/c-core.json new file mode 100644 index 000000000..b5ea57a4e --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/c-core.json @@ -0,0 +1,555 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/c/core.c", + "language": "c", + "functions": [ + { + "line": 12, + "name": "CSyntaxFactsCore_process", + "owner": "CSyntaxFactsCore", + "params": [ + "self", + "user", + "items", + "item_count", + "callback" + ], + "span": [ + 12, + 0, + 40, + 1 + ], + "visibility": "public" + }, + { + "line": 42, + "name": "CSyntaxFactsCore_audit", + "owner": "CSyntaxFactsCore", + "params": [ + "self", + "name" + ], + "span": [ + 42, + 0, + 46, + 1 + ], + "visibility": "private" + }, + { + "line": 48, + "name": "CSyntaxFactsCore_ready", + "owner": "CSyntaxFactsCore", + "params": [ + "self" + ], + "span": [ + 48, + 0, + 50, + 1 + ], + "visibility": "public" + } + ], + "owners": [ + { + "kind": "struct", + "line": 6, + "name": "CSyntaxFactsCore", + "span": [ + 6, + 8, + 10, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "&account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "CSyntaxFactsCore_process", + "line": 15, + "message": "callback", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 15, + 2, + 15, + 20 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "CSyntaxFactsCore_process", + "line": 34, + "message": "warn", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 34, + 4, + 34, + 21 + ] + }, + { + "arguments": [ + "items[i]" + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "CSyntaxFactsCore_process", + "line": 38, + "message": "item_children", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": 
false, + "span": [ + 38, + 4, + 38, + 27 + ] + }, + { + "arguments": [ + "name", + "user->active" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "CSyntaxFactsCore_process", + "line": 14, + "message": "make_account", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 14, + 20, + 14, + 52 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "CSyntaxFactsCore_audit", + "line": 43, + "message": "puts", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 43, + 2, + 43, + 12 + ] + }, + { + "arguments": [ + "self", + "STATUS_BUSY" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "CSyntaxFactsCore_process", + "line": 32, + "message": "publish", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 32, + 4, + 32, + 30 + ] + }, + { + "arguments": [ + "self", + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "CSyntaxFactsCore_process", + "line": 20, + "message": "escalate", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 20, + 6, + 20, + 26 + ] + }, + { + "arguments": [ + "self", + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "CSyntaxFactsCore_process", + "line": 23, + "message": "fallback", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 23, + 6, + 23, + 26 + ] + }, + { + "arguments": [ + "self", + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "CSyntaxFactsCore_process", + "line": 26, + "message": "default_case", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 26, + 6, + 26, + 30 + ] + }, + { + "arguments": [ + "self->sink", 
+ "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "CSyntaxFactsCore_audit", + "line": 44, + "message": "sink_send", + "owner": "CSyntaxFactsCore", + "receiver": "self.sink", + "safe_navigation": false, + "span": [ + 44, + 2, + 44, + 39 + ] + } + ], + "state_reads": [ + { + "field": "active", + "function": "CSyntaxFactsCore_process", + "line": 14, + "owner": "CSyntaxFactsCore", + "receiver": "user", + "span": [ + 14, + 39, + 14, + 51 + ] + }, + { + "field": "count", + "function": "CSyntaxFactsCore_ready", + "line": 49, + "owner": "CSyntaxFactsCore", + "receiver": "self", + "span": [ + 49, + 9, + 49, + 20 + ] + }, + { + "field": "name", + "function": "CSyntaxFactsCore_process", + "line": 13, + "owner": "CSyntaxFactsCore", + "receiver": "user->profile", + "span": [ + 13, + 21, + 13, + 40 + ] + }, + { + "field": "profile", + "function": "CSyntaxFactsCore_process", + "line": 13, + "owner": "CSyntaxFactsCore", + "receiver": "user", + "span": [ + 13, + 21, + 13, + 34 + ] + }, + { + "field": "ready", + "function": "CSyntaxFactsCore_process", + "line": 30, + "owner": "CSyntaxFactsCore", + "receiver": "user", + "span": [ + 30, + 37, + 30, + 48 + ] + }, + { + "field": "role", + "function": "CSyntaxFactsCore_process", + "line": 17, + "owner": "CSyntaxFactsCore", + "receiver": "user", + "span": [ + 17, + 10, + 17, + 20 + ] + }, + { + "field": "sink", + "function": "CSyntaxFactsCore_audit", + "line": 44, + "owner": "CSyntaxFactsCore", + "receiver": "self", + "span": [ + 44, + 12, + 44, + 22 + ] + }, + { + "field": "status", + "function": "CSyntaxFactsCore_audit", + "line": 45, + "owner": "CSyntaxFactsCore", + "receiver": "self", + "span": [ + 45, + 9, + 45, + 21 + ] + }, + { + "field": "status", + "function": "CSyntaxFactsCore_process", + "line": 30, + "owner": "CSyntaxFactsCore", + "receiver": "self", + "span": [ + 30, + 6, + 30, + 18 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": 
"CSyntaxFactsCore_process", + "line": 31, + "owner": "CSyntaxFactsCore", + "receiver": "self", + "span": [ + 31, + 4, + 31, + 20 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 17, + 2, + 28, + 3 + ], + "function": "CSyntaxFactsCore_process", + "kind": "case_dispatch", + "line": 17, + "members": [ + "ROLE_ADMIN", + "ROLE_GUEST", + "ROLE_OWNER" + ], + "predicate": "user->role", + "span": [ + 17, + 2, + 28, + 3 + ] + }, + { + "enclosing_span": [ + 30, + 2, + 35, + 3 + ], + "function": "CSyntaxFactsCore_process", + "kind": "conjunction", + "line": 30, + "members": [ + "self->status == STATUS_IDLE", + "user->ready" + ], + "predicate": "self->status == STATUS_IDLE && user->ready", + "span": [ + 30, + 6, + 30, + 48 + ] + } + ], + "branch_decisions": [ + { + "function": "CSyntaxFactsCore_process", + "line": 17, + "predicate": "(user->role)", + "span": [ + 17, + 2, + 28, + 3 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "CSyntaxFactsCore_process", + "line": 30, + "predicate": "(self->status == STATUS_IDLE && user->ready)", + "span": [ + 30, + 2, + 35, + 3 + ], + "state_refs": [ + "status", + "user.ready" + ] + } + ], + "dispatch_sites": [ + { + "arm_members": { + "ROLE_ADMIN": [ + + ], + "ROLE_GUEST": [ + + ], + "ROLE_OWNER": [ + + ] + }, + "function": "CSyntaxFactsCore_process", + "line": 17, + "outside": [ + + ], + "span": [ + 17, + 2, + 28, + 3 + ], + "variant_set": [ + "ROLE_ADMIN", + "ROLE_GUEST", + "ROLE_OWNER" + ] + } + ], + "semantic_effects": [ + { + "detail": "callback", + "function": "CSyntaxFactsCore_process", + "kind": "callback_inversion", + "line": 15, + "span": [ + 15, + 2, + 15, + 20 + ] + }, + { + "detail": "puts", + "function": "CSyntaxFactsCore_audit", + "kind": "hidden_io", + "line": 43, + "span": [ + 43, + 2, + 43, + 12 + ] + } + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/cpp-core.json b/gems/decomplex/examples/syntax-facts/oracles/cpp-core.json new file mode 100644 
index 000000000..b0c7783da --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/cpp-core.json @@ -0,0 +1,637 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/cpp/core.cpp", + "language": "cpp", + "functions": [ + { + "line": 15, + "name": "CppSyntaxFactsCore", + "owner": "CppSyntaxFactsCore", + "params": [ + "status", + "sink" + ], + "span": [ + 15, + 2, + 16, + 47 + ], + "visibility": "public" + }, + { + "line": 18, + "name": "process", + "owner": "CppSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 18, + 2, + 48, + 3 + ], + "visibility": "public" + }, + { + "line": 51, + "name": "audit", + "owner": "CppSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 51, + 2, + 55, + 3 + ], + "visibility": "private" + }, + { + "line": 57, + "name": "ready", + "owner": "CppSyntaxFactsCore", + "params": [ + + ], + "span": [ + 57, + 2, + 59, + 3 + ], + "visibility": "private" + } + ], + "owners": [ + { + "kind": "class", + "line": 9, + "name": "CppSyntaxFactsCore", + "span": [ + 9, + 0, + 60, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "Status::Busy" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 38, + "message": "publish", + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 38, + 6, + 38, + 27 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 40, + "message": "warn", + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 40, + 6, + 40, + 23 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 53, + "message": "send", + "owner": "CppSyntaxFactsCore", + "receiver": "sink", + "safe_navigation": 
false, + "span": [ + 53, + 4, + 53, + 30 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 21, + "message": "callback", + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 21, + 4, + 21, + 21 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 26, + "message": "escalate", + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 26, + 6, + 26, + 20 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 29, + "message": "fallback", + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 29, + 6, + 29, + 20 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 32, + "message": "defaultCase", + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 32, + 6, + 32, + 23 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 19, + "message": "name", + "owner": "CppSyntaxFactsCore", + "receiver": "user.profile()", + "safe_navigation": false, + "span": [ + 19, + 23, + 19, + 44 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 19, + "message": "profile", + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 19, + 23, + 19, + 37 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 20, + "message": "active", + "owner": "CppSyntaxFactsCore", + 
"receiver": "user", + "safe_navigation": false, + "span": [ + 20, + 26, + 20, + 39 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 23, + "message": "role", + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 23, + 12, + 23, + 23 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 36, + "message": "ready", + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 36, + 34, + 36, + 46 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 44, + "message": "children", + "owner": "CppSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 44, + 6, + 44, + 21 + ] + } + ], + "state_reads": [ + { + "field": "active", + "function": "process", + "line": 20, + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "span": [ + 20, + 26, + 20, + 37 + ] + }, + { + "field": "children", + "function": "process", + "line": 44, + "owner": "CppSyntaxFactsCore", + "receiver": "item", + "span": [ + 44, + 6, + 44, + 19 + ] + }, + { + "field": "count", + "function": "CppSyntaxFactsCore", + "line": 16, + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "span": [ + 16, + 24, + 16, + 29 + ] + }, + { + "field": "count", + "function": "ready", + "line": 58, + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "span": [ + 58, + 11, + 58, + 16 + ] + }, + { + "field": "name", + "function": "process", + "line": 19, + "owner": "CppSyntaxFactsCore", + "receiver": "user.profile()", + "span": [ + 19, + 23, + 19, + 42 + ] + }, + { + "field": "profile", + "function": "process", + "line": 19, + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "span": [ + 19, + 23, + 19, + 35 + ] + }, + { + "field": "ready", + 
"function": "process", + "line": 36, + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "span": [ + 36, + 34, + 36, + 44 + ] + }, + { + "field": "role", + "function": "process", + "line": 23, + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "span": [ + 23, + 12, + 23, + 21 + ] + }, + { + "field": "send", + "function": "audit", + "line": 53, + "owner": "CppSyntaxFactsCore", + "receiver": "sink", + "span": [ + 53, + 4, + 53, + 14 + ] + }, + { + "field": "sink", + "function": "audit", + "line": 53, + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "span": [ + 53, + 4, + 53, + 8 + ] + }, + { + "field": "status", + "function": "audit", + "line": 54, + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "span": [ + 54, + 11, + 54, + 17 + ] + }, + { + "field": "status", + "function": "process", + "line": 36, + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "span": [ + 36, + 8, + 36, + 14 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "process", + "line": 37, + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "span": [ + 37, + 6, + 37, + 11 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 23, + 4, + 34, + 5 + ], + "function": "process", + "kind": "case_dispatch", + "line": 23, + "members": [ + "Role::Admin", + "Role::Guest", + "Role::Owner" + ], + "predicate": "user.role()", + "span": [ + 23, + 4, + 34, + 5 + ] + }, + { + "enclosing_span": [ + 36, + 4, + 41, + 5 + ], + "function": "process", + "kind": "conjunction", + "line": 36, + "members": [ + "status == Status::Idle", + "user.ready()" + ], + "predicate": "status == Status::Idle && user.ready()", + "span": [ + 36, + 8, + 36, + 46 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 23, + "predicate": "(user.role())", + "span": [ + 23, + 4, + 34, + 5 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 36, + "predicate": "(status == Status::Idle && user.ready())", + "span": [ + 36, + 4, + 41, + 5 
+ ], + "state_refs": [ + "user.ready" + ] + } + ], + "dispatch_sites": [ + { + "arm_members": { + "Role::Admin": [ + + ], + "Role::Guest": [ + + ], + "Role::Owner": [ + + ] + }, + "function": "process", + "line": 23, + "outside": [ + + ], + "span": [ + 23, + 4, + 34, + 5 + ], + "variant_set": [ + "Role::Admin", + "Role::Guest", + "Role::Owner" + ] + } + ], + "semantic_effects": [ + + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/csharp-core.json b/gems/decomplex/examples/syntax-facts/oracles/csharp-core.json new file mode 100644 index 000000000..3fc20be16 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/csharp-core.json @@ -0,0 +1,563 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/csharp/core.cs", + "language": "csharp", + "functions": [ + { + "line": 17, + "name": "Process", + "owner": "CSharpSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 17, + 4, + 53, + 5 + ], + "visibility": "public" + }, + { + "line": 55, + "name": "Audit", + "owner": "CSharpSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 55, + 4, + 60, + 5 + ], + "visibility": "private" + }, + { + "line": 62, + "name": "Ready", + "owner": "CSharpSyntaxFactsCore", + "params": [ + + ], + "span": [ + 62, + 4, + 65, + 5 + ], + "visibility": "private" + } + ], + "owners": [ + { + "kind": "class", + "line": 4, + "name": "CSharpSyntaxFactsCore", + "span": [ + 4, + 0, + 66, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "Status.Busy" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "Process", + "line": 40, + "message": "Publish", + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 40, + 12, + 40, + 32 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": 
"Process", + "line": 44, + "message": "WriteLine", + "owner": "CSharpSyntaxFactsCore", + "receiver": "Console", + "safe_navigation": false, + "span": [ + 44, + 12, + 44, + 42 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "Audit", + "line": 58, + "message": "Send", + "owner": "CSharpSyntaxFactsCore", + "receiver": "sink", + "safe_navigation": false, + "span": [ + 58, + 8, + 58, + 33 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "Process", + "line": 21, + "message": "callback", + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 21, + 8, + 21, + 25 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "Audit", + "line": 57, + "message": "WriteLine", + "owner": "CSharpSyntaxFactsCore", + "receiver": "Console", + "safe_navigation": false, + "span": [ + 57, + 8, + 57, + 31 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "Process", + "line": 27, + "message": "Escalate", + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 27, + 16, + 27, + 30 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "Process", + "line": 30, + "message": "Fallback", + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 30, + 16, + 30, + 30 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "Process", + "line": 33, + "message": "DefaultCase", + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 33, + 16, + 33, + 33 + ] + }, + { + "arguments": [ + + 
], + "block": false, + "conditional": true, + "control": "iterates", + "function": "Process", + "line": 49, + "message": "Children", + "owner": "CSharpSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 49, + 12, + 49, + 27 + ] + } + ], + "state_reads": [ + { + "field": "Active", + "function": "Process", + "line": 20, + "owner": "CSharpSyntaxFactsCore", + "receiver": "user", + "span": [ + 20, + 40, + 20, + 51 + ] + }, + { + "field": "Children", + "function": "Process", + "line": 49, + "owner": "CSharpSyntaxFactsCore", + "receiver": "item", + "span": [ + 49, + 12, + 49, + 25 + ] + }, + { + "field": "Name", + "function": "Process", + "line": 19, + "owner": "CSharpSyntaxFactsCore", + "receiver": "user.Profile", + "span": [ + 19, + 19, + 19, + 36 + ] + }, + { + "field": "Profile", + "function": "Process", + "line": 19, + "owner": "CSharpSyntaxFactsCore", + "receiver": "user", + "span": [ + 19, + 19, + 19, + 31 + ] + }, + { + "field": "Ready", + "function": "Process", + "line": 37, + "owner": "CSharpSyntaxFactsCore", + "receiver": "user", + "span": [ + 37, + 42, + 37, + 52 + ] + }, + { + "field": "Role", + "function": "Process", + "line": 23, + "owner": "CSharpSyntaxFactsCore", + "receiver": "user", + "span": [ + 23, + 16, + 23, + 25 + ] + }, + { + "field": "Send", + "function": "Audit", + "line": 58, + "owner": "CSharpSyntaxFactsCore", + "receiver": "sink", + "span": [ + 58, + 8, + 58, + 17 + ] + }, + { + "field": "count", + "function": "Ready", + "line": 64, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 64, + 15, + 64, + 20 + ] + }, + { + "field": "sink", + "function": "Audit", + "line": 58, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 58, + 8, + 58, + 12 + ] + }, + { + "field": "status", + "function": "Audit", + "line": 59, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 59, + 15, + 59, + 21 + ] + }, + { + "field": "status", + "function": "Process", + "line": 
37, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 37, + 12, + 37, + 23 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "(top-level)", + "line": 13, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 13, + 8, + 13, + 22 + ] + }, + { + "field": "count", + "function": "Process", + "line": 39, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 39, + 12, + 39, + 27 + ] + }, + { + "field": "sink", + "function": "(top-level)", + "line": 14, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 14, + 8, + 14, + 24 + ] + }, + { + "field": "status", + "function": "(top-level)", + "line": 12, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 12, + 8, + 12, + 28 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 23, + 8, + 35, + 9 + ], + "function": "Process", + "kind": "case_dispatch", + "line": 23, + "members": [ + "\"admin\"", + "\"guest\"", + "\"owner\"" + ], + "predicate": "user.Role", + "span": [ + 23, + 8, + 35, + 9 + ] + }, + { + "enclosing_span": [ + 37, + 8, + 45, + 9 + ], + "function": "Process", + "kind": "conjunction", + "line": 37, + "members": [ + "this.status == Status.Idle", + "user.Ready" + ], + "predicate": "this.status == Status.Idle && user.Ready", + "span": [ + 37, + 12, + 37, + 52 + ] + } + ], + "branch_decisions": [ + { + "function": "Process", + "line": 23, + "predicate": "user.Role", + "span": [ + 23, + 8, + 35, + 9 + ], + "state_refs": [ + "user.Role" + ] + }, + { + "function": "Process", + "line": 37, + "predicate": "this.status == Status.Idle && user.Ready", + "span": [ + 37, + 8, + 45, + 9 + ], + "state_refs": [ + "this.status", + "user.Ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "Console.WriteLine", + "function": "Audit", + "kind": "hidden_io", + "line": 57, + "span": [ + 57, + 8, + 57, + 31 + ] + }, + { + "detail": "Console.WriteLine", + "function": "Process", + 
"kind": "hidden_io", + "line": 44, + "span": [ + 44, + 12, + 44, + 42 + ] + } + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/go-core.json b/gems/decomplex/examples/syntax-facts/oracles/go-core.json index 5ac9c16e9..32f611a0c 100644 --- a/gems/decomplex/examples/syntax-facts/oracles/go-core.json +++ b/gems/decomplex/examples/syntax-facts/oracles/go-core.json @@ -6,534 +6,368 @@ "language": "go", "functions": [ { + "line": 32, "name": "NewGoSyntaxFactsCore", "owner": "core", - "line": 32, + "params": [ + "status" + ], "span": [ 32, 0, 34, 1 ], - "visibility": "public", - "params": [ - "status" - ] + "visibility": "public" }, { + "line": 36, "name": "Process", "owner": "GoSyntaxFactsCore", - "line": 36, + "params": [ + "user", + "items", + "callback" + ], "span": [ 36, 0, 70, 1 ], - "visibility": "public", - "params": [ - "user", - "items", - "callback" - ] + "visibility": "public" }, { - "name": "Ready", + "line": 72, + "name": "audit", "owner": "GoSyntaxFactsCore", - "line": 78, + "params": [ + "name" + ], "span": [ - 78, + 72, 0, - 80, + 76, 1 ], - "visibility": "public", - "params": [] + "visibility": "private" }, { - "name": "audit", + "line": 78, + "name": "Ready", "owner": "GoSyntaxFactsCore", - "line": 72, + "params": [ + + ], "span": [ - 72, + 78, 0, - 76, + 80, 1 ], - "visibility": "private", - "params": [ - "name" - ] + "visibility": "public" } ], "owners": [ { - "name": "Account", "kind": "owner", - "line": 21, + "line": 10, + "name": "Profile", "span": [ - 21, + 10, 5, - 24, + 12, 1 ] }, { - "name": "GoSyntaxFactsCore", "kind": "owner", - "line": 26, + "line": 14, + "name": "User", "span": [ - 26, + 14, 5, - 30, + 19, 1 ] }, { - "name": "Profile", "kind": "owner", - "line": 10, + "line": 21, + "name": "Account", "span": [ - 10, + 21, 5, - 12, + 24, 1 ] }, { - "name": "Status", "kind": "owner", - "line": 3, + "line": 26, + "name": "GoSyntaxFactsCore", "span": [ - 3, + 26, 5, - 3, - 15 + 30, + 1 ] }, 
{ - "name": "User", "kind": "owner", - "line": 14, + "line": 3, + "name": "Status", "span": [ - 14, + 3, 5, - 19, - 1 + 3, + 15 ] } ], "calls": [ { - "receiver": "self", - "message": "audit", + "arguments": [ + "Busy" + ], + "block": false, + "conditional": true, + "control": "conditional", "function": "Process", + "line": 56, + "message": "publish", "owner": "GoSyntaxFactsCore", - "line": 66, + "receiver": "self", + "safe_navigation": false, "span": [ - 66, - 4, - 66, + 56, + 2, + 56, 17 - ], - "conditional": false, - "arguments": [ - "name" - ], - "control": "always", - "safe_navigation": false, - "block": false + ] }, { - "receiver": "self", - "message": "audit", - "function": "Process", - "owner": "GoSyntaxFactsCore", - "line": 66, - "span": [ - 66, - 6, - 66, - 11 - ], - "conditional": false, "arguments": [ - "name" + "\"not ready\"" ], - "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "audit", + "block": false, + "conditional": true, + "control": "conditional", "function": "Process", + "line": 58, + "message": "warn", "owner": "GoSyntaxFactsCore", - "line": 67, + "receiver": "self", + "safe_navigation": false, "span": [ - 67, - 7, - 67, - 20 - ], - "conditional": false, + 58, + 2, + 58, + 21 + ] + }, + { "arguments": [ + "\"record\"", "name" ], + "block": false, + "conditional": false, "control": "always", + "function": "audit", + "line": 74, + "message": "send", + "owner": "GoSyntaxFactsCore", + "receiver": "self", "safe_navigation": false, - "block": false + "span": [ + 74, + 1, + 74, + 23 + ] }, { - "receiver": "self", - "message": "audit", - "function": "Process", - "owner": "GoSyntaxFactsCore", - "line": 67, - "span": [ - 67, - 9, - 67, - 14 - ], - "conditional": false, "arguments": [ - "name" + "account" ], + "block": false, + "conditional": false, "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "callback", "function": "Process", - 
"owner": "GoSyntaxFactsCore", "line": 43, + "message": "callback", + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ 43, 1, 43, - 9 - ], - "conditional": false, - "arguments": [ - "account" - ], - "control": "always", - "safe_navigation": false, - "block": false + 18 + ] }, { - "receiver": "self", - "message": "children", - "function": "Process", - "owner": "GoSyntaxFactsCore", - "line": 62, - "span": [ - 62, - 2, - 62, - 12 - ], - "conditional": true, "arguments": [ "item" ], + "block": false, + "conditional": true, "control": "iterates", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "children", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 62, + "message": "children", + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ 62, - 4, + 2, 62, - 12 - ], - "conditional": true, + 18 + ] + }, + { "arguments": [ - "item" + "name" ], - "control": "iterates", + "block": false, + "conditional": false, + "control": "always", + "function": "Process", + "line": 66, + "message": "audit", + "owner": "GoSyntaxFactsCore", + "receiver": "self", "safe_navigation": false, - "block": false + "span": [ + 66, + 4, + 66, + 17 + ] }, { - "receiver": "self", - "message": "defaultCase", + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", "function": "Process", + "line": 67, + "message": "audit", "owner": "GoSyntaxFactsCore", - "line": 51, + "receiver": "self", + "safe_navigation": false, "span": [ - 51, - 2, - 51, - 15 - ], - "conditional": true, + 67, + 7, + 67, + 20 + ] + }, + { "arguments": [ - "user" + "name" ], - "control": "conditional", + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 73, + "message": "println", + "owner": "GoSyntaxFactsCore", + "receiver": "self", "safe_navigation": false, - "block": false + "span": [ + 73, + 1, + 73, + 14 + ] }, { - 
"receiver": "self", - "message": "defaultCase", - "function": "Process", - "owner": "GoSyntaxFactsCore", - "line": 51, - "span": [ - 51, - 4, - 51, - 15 - ], - "conditional": true, "arguments": [ "user" ], + "block": false, + "conditional": true, "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "escalate", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 47, + "message": "escalate", + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ 47, 2, 47, - 12 - ], - "conditional": true, - "arguments": [ - "user" - ], - "control": "conditional", - "safe_navigation": false, - "block": false + 18 + ] }, { - "receiver": "self", - "message": "escalate", - "function": "Process", - "owner": "GoSyntaxFactsCore", - "line": 47, - "span": [ - 47, - 4, - 47, - 12 - ], - "conditional": true, "arguments": [ "user" ], + "block": false, + "conditional": true, "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "fallback", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 49, - "span": [ - 49, - 2, - 49, - 12 - ], - "conditional": true, - "arguments": [ - "user" - ], - "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", "message": "fallback", - "function": "Process", "owner": "GoSyntaxFactsCore", - "line": 49, + "receiver": "self", + "safe_navigation": false, "span": [ 49, - 4, + 2, 49, - 12 - ], - "conditional": true, - "arguments": [ - "user" - ], - "control": "conditional", - "safe_navigation": false, - "block": false + 18 + ] }, { - "receiver": "self", - "message": "println", - "function": "audit", - "owner": "GoSyntaxFactsCore", - "line": 73, - "span": [ - 73, - 1, - 73, - 8 - ], - "conditional": false, "arguments": [ - "name" - ], - "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": 
"publish", - "function": "Process", - "owner": "GoSyntaxFactsCore", - "line": 56, - "span": [ - 56, - 2, - 56, - 11 + "user" ], + "block": false, "conditional": true, - "arguments": [ - "Busy" - ], "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "publish", "function": "Process", + "line": 51, + "message": "defaultCase", "owner": "GoSyntaxFactsCore", - "line": 56, - "span": [ - 56, - 4, - 56, - 11 - ], - "conditional": true, - "arguments": [ - "Busy" - ], - "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "send", - "function": "audit", - "owner": "GoSyntaxFactsCore", - "line": 74, - "span": [ - 74, - 1, - 74, - 7 - ], - "conditional": false, - "arguments": [ - "\"record\"", - "name" - ], - "control": "always", - "safe_navigation": false, - "block": false - }, - { "receiver": "self", - "message": "send", - "function": "audit", - "owner": "GoSyntaxFactsCore", - "line": 74, - "span": [ - 74, - 3, - 74, - 7 - ], - "conditional": false, - "arguments": [ - "\"record\"", - "name" - ], - "control": "always", "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "warn", - "function": "Process", - "owner": "GoSyntaxFactsCore", - "line": 58, "span": [ - 58, + 51, 2, - 58, - 8 - ], - "conditional": true, - "arguments": [ - "\"not ready\"" - ], - "control": "conditional", - "safe_navigation": false, - "block": false - }, + 51, + 21 + ] + } + ], + "state_reads": [ { - "receiver": "self", - "message": "warn", + "field": "Active", "function": "Process", + "line": 42, "owner": "GoSyntaxFactsCore", - "line": 58, + "receiver": "user", "span": [ - 58, - 4, - 58, - 8 - ], - "conditional": true, - "arguments": [ - "\"not ready\"" - ], - "control": "conditional", - "safe_navigation": false, - "block": false - } - ], - "state_reads": [ + 42, + 40, + 42, + 51 + ] + }, { "field": "Name", - "receiver": "user.Profile", "function": 
"Process", - "owner": "GoSyntaxFactsCore", "line": 41, + "owner": "GoSyntaxFactsCore", + "receiver": "user.Profile", "span": [ 41, 9, @@ -543,10 +377,10 @@ }, { "field": "Profile", - "receiver": "user", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 41, + "owner": "GoSyntaxFactsCore", + "receiver": "user", "span": [ 41, 9, @@ -556,10 +390,10 @@ }, { "field": "Ready", - "receiver": "user", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 54, + "owner": "GoSyntaxFactsCore", + "receiver": "user", "span": [ 54, 24, @@ -569,10 +403,10 @@ }, { "field": "Role", - "receiver": "user", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 45, + "owner": "GoSyntaxFactsCore", + "receiver": "user", "span": [ 45, 8, @@ -582,10 +416,10 @@ }, { "field": "audit", - "receiver": "self", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 66, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 66, 4, @@ -595,10 +429,10 @@ }, { "field": "audit", - "receiver": "self", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 67, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 67, 7, @@ -608,10 +442,10 @@ }, { "field": "children", - "receiver": "self", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 62, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 62, 2, @@ -621,10 +455,10 @@ }, { "field": "count", - "receiver": "self", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 65, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 65, 18, @@ -634,10 +468,10 @@ }, { "field": "count", - "receiver": "self", "function": "Ready", - "owner": "GoSyntaxFactsCore", "line": 79, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 79, 8, @@ -647,10 +481,10 @@ }, { "field": "defaultCase", - "receiver": "self", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 51, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 51, 2, @@ -660,10 
+494,10 @@ }, { "field": "escalate", - "receiver": "self", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 47, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 47, 2, @@ -673,10 +507,10 @@ }, { "field": "fallback", - "receiver": "self", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 49, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 49, 2, @@ -686,10 +520,10 @@ }, { "field": "lookup", - "receiver": "self", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 65, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 65, 1, @@ -699,10 +533,10 @@ }, { "field": "publish", - "receiver": "self", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 56, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 56, 2, @@ -712,10 +546,10 @@ }, { "field": "send", - "receiver": "self", "function": "audit", - "owner": "GoSyntaxFactsCore", "line": 74, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 74, 1, @@ -725,10 +559,10 @@ }, { "field": "status", - "receiver": "self", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 54, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 54, 4, @@ -738,10 +572,10 @@ }, { "field": "status", - "receiver": "self", "function": "audit", - "owner": "GoSyntaxFactsCore", "line": 75, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 75, 5, @@ -751,10 +585,10 @@ }, { "field": "warn", - "receiver": "self", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 58, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 58, 2, @@ -766,10 +600,10 @@ "state_writes": [ { "field": "count", - "receiver": "self", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 55, + "owner": "GoSyntaxFactsCore", + "receiver": "self", "span": [ 55, 2, @@ -779,10 +613,10 @@ }, { "field": "lookup", - "receiver": "self", "function": "Process", - "owner": "GoSyntaxFactsCore", "line": 65, + "owner": 
"GoSyntaxFactsCore", + "receiver": "self", "span": [ 65, 1, @@ -793,21 +627,21 @@ ], "decisions": [ { - "kind": "case_dispatch", - "members": [ - "\"guest\"", - "\"owner\", \"admin\"" - ], - "function": "Process", - "line": 45, - "span": [ + "enclosing_span": [ 45, 1, 52, 2 ], + "function": "Process", + "kind": "case_dispatch", + "line": 45, + "members": [ + "\"guest\"", + "\"owner\", \"admin\"" + ], "predicate": "user.Role", - "enclosing_span": [ + "span": [ 45, 1, 52, @@ -815,25 +649,25 @@ ] }, { + "enclosing_span": [ + 54, + 1, + 59, + 2 + ], + "function": "Process", "kind": "conjunction", + "line": 54, "members": [ "c.status == Idle", "user.Ready" ], - "function": "Process", - "line": 54, + "predicate": "c.status == Idle && user.Ready", "span": [ 54, 4, 54, 34 - ], - "predicate": "c.status == Idle && user.Ready", - "enclosing_span": [ - 54, - 1, - 59, - 2 ] } ], @@ -841,13 +675,13 @@ { "function": "Process", "line": 45, + "predicate": "user.Role", "span": [ 45, 1, 52, 2 ], - "predicate": "user.Role", "state_refs": [ "user.Role" ] @@ -855,421 +689,38 @@ { "function": "Process", "line": 54, + "predicate": "c.status == Idle && user.Ready", "span": [ 54, 1, 59, 2 ], - "predicate": "c.status == Idle && user.Ready", "state_refs": [ "c.status", "user.Ready" ] } ], - "dispatch_sites": [], + "dispatch_sites": [ + + ], "semantic_effects": [ { - "kind": "hidden_io", "detail": "println", "function": "audit", + "kind": "hidden_io", "line": 73, "span": [ 73, 1, 73, - 8 - ] - } - ], - "predicate_bodies": [], - "local_complexity": [ - { - "id": "(top-level)#NewGoSyntaxFactsCore", - "score": 0.0, - "signals": { - "early_exits": 2 - } - }, - { - "id": "GoSyntaxFactsCore#Process", - "score": 6.1, - "signals": { - "boolean_ops": 2, - "branches": 1, - "cases": 2, - "early_exits": 2, - "loops": 2, - "nested": 1 - } - }, - { - "id": "GoSyntaxFactsCore#Ready", - "score": 0.0, - "signals": { - "early_exits": 2 - } - }, - { - "id": "GoSyntaxFactsCore#audit", - "score": 0.0, - "signals": 
{} - } - ], - "clone_candidates": [ - { - "method_name": "NewGoSyntaxFactsCore", - "node_name": "block", - "line": 32, - "span": [ - 32, - 60, - 34, - 1 - ], - "mass": 31, - "fingerprint": "block({:{ statement_list(return_statement(id expression_list(&:& composite_literal(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(map_type(id [:[ id ]:] id) literal_value({:{ }:}))) }:}))))) }:})", - "child_fingerprints": [ - "statement_list(return_statement(id expression_list(&:& composite_literal(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(map_type(id [:[ id ]:] id) literal_value({:{ }:}))) }:})))))" - ], - "child_masses": [ - 28 - ] - }, - { - "method_name": "NewGoSyntaxFactsCore", - "node_name": "defn", - "line": 32, - "span": [ - 32, - 0, - 34, - 1 - ], - "mass": 43, - "fingerprint": "function_declaration(id id parameter_list((:( parameter_declaration(id id) ):)) pointer_type(*:* id) block({:{ statement_list(return_statement(id expression_list(&:& composite_literal(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(map_type(id [:[ id ]:] id) literal_value({:{ }:}))) }:}))))) }:}))", - "child_fingerprints": [ - "statement_list(return_statement(id expression_list(&:& composite_literal(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(map_type(id [:[ id ]:] id) literal_value({:{ }:}))) }:})))))" - ], - "child_masses": [ - 28 - ] - }, - { - "method_name": "Process", - "node_name": "assignment_statement", - "line": 38, - "span": [ - 38, - 1, - 38, - 10 - ], - "mass": 4, - "fingerprint": "assignment_statement(id =:= id)", - "child_fingerprints": [ - "id", - "id" - ], - "child_masses": [ - 1, - 1 - ] - }, - { - "method_name": "Process", - "node_name": "assignment_statement", - "line": 39, - "span": [ - 39, - 1, - 39, - 11 - ], - "mass": 4, - "fingerprint": "assignment_statement(id =:= id)", - "child_fingerprints": [ - "id", - 
"id" - ], - "child_masses": [ - 1, - 1 - ] - }, - { - "method_name": "Process", - "node_name": "assignment_statement", - "line": 55, - "span": [ - 55, - 2, - 55, 14 - ], - "mass": 7, - "fingerprint": "assignment_statement(expression_list(id .:. id) +=:+= lit)", - "child_fingerprints": [ - "expression_list(id .:. id)", - "lit" - ], - "child_masses": [ - 4, - 1 - ] - }, - { - "method_name": "Process", - "node_name": "assignment_statement", - "line": 65, - "span": [ - 65, - 1, - 65, - 25 - ], - "mass": 14, - "fingerprint": "assignment_statement(expression_list(selector_expression(id .:. id) [:[ id ]:]) =:= expression_list(id .:. id))", - "child_fingerprints": [ - "expression_list(selector_expression(id .:. id) [:[ id ]:])", - "expression_list(id .:. id)" - ], - "child_masses": [ - 8, - 4 - ] - }, - { - "method_name": "Process", - "node_name": "block", - "line": 36, - "span": [ - 36, - 94, - 70, - 1 - ], - "mass": 235, - "fingerprint": "block({:{ statement_list(var_declaration(id var_spec(id ,:, id id =:= expression_list(lit ,:, lit))) assignment_statement(id =:= id) assignment_statement(id =:= id) short_var_declaration(id :=::= expression_list(selector_expression(id .:. id) .:. id)) short_var_declaration(id :=::= expression_list(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(id .:. id)) }:}))) expression_statement(id argument_list((:( id ):))) expression_switch_statement(id selector_expression(id .:. id) {:{ expression_case(id expression_list(interpreted_string_literal(\":\" id \":\") ,:, interpreted_string_literal(\":\" id \":\")) ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) expression_case(id expression_list(\":\" id \":\") ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) default_case(id ::: statement_list(expression_statement(selector_expression(id .:. 
id) argument_list((:( id ):))))) }:}) if_statement(id binary_expression(binary_expression(selector_expression(id .:. id) ==:== id) &&:&& selector_expression(id .:. id)) block({:{ statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:}) id block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" interpreted_string_literal_content:not ready \":\") ):)))) }:})) for_statement(id range_clause(expression_list(id ,:, id) :=::= id id) block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:})) assignment_statement(expression_list(selector_expression(id .:. id) [:[ id ]:]) =:= expression_list(id .:. id)) go_statement(id call_expression(selector_expression(id .:. id) argument_list((:( id ):)))) defer_statement(id call_expression(selector_expression(id .:. id) argument_list((:( id ):)))) return_statement(id id)) }:})", - "child_fingerprints": [ - "statement_list(var_declaration(id var_spec(id ,:, id id =:= expression_list(lit ,:, lit))) assignment_statement(id =:= id) assignment_statement(id =:= id) short_var_declaration(id :=::= expression_list(selector_expression(id .:. id) .:. id)) short_var_declaration(id :=::= expression_list(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(id .:. id)) }:}))) expression_statement(id argument_list((:( id ):))) expression_switch_statement(id selector_expression(id .:. id) {:{ expression_case(id expression_list(interpreted_string_literal(\":\" id \":\") ,:, interpreted_string_literal(\":\" id \":\")) ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) expression_case(id expression_list(\":\" id \":\") ::: statement_list(expression_statement(selector_expression(id .:. 
id) argument_list((:( id ):))))) default_case(id ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) }:}) if_statement(id binary_expression(binary_expression(selector_expression(id .:. id) ==:== id) &&:&& selector_expression(id .:. id)) block({:{ statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:}) id block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" interpreted_string_literal_content:not ready \":\") ):)))) }:})) for_statement(id range_clause(expression_list(id ,:, id) :=::= id id) block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:})) assignment_statement(expression_list(selector_expression(id .:. id) [:[ id ]:]) =:= expression_list(id .:. id)) go_statement(id call_expression(selector_expression(id .:. id) argument_list((:( id ):)))) defer_statement(id call_expression(selector_expression(id .:. id) argument_list((:( id ):)))) return_statement(id id))" - ], - "child_masses": [ - 232 - ] - }, - { - "method_name": "Process", - "node_name": "block", - "line": 54, - "span": [ - 54, - 35, - 57, - 2 - ], - "mass": 20, - "fingerprint": "block({:{ statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:})", - "child_fingerprints": [ - "statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))" - ], - "child_masses": [ - 17 - ] - }, - { - "method_name": "Process", - "node_name": "block", - "line": 57, - "span": [ - 57, - 8, - 59, - 2 - ], - "mass": 16, - "fingerprint": "block({:{ statement_list(expression_statement(selector_expression(id .:. 
id) argument_list((:( interpreted_string_literal(\":\" interpreted_string_literal_content:not ready \":\") ):)))) }:})", - "child_fingerprints": [ - "statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" interpreted_string_literal_content:not ready \":\") ):))))" - ], - "child_masses": [ - 13 - ] - }, - { - "method_name": "Process", - "node_name": "block", - "line": 61, - "span": [ - 61, - 28, - 63, - 2 - ], - "mass": 13, - "fingerprint": "block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:})", - "child_fingerprints": [ - "statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))" - ], - "child_masses": [ - 10 - ] - }, - { - "method_name": "Process", - "node_name": "defn", - "line": 36, - "span": [ - 36, - 0, - 70, - 1 - ], - "mass": 269, - "fingerprint": "method_declaration(id parameter_list((:( parameter_declaration(id pointer_type(*:* id)) ):)) id parameter_list((:( parameter_declaration(id id) ,:, parameter_declaration(id slice_type([:[ ]:] id)) ,:, parameter_declaration(id function_type(id parameter_list((:( id ):)))) ):)) id block({:{ statement_list(var_declaration(id var_spec(id ,:, id id =:= expression_list(lit ,:, lit))) assignment_statement(id =:= id) assignment_statement(id =:= id) short_var_declaration(id :=::= expression_list(selector_expression(id .:. id) .:. id)) short_var_declaration(id :=::= expression_list(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(id .:. id)) }:}))) expression_statement(id argument_list((:( id ):))) expression_switch_statement(id selector_expression(id .:. id) {:{ expression_case(id expression_list(interpreted_string_literal(\":\" id \":\") ,:, interpreted_string_literal(\":\" id \":\")) ::: statement_list(expression_statement(selector_expression(id .:. 
id) argument_list((:( id ):))))) expression_case(id expression_list(\":\" id \":\") ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) default_case(id ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) }:}) if_statement(id binary_expression(binary_expression(selector_expression(id .:. id) ==:== id) &&:&& selector_expression(id .:. id)) block({:{ statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:}) id block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" interpreted_string_literal_content:not ready \":\") ):)))) }:})) for_statement(id range_clause(expression_list(id ,:, id) :=::= id id) block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:})) assignment_statement(expression_list(selector_expression(id .:. id) [:[ id ]:]) =:= expression_list(id .:. id)) go_statement(id call_expression(selector_expression(id .:. id) argument_list((:( id ):)))) defer_statement(id call_expression(selector_expression(id .:. id) argument_list((:( id ):)))) return_statement(id id)) }:}))", - "child_fingerprints": [ - "statement_list(var_declaration(id var_spec(id ,:, id id =:= expression_list(lit ,:, lit))) assignment_statement(id =:= id) assignment_statement(id =:= id) short_var_declaration(id :=::= expression_list(selector_expression(id .:. id) .:. id)) short_var_declaration(id :=::= expression_list(id literal_value({:{ keyed_element(id ::: id) ,:, keyed_element(id ::: literal_element(id .:. id)) }:}))) expression_statement(id argument_list((:( id ):))) expression_switch_statement(id selector_expression(id .:. 
id) {:{ expression_case(id expression_list(interpreted_string_literal(\":\" id \":\") ,:, interpreted_string_literal(\":\" id \":\")) ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) expression_case(id expression_list(\":\" id \":\") ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) default_case(id ::: statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))) }:}) if_statement(id binary_expression(binary_expression(selector_expression(id .:. id) ==:== id) &&:&& selector_expression(id .:. id)) block({:{ statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:}) id block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" interpreted_string_literal_content:not ready \":\") ):)))) }:})) for_statement(id range_clause(expression_list(id ,:, id) :=::= id id) block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:})) assignment_statement(expression_list(selector_expression(id .:. id) [:[ id ]:]) =:= expression_list(id .:. id)) go_statement(id call_expression(selector_expression(id .:. id) argument_list((:( id ):)))) defer_statement(id call_expression(selector_expression(id .:. id) argument_list((:( id ):)))) return_statement(id id))" - ], - "child_masses": [ - 232 - ] - }, - { - "method_name": "Process", - "node_name": "for_statement", - "line": 61, - "span": [ - 61, - 1, - 63, - 2 - ], - "mass": 23, - "fingerprint": "for_statement(id range_clause(expression_list(id ,:, id) :=::= id id) block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:}))", - "child_fingerprints": [ - "statement_list(expression_statement(selector_expression(id .:. 
id) argument_list((:( id ):))))" - ], - "child_masses": [ - 10 - ] - }, - { - "method_name": "Process", - "node_name": "if_statement", - "line": 54, - "span": [ - 54, - 1, - 59, - 2 - ], - "mass": 52, - "fingerprint": "if_statement(id binary_expression(binary_expression(selector_expression(id .:. id) ==:== id) &&:&& selector_expression(id .:. id)) block({:{ statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):)))) }:}) id block({:{ statement_list(expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" interpreted_string_literal_content:not ready \":\") ):)))) }:}))", - "child_fingerprints": [ - "statement_list(assignment_statement(expression_list(id .:. id) +=:+= lit) expression_statement(selector_expression(id .:. id) argument_list((:( id ):))))" - ], - "child_masses": [ - 17 - ] - }, - { - "method_name": "Ready", - "node_name": "block", - "line": 78, - "span": [ - 78, - 40, - 80, - 1 - ], - "mass": 13, - "fingerprint": "block({:{ statement_list(return_statement(id expression_list(selector_expression(id .:. id) >:> lit))) }:})", - "child_fingerprints": [ - "statement_list(return_statement(id expression_list(selector_expression(id .:. id) >:> lit)))" - ], - "child_masses": [ - 10 - ] - }, - { - "method_name": "Ready", - "node_name": "defn", - "line": 78, - "span": [ - 78, - 0, - 80, - 1 - ], - "mass": 26, - "fingerprint": "method_declaration(id parameter_list((:( parameter_declaration(id id) ):)) id parameter_list((:( ):)) id block({:{ statement_list(return_statement(id expression_list(selector_expression(id .:. id) >:> lit))) }:}))", - "child_fingerprints": [ - "statement_list(return_statement(id expression_list(selector_expression(id .:. 
id) >:> lit)))" - ], - "child_masses": [ - 10 - ] - }, - { - "method_name": "audit", - "node_name": "assignment_statement", - "line": 75, - "span": [ - 75, - 1, - 75, - 13 - ], - "mass": 7, - "fingerprint": "assignment_statement(id =:= expression_list(id .:. id))", - "child_fingerprints": [ - "id", - "expression_list(id .:. id)" - ], - "child_masses": [ - 1, - 4 - ] - }, - { - "method_name": "audit", - "node_name": "block", - "line": 72, - "span": [ - 72, - 47, - 76, - 1 - ], - "mass": 31, - "fingerprint": "block({:{ statement_list(expression_statement(id argument_list((:( id ):))) expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" id \":\") ,:, id ):))) assignment_statement(id =:= expression_list(id .:. id))) }:})", - "child_fingerprints": [ - "statement_list(expression_statement(id argument_list((:( id ):))) expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" id \":\") ,:, id ):))) assignment_statement(id =:= expression_list(id .:. id)))" - ], - "child_masses": [ - 28 - ] - }, - { - "method_name": "audit", - "node_name": "defn", - "line": 72, - "span": [ - 72, - 0, - 76, - 1 - ], - "mass": 48, - "fingerprint": "method_declaration(id parameter_list((:( parameter_declaration(id pointer_type(*:* id)) ):)) id parameter_list((:( parameter_declaration(id id) ):)) block({:{ statement_list(expression_statement(id argument_list((:( id ):))) expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" id \":\") ,:, id ):))) assignment_statement(id =:= expression_list(id .:. id))) }:}))", - "child_fingerprints": [ - "statement_list(expression_statement(id argument_list((:( id ):))) expression_statement(selector_expression(id .:. id) argument_list((:( interpreted_string_literal(\":\" id \":\") ,:, id ):))) assignment_statement(id =:= expression_list(id .:. 
id)))" - ], - "child_masses": [ - 28 ] } + ], + "predicate_bodies": [ + ] } ] diff --git a/gems/decomplex/examples/syntax-facts/oracles/java-core.json b/gems/decomplex/examples/syntax-facts/oracles/java-core.json new file mode 100644 index 000000000..ca8fdb2f6 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/java-core.json @@ -0,0 +1,563 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/java/core.java", + "language": "java", + "functions": [ + { + "line": 12, + "name": "process", + "owner": "JavaSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 12, + 2, + 41, + 3 + ], + "visibility": "public" + }, + { + "line": 43, + "name": "audit", + "owner": "JavaSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 43, + 2, + 47, + 3 + ], + "visibility": "private" + }, + { + "line": 49, + "name": "ready", + "owner": "JavaSyntaxFactsCore", + "params": [ + + ], + "span": [ + 49, + 2, + 51, + 3 + ], + "visibility": null + } + ], + "owners": [ + { + "kind": "class", + "line": 3, + "name": "JavaSyntaxFactsCore", + "span": [ + 3, + 0, + 52, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "Status.BUSY" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 31, + "message": "this", + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 31, + 6, + 31, + 31 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 33, + "message": "err", + "owner": "JavaSyntaxFactsCore", + "receiver": "System", + "safe_navigation": false, + "span": [ + 33, + 6, + 33, + 37 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 45, + "message": "this", + "owner": "JavaSyntaxFactsCore", + 
"receiver": "self", + "safe_navigation": false, + "span": [ + 45, + 4, + 45, + 29 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 15, + "message": "call", + "owner": "JavaSyntaxFactsCore", + "receiver": "callback", + "safe_navigation": false, + "span": [ + 15, + 4, + 15, + 26 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 44, + "message": "out", + "owner": "JavaSyntaxFactsCore", + "receiver": "System", + "safe_navigation": false, + "span": [ + 44, + 4, + 44, + 28 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 20, + "message": "this", + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 20, + 8, + 20, + 27 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 23, + "message": "this", + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 23, + 8, + 23, + 27 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 26, + "message": "this", + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 26, + 8, + 26, + 30 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 46, + "message": "status", + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 46, + 4, + 46, + 22 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 13, + "message": "profile", + "owner": 
"JavaSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 13, + 18, + 13, + 32 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 13, + "message": "profile()", + "owner": "JavaSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 13, + 18, + 13, + 39 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 14, + "message": "active", + "owner": "JavaSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 14, + 40, + 14, + 53 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 17, + "message": "role", + "owner": "JavaSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 17, + 12, + 17, + 23 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 29, + "message": "ready", + "owner": "JavaSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 29, + 38, + 29, + 50 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 37, + "message": "children", + "owner": "JavaSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 37, + 6, + 37, + 21 + ] + } + ], + "state_reads": [ + { + "field": "count", + "function": "ready", + "line": 50, + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "span": [ + 50, + 11, + 50, + 21 + ] + }, + { + "field": "status", + "function": "audit", + "line": 46, + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "span": [ + 46, + 4, + 46, + 15 + ] + }, + { + "field": "status", + "function": "process", + "line": 29, + "owner": "JavaSyntaxFactsCore", + "receiver": 
"self", + "span": [ + 29, + 8, + 29, + 19 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "(top-level)", + "line": 9, + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "span": [ + 9, + 4, + 9, + 18 + ] + }, + { + "field": "count", + "function": "process", + "line": 30, + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "span": [ + 30, + 6, + 30, + 21 + ] + }, + { + "field": "status", + "function": "(top-level)", + "line": 8, + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "span": [ + 8, + 4, + 8, + 24 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 17, + 4, + 27, + 5 + ], + "function": "process", + "kind": "case_dispatch", + "line": 17, + "members": [ + "case \"admin\"", + "case \"guest\"", + "case \"owner\"" + ], + "predicate": "user.role()", + "span": [ + 17, + 4, + 27, + 5 + ] + }, + { + "enclosing_span": [ + 29, + 4, + 34, + 5 + ], + "function": "process", + "kind": "conjunction", + "line": 29, + "members": [ + "this.status == Status.IDLE", + "user.ready()" + ], + "predicate": "this.status == Status.IDLE && user.ready()", + "span": [ + 29, + 8, + 29, + 50 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 29, + "predicate": "(this.status == Status.IDLE && user.ready())", + "span": [ + 29, + 4, + 34, + 5 + ], + "state_refs": [ + "this.status" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "System.err", + "function": "process", + "kind": "hidden_io", + "line": 33, + "span": [ + 33, + 6, + 33, + 37 + ] + }, + { + "detail": "System.out", + "function": "audit", + "kind": "hidden_io", + "line": 44, + "span": [ + 44, + 4, + 44, + 28 + ] + }, + { + "detail": "callback.call", + "function": "process", + "kind": "dynamic_dispatch", + "line": 15, + "span": [ + 15, + 4, + 15, + 26 + ] + } + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/javascript-core.json 
b/gems/decomplex/examples/syntax-facts/oracles/javascript-core.json new file mode 100644 index 000000000..a44e8c524 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/javascript-core.json @@ -0,0 +1,695 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/javascript/core.js", + "language": "javascript", + "functions": [ + { + "line": 10, + "name": "process", + "owner": "JavaScriptSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 10, + 2, + 39, + 3 + ], + "visibility": "public" + }, + { + "line": 4, + "name": "constructor", + "owner": "JavaScriptSyntaxFactsCore", + "params": [ + "status", + "sink" + ], + "span": [ + 4, + 2, + 8, + 3 + ], + "visibility": "public" + }, + { + "line": 41, + "name": "#audit", + "owner": "JavaScriptSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 41, + 2, + 45, + 3 + ], + "visibility": "private" + }, + { + "line": 47, + "name": "ready", + "owner": "JavaScriptSyntaxFactsCore", + "params": [ + + ], + "span": [ + 47, + 2, + 49, + 3 + ], + "visibility": "public" + }, + { + "line": 52, + "name": "normalizeValue", + "owner": "core", + "params": [ + "input" + ], + "span": [ + 52, + 7, + 54, + 1 + ], + "visibility": "public" + } + ], + "owners": [ + { + "kind": "class", + "line": 1, + "name": "JavaScriptSyntaxFactsCore", + "span": [ + 1, + 7, + 50, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "\"busy\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 29, + "message": "publish", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 29, + 6, + 29, + 26 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 31, + "message": "warn", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "console", + 
"safe_navigation": false, + "span": [ + 31, + 6, + 31, + 31 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "#audit", + "line": 43, + "message": "send", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "this.sink", + "safe_navigation": false, + "span": [ + 43, + 4, + 43, + 34 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 13, + "message": "callback", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 13, + 4, + 13, + 21 + ] + }, + { + "arguments": [ + "items[index]" + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 35, + "message": "#audit", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 35, + 6, + 35, + 31 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "#audit", + "line": 42, + "message": "log", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "console", + "safe_navigation": false, + "span": [ + 42, + 4, + 42, + 21 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 18, + "message": "escalate", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 18, + 8, + 18, + 27 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 21, + "message": "fallback", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 21, + 8, + 21, + 27 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": 
"process", + "line": 24, + "message": "defaultCase", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 24, + 8, + 24, + 30 + ] + } + ], + "state_reads": [ + { + "field": "#audit", + "function": "process", + "line": 35, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 35, + 6, + 35, + 17 + ] + }, + { + "field": "#status", + "function": "#audit", + "line": 44, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 44, + 11, + 44, + 23 + ] + }, + { + "field": "#status", + "function": "process", + "line": 27, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 27, + 8, + 27, + 20 + ] + }, + { + "field": "active", + "function": "process", + "line": 12, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 12, + 36, + 12, + 47 + ] + }, + { + "field": "count", + "function": "ready", + "line": 48, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 48, + 11, + 48, + 21 + ] + }, + { + "field": "defaultCase", + "function": "process", + "line": 24, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 24, + 8, + 24, + 24 + ] + }, + { + "field": "escalate", + "function": "process", + "line": 18, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 18, + 8, + 18, + 21 + ] + }, + { + "field": "fallback", + "function": "process", + "line": 21, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 21, + 8, + 21, + 21 + ] + }, + { + "field": "log", + "function": "#audit", + "line": 42, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "console", + "span": [ + 42, + 4, + 42, + 15 + ] + }, + { + "field": "name", + "function": "process", + "line": 11, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "user?.profile", + "span": [ + 11, + 17, + 11, + 36 + ] + }, + { + "field": "profile", + "function": "process", + "line": 11, + "owner": 
"JavaScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 11, + 17, + 11, + 30 + ] + }, + { + "field": "publish", + "function": "process", + "line": 29, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 29, + 6, + 29, + 18 + ] + }, + { + "field": "ready", + "function": "process", + "line": 27, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 27, + 35, + 27, + 45 + ] + }, + { + "field": "role", + "function": "process", + "line": 15, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 15, + 12, + 15, + 21 + ] + }, + { + "field": "send", + "function": "#audit", + "line": 43, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "this.sink", + "span": [ + 43, + 4, + 43, + 18 + ] + }, + { + "field": "sink", + "function": "#audit", + "line": 43, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 43, + 4, + 43, + 13 + ] + }, + { + "field": "warn", + "function": "process", + "line": 31, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "console", + "span": [ + 31, + 6, + 31, + 18 + ] + } + ], + "state_writes": [ + { + "field": "#status", + "function": "constructor", + "line": 5, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 5, + 4, + 5, + 25 + ] + }, + { + "field": "count", + "function": "constructor", + "line": 6, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 6, + 4, + 6, + 18 + ] + }, + { + "field": "count", + "function": "process", + "line": 28, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 28, + 6, + 28, + 21 + ] + }, + { + "field": "sink", + "function": "constructor", + "line": 7, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 7, + 4, + 7, + 20 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 15, + 4, + 25, + 5 + ], + "function": "process", + "kind": "case_dispatch", + "line": 15, + "members": [ + "\"admin\"", + "\"guest\"", + 
"\"owner\"" + ], + "predicate": "user.role", + "span": [ + 15, + 4, + 25, + 5 + ] + }, + { + "enclosing_span": [ + 27, + 4, + 32, + 5 + ], + "function": "process", + "kind": "conjunction", + "line": 27, + "members": [ + "this.#status === \"idle\"", + "user.ready" + ], + "predicate": "this.#status === \"idle\" && user.ready", + "span": [ + 27, + 8, + 27, + 45 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 15, + "predicate": "(user.role)", + "span": [ + 15, + 4, + 25, + 5 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 27, + "predicate": "(this.#status === \"idle\" && user.ready)", + "span": [ + 27, + 4, + 32, + 5 + ], + "state_refs": [ + "this.#status", + "user.ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "console.log", + "function": "#audit", + "kind": "hidden_io", + "line": 42, + "span": [ + 42, + 4, + 42, + 21 + ] + }, + { + "detail": "console.warn", + "function": "process", + "kind": "hidden_io", + "line": 31, + "span": [ + 31, + 6, + 31, + 31 + ] + } + ], + "predicate_bodies": [ + { + "body": "input ?? null", + "line": 52, + "name": "normalizeValue", + "owner": "core", + "span": [ + 52, + 7, + 54, + 1 + ] + }, + { + "body": "name ?? 
null", + "line": 10, + "name": "process", + "owner": "JavaScriptSyntaxFactsCore", + "span": [ + 10, + 2, + 39, + 3 + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/kotlin-core.json b/gems/decomplex/examples/syntax-facts/oracles/kotlin-core.json new file mode 100644 index 000000000..70499c795 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/kotlin-core.json @@ -0,0 +1,491 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/kotlin/core.kt", + "language": "kotlin", + "functions": [ + { + "line": 31, + "name": "audit", + "owner": "KotlinSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 31, + 4, + 35, + 5 + ], + "visibility": "private" + }, + { + "line": 37, + "name": "ready", + "owner": "KotlinSyntaxFactsCore", + "params": [ + + ], + "span": [ + 37, + 4, + 39, + 5 + ], + "visibility": null + }, + { + "line": 6, + "name": "process", + "owner": "KotlinSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 6, + 4, + 29, + 5 + ], + "visibility": null + } + ], + "owners": [ + { + "kind": "class", + "line": 3, + "name": "KotlinSyntaxFactsCore", + "span": [ + 3, + 0, + 40, + 1 + ] + }, + { + "kind": "class", + "line": 42, + "name": "Status", + "span": [ + 42, + 0, + 45, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "Status.BUSY" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 19, + "message": "publish", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 19, + 12, + 19, + 32 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 21, + "message": "println", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 21, + 12, + 21, + 19 + ] + }, + { + "arguments": [ + 
"\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 33, + "message": "send", + "owner": "KotlinSyntaxFactsCore", + "receiver": "sink", + "safe_navigation": false, + "span": [ + 33, + 8, + 33, + 33 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 9, + "message": "callback", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 9, + 8, + 9, + 25 + ] + }, + { + "arguments": [ + "name", + "user.active" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 8, + "message": "Account", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 8, + 22, + 8, + 48 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 32, + "message": "println", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 32, + 8, + 32, + 21 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 12, + "message": "escalate", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 12, + 32, + 12, + 40 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 13, + "message": "fallback", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 13, + 23, + 13, + 31 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 14, + "message": "defaultCase", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + 
"safe_navigation": false, + "span": [ + 14, + 20, + 14, + 31 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 25, + "message": "children", + "owner": "KotlinSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 25, + 12, + 25, + 25 + ] + } + ], + "state_reads": [ + { + "field": "active", + "function": "process", + "line": 8, + "owner": "KotlinSyntaxFactsCore", + "receiver": "user", + "span": [ + 8, + 36, + 8, + 47 + ] + }, + { + "field": "children", + "function": "process", + "line": 25, + "owner": "KotlinSyntaxFactsCore", + "receiver": "item", + "span": [ + 25, + 12, + 25, + 25 + ] + }, + { + "field": "name", + "function": "process", + "line": 7, + "owner": "KotlinSyntaxFactsCore", + "receiver": "user.profile", + "span": [ + 7, + 19, + 7, + 37 + ] + }, + { + "field": "profile", + "function": "process", + "line": 7, + "owner": "KotlinSyntaxFactsCore", + "receiver": "user", + "span": [ + 7, + 19, + 7, + 31 + ] + }, + { + "field": "ready", + "function": "process", + "line": 17, + "owner": "KotlinSyntaxFactsCore", + "receiver": "user", + "span": [ + 17, + 37, + 17, + 47 + ] + }, + { + "field": "role", + "function": "process", + "line": 11, + "owner": "KotlinSyntaxFactsCore", + "receiver": "user", + "span": [ + 11, + 14, + 11, + 23 + ] + }, + { + "field": "send", + "function": "audit", + "line": 33, + "owner": "KotlinSyntaxFactsCore", + "receiver": "sink", + "span": [ + 33, + 8, + 33, + 17 + ] + } + ], + "state_writes": [ + + ], + "decisions": [ + { + "enclosing_span": [ + 11, + 8, + 15, + 9 + ], + "function": "process", + "kind": "case_dispatch", + "line": 11, + "members": [ + "\"admin\"", + "\"guest\"", + "\"owner\"" + ], + "predicate": "user.role", + "span": [ + 11, + 8, + 15, + 9 + ] + }, + { + "enclosing_span": [ + 17, + 8, + 22, + 9 + ], + "function": "process", + "kind": "conjunction", + "line": 17, + "members": [ + "status == Status.IDLE", + 
"user.ready" + ], + "predicate": "status == Status.IDLE && user.ready", + "span": [ + 17, + 12, + 17, + 47 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 11, + "predicate": "(user.role)", + "span": [ + 11, + 8, + 15, + 9 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 17, + "predicate": "status == Status.IDLE && user.ready", + "span": [ + 17, + 8, + 22, + 9 + ], + "state_refs": [ + "user.ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "println", + "function": "audit", + "kind": "hidden_io", + "line": 32, + "span": [ + 32, + 8, + 32, + 21 + ] + }, + { + "detail": "println", + "function": "process", + "kind": "hidden_io", + "line": 21, + "span": [ + 21, + 12, + 21, + 19 + ] + } + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/lua-core.json b/gems/decomplex/examples/syntax-facts/oracles/lua-core.json new file mode 100644 index 000000000..9791f7048 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/lua-core.json @@ -0,0 +1,597 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/lua/core.lua", + "language": "lua", + "functions": [ + { + "line": 13, + "name": "process", + "owner": "LuaSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 13, + 0, + 38, + 3 + ], + "visibility": null + }, + { + "line": 4, + "name": "LuaSyntaxFactsCore.new", + "owner": "core", + "params": [ + "status", + "sink" + ], + "span": [ + 4, + 0, + 11, + 3 + ], + "visibility": null + }, + { + "line": 40, + "name": "audit", + "owner": "LuaSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 40, + 0, + 44, + 3 + ], + "visibility": null + }, + { + "line": 46, + "name": "ready", + "owner": "LuaSyntaxFactsCore", + "params": [ + + ], + "span": [ + 46, + 0, + 48, + 3 + ], + "visibility": null + } + ], + "owners": [ + + ], + "calls": 
[ + { + "arguments": [ + "\"busy\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 28, + "message": "publish", + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 28, + 4, + 28, + 24 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 30, + "message": "print", + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 30, + 4, + 30, + 9 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 42, + "message": "send", + "owner": "LuaSyntaxFactsCore", + "receiver": "self.sink", + "safe_navigation": false, + "span": [ + 42, + 2, + 42, + 32 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 16, + "message": "callback", + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 16, + 2, + 16, + 19 + ] + }, + { + "arguments": [ + "instance", + "LuaSyntaxFactsCore" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "LuaSyntaxFactsCore.new", + "line": 10, + "message": "setmetatable", + "owner": "core", + "receiver": "self", + "safe_navigation": false, + "span": [ + 10, + 9, + 10, + 21 + ] + }, + { + "arguments": [ + "items" + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 33, + "message": "ipairs", + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 33, + 17, + 33, + 23 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 41, + "message": "print", + "owner": 
"LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 41, + 2, + 41, + 13 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 19, + "message": "escalate", + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 19, + 4, + 19, + 17 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 21, + "message": "fallback", + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 21, + 4, + 21, + 17 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 23, + "message": "default_case", + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 23, + 4, + 23, + 21 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 34, + "message": "children", + "owner": "LuaSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 34, + 4, + 34, + 17 + ] + } + ], + "state_reads": [ + { + "field": "active", + "function": "process", + "line": 15, + "owner": "LuaSyntaxFactsCore", + "receiver": "user", + "span": [ + 15, + 42, + 15, + 53 + ] + }, + { + "field": "count", + "function": "process", + "line": 27, + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "span": [ + 27, + 17, + 27, + 27 + ] + }, + { + "field": "count", + "function": "ready", + "line": 47, + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "span": [ + 47, + 9, + 47, + 19 + ] + }, + { + "field": "name", + "function": "process", + "line": 14, + "owner": "LuaSyntaxFactsCore", + "receiver": "user.profile", + "span": [ + 14, + 15, + 14, + 32 + ] + }, + { + "field": "profile", + 
"function": "process", + "line": 14, + "owner": "LuaSyntaxFactsCore", + "receiver": "user", + "span": [ + 14, + 15, + 14, + 27 + ] + }, + { + "field": "ready", + "function": "process", + "line": 26, + "owner": "LuaSyntaxFactsCore", + "receiver": "user", + "span": [ + 26, + 31, + 26, + 41 + ] + }, + { + "field": "role", + "function": "process", + "line": 18, + "owner": "LuaSyntaxFactsCore", + "receiver": "user", + "span": [ + 18, + 29, + 18, + 38 + ] + }, + { + "field": "role", + "function": "process", + "line": 18, + "owner": "LuaSyntaxFactsCore", + "receiver": "user", + "span": [ + 18, + 5, + 18, + 14 + ] + }, + { + "field": "role", + "function": "process", + "line": 20, + "owner": "LuaSyntaxFactsCore", + "receiver": "user", + "span": [ + 20, + 9, + 20, + 18 + ] + }, + { + "field": "sink", + "function": "audit", + "line": 42, + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "span": [ + 42, + 2, + 42, + 11 + ] + }, + { + "field": "status", + "function": "audit", + "line": 43, + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "span": [ + 43, + 9, + 43, + 20 + ] + }, + { + "field": "status", + "function": "process", + "line": 26, + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "span": [ + 26, + 5, + 26, + 16 + ] + } + ], + "state_writes": [ + { + "field": "__index", + "function": "(top-level)", + "line": 2, + "owner": "core", + "receiver": "LuaSyntaxFactsCore", + "span": [ + 2, + 0, + 2, + 47 + ] + }, + { + "field": "count", + "function": "process", + "line": 27, + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "span": [ + 27, + 4, + 27, + 31 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 26, + 2, + 31, + 5 + ], + "function": "process", + "kind": "conjunction", + "line": 26, + "members": [ + "self.status == \"idle\"", + "user.ready" + ], + "predicate": "self.status == \"idle\" and user.ready", + "span": [ + 26, + 5, + 26, + 41 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 18, + "predicate": 
"user.role == \"owner\" or user.role == \"admin\"", + "span": [ + 18, + 2, + 24, + 5 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 26, + "predicate": "self.status == \"idle\" and user.ready", + "span": [ + 26, + 2, + 31, + 5 + ], + "state_refs": [ + "status", + "user.ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "print", + "function": "audit", + "kind": "hidden_io", + "line": 41, + "span": [ + 41, + 2, + 41, + 13 + ] + }, + { + "detail": "print", + "function": "process", + "kind": "hidden_io", + "line": 30, + "span": [ + 30, + 4, + 30, + 9 + ] + }, + { + "detail": "setmetatable", + "function": "LuaSyntaxFactsCore.new", + "kind": "metaprogramming", + "line": 10, + "span": [ + 10, + 9, + 10, + 21 + ] + } + ], + "predicate_bodies": [ + { + "body": "name or \"missing\"", + "line": 13, + "name": "process", + "owner": "LuaSyntaxFactsCore", + "span": [ + 13, + 0, + 38, + 3 + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/php-core.json b/gems/decomplex/examples/syntax-facts/oracles/php-core.json new file mode 100644 index 000000000..b89836f8f --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/php-core.json @@ -0,0 +1,629 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/php/core.php", + "language": "php", + "functions": [ + { + "line": 15, + "name": "process", + "owner": "PhpSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 15, + 4, + 46, + 5 + ], + "visibility": "public" + }, + { + "line": 48, + "name": "audit", + "owner": "PhpSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 48, + 4, + 53, + 5 + ], + "visibility": "private" + }, + { + "line": 55, + "name": "ready", + "owner": "PhpSyntaxFactsCore", + "params": [ + + ], + "span": [ + 55, + 4, + 58, + 5 + ], + "visibility": "public" + }, + { + "line": 9, + "name": "__construct", + "owner": 
"PhpSyntaxFactsCore", + "params": [ + "status", + "sink" + ], + "span": [ + 9, + 4, + 13, + 5 + ], + "visibility": "public" + } + ], + "owners": [ + { + "kind": "class", + "line": 3, + "name": "PhpSyntaxFactsCore", + "span": [ + 3, + 0, + 59, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "\"busy\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 36, + "message": "publish", + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 36, + 12, + 36, + 34 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 38, + "message": "print", + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 38, + 12, + 38, + 29 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 51, + "message": "send", + "owner": "PhpSyntaxFactsCore", + "receiver": "this.sink", + "safe_navigation": false, + "span": [ + 51, + 8, + 51, + 42 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 19, + "message": "callback", + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 19, + 8, + 19, + 27 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 50, + "message": "print", + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 50, + 8, + 50, + 20 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 24, + "message": "escalate", + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": 
false, + "span": [ + 24, + 16, + 24, + 38 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 27, + "message": "fallback", + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 27, + 16, + 27, + 38 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 30, + "message": "defaultCase", + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 30, + 16, + 30, + 41 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 42, + "message": "children", + "owner": "PhpSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 42, + 12, + 42, + 29 + ] + } + ], + "state_reads": [ + { + "field": "active", + "function": "process", + "line": 18, + "owner": "PhpSyntaxFactsCore", + "receiver": "user", + "span": [ + 18, + 38, + 18, + 51 + ] + }, + { + "field": "children", + "function": "process", + "line": 42, + "owner": "PhpSyntaxFactsCore", + "receiver": "item", + "span": [ + 42, + 12, + 42, + 29 + ] + }, + { + "field": "count", + "function": "ready", + "line": 57, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 57, + 15, + 57, + 27 + ] + }, + { + "field": "defaultCase", + "function": "process", + "line": 30, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 30, + 16, + 30, + 41 + ] + }, + { + "field": "escalate", + "function": "process", + "line": 24, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 24, + 16, + 24, + 38 + ] + }, + { + "field": "fallback", + "function": "process", + "line": 27, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 27, + 16, + 27, + 38 + ] + }, + { + "field": "name", + "function": "process", + "line": 17, 
+ "owner": "PhpSyntaxFactsCore", + "receiver": "user?.profile", + "span": [ + 17, + 16, + 17, + 38 + ] + }, + { + "field": "profile", + "function": "process", + "line": 17, + "owner": "PhpSyntaxFactsCore", + "receiver": "user", + "span": [ + 17, + 16, + 17, + 31 + ] + }, + { + "field": "publish", + "function": "process", + "line": 36, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 36, + 12, + 36, + 34 + ] + }, + { + "field": "ready", + "function": "process", + "line": 34, + "owner": "PhpSyntaxFactsCore", + "receiver": "user", + "span": [ + 34, + 40, + 34, + 52 + ] + }, + { + "field": "role", + "function": "process", + "line": 21, + "owner": "PhpSyntaxFactsCore", + "receiver": "user", + "span": [ + 21, + 16, + 21, + 27 + ] + }, + { + "field": "send", + "function": "audit", + "line": 51, + "owner": "PhpSyntaxFactsCore", + "receiver": "this.sink", + "span": [ + 51, + 8, + 51, + 42 + ] + }, + { + "field": "sink", + "function": "audit", + "line": 51, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 51, + 8, + 51, + 19 + ] + }, + { + "field": "status", + "function": "audit", + "line": 52, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 52, + 15, + 52, + 28 + ] + }, + { + "field": "status", + "function": "process", + "line": 34, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 34, + 12, + 34, + 25 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "process", + "line": 35, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 35, + 12, + 35, + 29 + ] + }, + { + "field": "sink", + "function": "__construct", + "line": 12, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 12, + 8, + 12, + 27 + ] + }, + { + "field": "status", + "function": "__construct", + "line": 11, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 11, + 8, + 11, + 31 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 21, + 8, + 32, + 9 + ], + 
"function": "process", + "kind": "case_dispatch", + "line": 21, + "members": [ + "\"admin\"", + "\"guest\"", + "\"owner\"" + ], + "predicate": "user.role", + "span": [ + 21, + 8, + 32, + 9 + ] + }, + { + "enclosing_span": [ + 34, + 8, + 39, + 9 + ], + "function": "process", + "kind": "conjunction", + "line": 34, + "members": [ + "this.status === \"idle\"", + "user.ready" + ], + "predicate": "this.status === \"idle\" && user.ready", + "span": [ + 34, + 12, + 34, + 52 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 21, + "predicate": "(user.role)", + "span": [ + 21, + 8, + 32, + 9 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 34, + "predicate": "(this.status === \"idle\" && user.ready)", + "span": [ + 34, + 8, + 39, + 9 + ], + "state_refs": [ + "status", + "user.ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "print", + "function": "audit", + "kind": "hidden_io", + "line": 50, + "span": [ + 50, + 8, + 50, + 20 + ] + }, + { + "detail": "print", + "function": "process", + "kind": "hidden_io", + "line": 38, + "span": [ + 38, + 12, + 38, + 29 + ] + } + ], + "predicate_bodies": [ + { + "body": "name ?? 
null", + "line": 15, + "name": "process", + "owner": "PhpSyntaxFactsCore", + "span": [ + 15, + 4, + 46, + 5 + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/python-core.json b/gems/decomplex/examples/syntax-facts/oracles/python-core.json index fa22f9927..8f12ef7e8 100644 --- a/gems/decomplex/examples/syntax-facts/oracles/python-core.json +++ b/gems/decomplex/examples/syntax-facts/oracles/python-core.json @@ -6,94 +6,94 @@ "language": "python", "functions": [ { - "name": "__init__", + "line": 12, + "name": "process", "owner": "PythonSyntaxFactsCore", - "line": 7, + "params": [ + "self", + "user", + "items", + "callback" + ], "span": [ - 7, + 12, 4, - 10, - 22 + 52, + 41 ], - "visibility": "public", - "params": [ - "self", - "lock", - "resource" - ] + "visibility": "public" }, { + "line": 54, "name": "_normalize", "owner": "PythonSyntaxFactsCore", - "line": 54, + "params": [ + "self", + "value" + ], "span": [ 54, 4, 56, 22 ], - "visibility": "private", - "params": [ - "self", - "value" - ] + "visibility": "private" }, { + "line": 58, "name": "generator", "owner": "PythonSyntaxFactsCore", - "line": 58, + "params": [ + "self", + "values" + ], "span": [ 58, 4, 60, 23 ], - "visibility": "public", - "params": [ - "self", - "values" - ] + "visibility": "public" }, { - "name": "process", + "line": 62, + "name": "simple_with", "owner": "PythonSyntaxFactsCore", - "line": 12, - "span": [ - 12, - 4, - 52, - 41 - ], - "visibility": "public", "params": [ "self", - "user", - "items", - "callback" - ] - }, - { - "name": "simple_with", - "owner": "PythonSyntaxFactsCore", - "line": 62, + "resource" + ], "span": [ 62, 4, 64, 16 ], - "visibility": "public", + "visibility": "public" + }, + { + "line": 7, + "name": "__init__", + "owner": "PythonSyntaxFactsCore", "params": [ "self", + "lock", "resource" - ] + ], + "span": [ + 7, + 4, + 10, + 22 + ], + "visibility": "public" } ], "owners": [ { - "name": "PythonSyntaxFactsCore", "kind": "class", "line": 6, + 
"name": "PythonSyntaxFactsCore", "span": [ 6, 0, @@ -104,345 +104,213 @@ ], "calls": [ { - "receiver": "handle", - "message": "read", - "function": "process", - "owner": "PythonSyntaxFactsCore", - "line": 22, - "span": [ - 22, - 19, - 22, - 32 + "arguments": [ + "\"x\"" ], - "conditional": false, - "arguments": [], - "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "handle", - "message": "read", + "block": false, + "conditional": true, + "control": "conditional", "function": "process", - "owner": "PythonSyntaxFactsCore", - "line": 22, - "span": [ - 22, - 26, - 22, - 30 - ], - "conditional": false, - "arguments": [], - "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "item", + "line": 28, "message": "startswith", - "function": "process", "owner": "PythonSyntaxFactsCore", - "line": 28, + "receiver": "item", + "safe_navigation": false, "span": [ 28, 30, 28, 50 - ], - "conditional": true, - "arguments": [ - "\"x\"" - ], - "control": "conditional", - "safe_navigation": false, - "block": false + ] }, { - "receiver": "item", - "message": "startswith", - "function": "process", - "owner": "PythonSyntaxFactsCore", - "line": 28, - "span": [ - 28, - 35, - 28, - 45 - ], - "conditional": true, "arguments": [ - "\"x\"" + "item" ], + "block": false, + "conditional": true, "control": "conditional", + "function": "process", + "line": 29, + "message": "callback", + "owner": "PythonSyntaxFactsCore", + "receiver": "self", "safe_navigation": false, - "block": false + "span": [ + 29, + 16, + 29, + 30 + ] }, { - "receiver": "result", - "message": "append", + "arguments": [ + "item" + ], + "block": false, + "conditional": true, + "control": "iterates", "function": "process", - "owner": "PythonSyntaxFactsCore", "line": 37, + "message": "append", + "owner": "PythonSyntaxFactsCore", + "receiver": "result", + "safe_navigation": false, "span": [ 37, 12, 37, 31 - ], - "conditional": true, + ] + }, + { "arguments": [ 
- "item" + "result" ], + "block": false, + "conditional": true, "control": "iterates", - "safe_navigation": false, - "block": false - }, - { - "receiver": "result", - "message": "append", "function": "process", + "line": 40, + "message": "len", "owner": "PythonSyntaxFactsCore", - "line": 37, + "receiver": "self", + "safe_navigation": false, "span": [ - 37, - 19, - 37, - 25 - ], - "conditional": true, + 40, + 22, + 40, + 33 + ] + }, + { "arguments": [ - "item" + "result[index]" ], + "block": false, + "conditional": true, "control": "iterates", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "audit", "function": "process", - "owner": "PythonSyntaxFactsCore", "line": 45, + "message": "audit", + "owner": "PythonSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ 45, 16, 45, 41 + ] + }, + { + "arguments": [ + "user" ], + "block": false, "conditional": true, + "control": "conditional", + "function": "process", + "line": 33, + "message": "escalate", + "owner": "PythonSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 33, + 20, + 33, + 39 + ] + }, + { "arguments": [ - "result[index]" + "user" ], - "control": "iterates", + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 35, + "message": "default", + "owner": "PythonSyntaxFactsCore", + "receiver": "self", "safe_navigation": false, - "block": false + "span": [ + 35, + 20, + 35, + 38 + ] }, { - "receiver": "self", - "message": "audit", + "arguments": [ + "user.path" + ], + "block": false, + "conditional": false, + "control": "always", "function": "process", + "line": 21, + "message": "open", "owner": "PythonSyntaxFactsCore", - "line": 45, - "span": [ - 45, - 21, - 45, - 26 - ], - "conditional": true, - "arguments": [ - "result[index]" - ], - "control": "iterates", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "callback", - "function": 
"process", - "owner": "PythonSyntaxFactsCore", - "line": 29, - "span": [ - 29, - 16, - 29, - 30 - ], - "conditional": true, - "arguments": [ - "item" - ], - "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "default", - "function": "process", - "owner": "PythonSyntaxFactsCore", - "line": 35, - "span": [ - 35, - 20, - 35, - 38 - ], - "conditional": true, - "arguments": [ - "user" - ], - "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "default", - "function": "process", - "owner": "PythonSyntaxFactsCore", - "line": 35, - "span": [ - 35, - 25, - 35, - 32 - ], - "conditional": true, - "arguments": [ - "user" - ], - "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "escalate", - "function": "process", - "owner": "PythonSyntaxFactsCore", - "line": 33, - "span": [ - 33, - 20, - 33, - 39 - ], - "conditional": true, - "arguments": [ - "user" - ], - "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "escalate", - "function": "process", - "owner": "PythonSyntaxFactsCore", - "line": 33, - "span": [ - 33, - 25, - 33, - 33 - ], - "conditional": true, - "arguments": [ - "user" - ], - "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "len", - "function": "process", - "owner": "PythonSyntaxFactsCore", - "line": 40, - "span": [ - 40, - 22, - 40, - 33 - ], - "conditional": true, - "arguments": [ - "result" - ], - "control": "iterates", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "open", - "function": "process", - "owner": "PythonSyntaxFactsCore", - "line": 21, + "receiver": "self", + "safe_navigation": false, "span": [ 21, 13, 21, 28 - ], - "conditional": false, + ] + }, + { "arguments": [ - "user.path" + ], + "block": 
false, + "conditional": false, "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "value", - "message": "strip", "function": "_normalize", - "owner": "PythonSyntaxFactsCore", "line": 55, + "message": "strip", + "owner": "PythonSyntaxFactsCore", + "receiver": "value", + "safe_navigation": false, "span": [ 55, 18, 55, 31 - ], - "conditional": false, - "arguments": [], - "control": "always", - "safe_navigation": false, - "block": false + ] }, { - "receiver": "value", - "message": "strip", - "function": "_normalize", - "owner": "PythonSyntaxFactsCore", - "line": 55, - "span": [ - 55, - 24, - 55, - 29 + "arguments": [ + ], + "block": false, "conditional": false, - "arguments": [], "control": "always", + "function": "process", + "line": 22, + "message": "read", + "owner": "PythonSyntaxFactsCore", + "receiver": "handle", "safe_navigation": false, - "block": false + "span": [ + 22, + 19, + 22, + 32 + ] } ], "state_reads": [ { "field": "append", - "receiver": "result", "function": "process", - "owner": "PythonSyntaxFactsCore", "line": 37, + "owner": "PythonSyntaxFactsCore", + "receiver": "result", "span": [ 37, 12, @@ -452,10 +320,10 @@ }, { "field": "audit", - "receiver": "self", "function": "process", - "owner": "PythonSyntaxFactsCore", "line": 45, + "owner": "PythonSyntaxFactsCore", + "receiver": "self", "span": [ 45, 16, @@ -465,10 +333,10 @@ }, { "field": "default", - "receiver": "self", "function": "process", - "owner": "PythonSyntaxFactsCore", "line": 35, + "owner": "PythonSyntaxFactsCore", + "receiver": "self", "span": [ 35, 20, @@ -478,10 +346,10 @@ }, { "field": "escalate", - "receiver": "self", "function": "process", - "owner": "PythonSyntaxFactsCore", "line": 33, + "owner": "PythonSyntaxFactsCore", + "receiver": "self", "span": [ 33, 20, @@ -491,10 +359,10 @@ }, { "field": "name", - "receiver": "user.profile", "function": "process", - "owner": "PythonSyntaxFactsCore", "line": 13, + "owner": "PythonSyntaxFactsCore", + 
"receiver": "user.profile", "span": [ 13, 20, @@ -504,10 +372,10 @@ }, { "field": "path", - "receiver": "user", "function": "process", - "owner": "PythonSyntaxFactsCore", "line": 21, + "owner": "PythonSyntaxFactsCore", + "receiver": "user", "span": [ 21, 18, @@ -517,10 +385,10 @@ }, { "field": "profile", - "receiver": "user", "function": "process", - "owner": "PythonSyntaxFactsCore", "line": 13, + "owner": "PythonSyntaxFactsCore", + "receiver": "user", "span": [ 13, 20, @@ -530,10 +398,10 @@ }, { "field": "read", - "receiver": "handle", "function": "process", - "owner": "PythonSyntaxFactsCore", "line": 22, + "owner": "PythonSyntaxFactsCore", + "receiver": "handle", "span": [ 22, 19, @@ -543,10 +411,10 @@ }, { "field": "ready", - "receiver": "user", "function": "process", - "owner": "PythonSyntaxFactsCore", "line": 28, + "owner": "PythonSyntaxFactsCore", + "receiver": "user", "span": [ 28, 15, @@ -556,10 +424,10 @@ }, { "field": "startswith", - "receiver": "item", "function": "process", - "owner": "PythonSyntaxFactsCore", "line": 28, + "owner": "PythonSyntaxFactsCore", + "receiver": "item", "span": [ 28, 30, @@ -569,10 +437,10 @@ }, { "field": "strip", - "receiver": "value", "function": "_normalize", - "owner": "PythonSyntaxFactsCore", "line": 55, + "owner": "PythonSyntaxFactsCore", + "receiver": "value", "span": [ 55, 18, @@ -584,10 +452,10 @@ "state_writes": [ { "field": "_lock", - "receiver": "self", "function": "__init__", - "owner": "PythonSyntaxFactsCore", "line": 8, + "owner": "PythonSyntaxFactsCore", + "receiver": "self", "span": [ 8, 8, @@ -597,10 +465,10 @@ }, { "field": "count", - "receiver": "self", "function": "__init__", - "owner": "PythonSyntaxFactsCore", "line": 10, + "owner": "PythonSyntaxFactsCore", + "receiver": "self", "span": [ 10, 8, @@ -610,23 +478,23 @@ }, { "field": "count", - "receiver": "self", "function": "process", - "owner": "PythonSyntaxFactsCore", "line": 19, + "owner": "PythonSyntaxFactsCore", + "receiver": "self", "span": [ 19, 12, 
19, - 22 + 27 ] }, { "field": "resource", - "receiver": "self", "function": "__init__", - "owner": "PythonSyntaxFactsCore", "line": 9, + "owner": "PythonSyntaxFactsCore", + "receiver": "self", "span": [ 9, 8, @@ -637,25 +505,25 @@ ], "decisions": [ { + "enclosing_span": [ + 28, + 12, + 29, + 30 + ], + "function": "process", "kind": "conjunction", + "line": 28, "members": [ "item.startswith(\"x\")", "user.ready" ], - "function": "process", - "line": 28, + "predicate": "user.ready and item.startswith(\"x\")", "span": [ 28, 15, 28, 50 - ], - "predicate": "user.ready and item.startswith(\"x\")", - "enclosing_span": [ - 28, - 12, - 29, - 30 ] } ], @@ -663,25 +531,27 @@ { "function": "process", "line": 28, + "predicate": "user.ready and item.startswith(\"x\")", "span": [ 28, 12, 29, 30 ], - "predicate": "user.ready and item.startswith(\"x\")", "state_refs": [ "item.startswith", "user.ready" ] } ], - "dispatch_sites": [], + "dispatch_sites": [ + + ], "semantic_effects": [ { - "kind": "hidden_io", "detail": "open", "function": "process", + "kind": "hidden_io", "line": 21, "span": [ 21, @@ -691,726 +561,8 @@ ] } ], - "predicate_bodies": [], - "local_complexity": [ - { - "id": "PythonSyntaxFactsCore#__init__", - "score": 0.0, - "signals": {} - }, - { - "id": "PythonSyntaxFactsCore#_normalize", - "score": 0.0, - "signals": { - "early_exits": 2 - } - }, - { - "id": "PythonSyntaxFactsCore#generator", - "score": 3.2, - "signals": { - "loops": 2, - "nested": 1 - } - }, - { - "id": "PythonSyntaxFactsCore#process", - "score": 15.0, - "signals": { - "boolean_ops": 2, - "branches": 3, - "cases": 3, - "early_exits": 2, - "loops": 4, - "nested": 5 - } - }, - { - "id": "PythonSyntaxFactsCore#simple_with", - "score": 0.0, - "signals": {} - } - ], - "clone_candidates": [ - { - "method_name": "(top-level)", - "node_name": "class_definition", - "line": 6, - "span": [ - 6, - 0, - 64, - 16 - ], - "mass": 377, - "fingerprint": "class_definition(id id ::: block(function_definition(id id 
parameters((:( id ,:, id ,:, id ):)) ::: block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= lit))) function_definition(id id parameters((:( id ,:, typed_parameter(id ::: type(string_start:\" lit string_end:\")) ,:, typed_parameter(id ::: type(id type_parameter([:[ id ]:]))) ,:, id ):)) ::: block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. 
id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id)))) function_definition(id id parameters((:( id ,:, typed_default_parameter(id ::: type(id |:| nil) =:= nil) ):)) ::: block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id id id ::: block(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id ::: id))))", - "child_fingerprints": [ - "function_definition(id id parameters((:( id ,:, id ,:, id ):)) ::: block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= lit)))", - "function_definition(id id parameters((:( id ,:, typed_parameter(id ::: type(string_start:\" lit string_end:\")) ,:, typed_parameter(id ::: type(id type_parameter([:[ id ]:]))) ,:, id ):)) ::: block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. 
id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id))))", - "function_definition(id id parameters((:( id ,:, typed_default_parameter(id ::: type(id |:| nil) =:= nil) ):)) ::: block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id)))", - "function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id id id ::: block(id id)))", - "function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id ::: id))" - ], - "child_masses": [ - 34, - 258, - 46, - 19, - 15 - ] - }, - { - "method_name": "(top-level)", - "node_name": "module", - "line": 1, - "span": [ - 1, - 0, - 66, - 0 - ], - "mass": 389, - "fingerprint": "module(future_import_statement(id id id id) import_statement(id dotted_name(id .:. id)) class_definition(id id ::: block(function_definition(id id parameters((:( id ,:, id ,:, id ):)) ::: block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. 
id) =:= id) expression_statement(attribute(id .:. id) =:= lit))) function_definition(id id parameters((:( id ,:, typed_parameter(id ::: type(string_start:\" lit string_end:\")) ,:, typed_parameter(id ::: type(id type_parameter([:[ id ]:]))) ,:, id ):)) ::: block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. 
id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id)))) function_definition(id id parameters((:( id ,:, typed_default_parameter(id ::: type(id |:| nil) =:= nil) ):)) ::: block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id id id ::: block(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id ::: id)))))", - "child_fingerprints": [ - "future_import_statement(id id id id)", - "import_statement(id dotted_name(id .:. id))", - "class_definition(id id ::: block(function_definition(id id parameters((:( id ,:, id ,:, id ):)) ::: block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= lit))) function_definition(id id parameters((:( id ,:, typed_parameter(id ::: type(string_start:\" lit string_end:\")) ,:, typed_parameter(id ::: type(id type_parameter([:[ id ]:]))) ,:, id ):)) ::: block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. 
id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id)))) function_definition(id id parameters((:( id ,:, typed_default_parameter(id ::: type(id |:| nil) =:= nil) ):)) ::: block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id id id ::: block(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id ::: id))))" - ], - "child_masses": [ - 5, - 6, - 377 - ] - }, - { - "method_name": "__init__", - "node_name": "block", - "line": 7, - "span": [ - 7, - 4, - 64, - 16 - ], - "mass": 373, - "fingerprint": "block(function_definition(id id parameters((:( id ,:, id ,:, id ):)) ::: block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. 
id) =:= lit))) function_definition(id id parameters((:( id ,:, typed_parameter(id ::: type(string_start:\" lit string_end:\")) ,:, typed_parameter(id ::: type(id type_parameter([:[ id ]:]))) ,:, id ):)) ::: block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. 
id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id)))) function_definition(id id parameters((:( id ,:, typed_default_parameter(id ::: type(id |:| nil) =:= nil) ):)) ::: block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id id id ::: block(id id))) function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id ::: id)))", - "child_fingerprints": [ - "function_definition(id id parameters((:( id ,:, id ,:, id ):)) ::: block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= lit)))", - "function_definition(id id parameters((:( id ,:, typed_parameter(id ::: type(string_start:\" lit string_end:\")) ,:, typed_parameter(id ::: type(id type_parameter([:[ id ]:]))) ,:, id ):)) ::: block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. 
id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id))))", - "function_definition(id id parameters((:( id ,:, typed_default_parameter(id ::: type(id |:| nil) =:= nil) ):)) ::: block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id)))", - "function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id id id ::: block(id id)))", - "function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id ::: id))" - ], - "child_masses": [ - 34, - 258, - 46, - 19, - 15 - ] - }, - { - "method_name": "__init__", - "node_name": "block", - "line": 8, - "span": [ - 8, - 8, - 10, - 22 - ], - "mass": 22, - "fingerprint": "block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= lit))", - "child_fingerprints": [ - "expression_statement(attribute(id .:. id) =:= id)", - "expression_statement(attribute(id .:. 
id) =:= id)", - "expression_statement(attribute(id .:. id) =:= lit)" - ], - "child_masses": [ - 7, - 7, - 7 - ] - }, - { - "method_name": "__init__", - "node_name": "defn", - "line": 7, - "span": [ - 7, - 4, - 10, - 22 - ], - "mass": 34, - "fingerprint": "function_definition(id id parameters((:( id ,:, id ,:, id ):)) ::: block(expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= id) expression_statement(attribute(id .:. id) =:= lit)))", - "child_fingerprints": [ - "expression_statement(attribute(id .:. id) =:= id)", - "expression_statement(attribute(id .:. id) =:= id)", - "expression_statement(attribute(id .:. id) =:= lit)" - ], - "child_masses": [ - 7, - 7, - 7 - ] - }, - { - "method_name": "_normalize", - "node_name": "block", - "line": 55, - "span": [ - 55, - 8, - 56, - 22 - ], - "mass": 28, - "fingerprint": "block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id))", - "child_fingerprints": [ - "expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\")))", - "return_statement(id id)" - ], - "child_masses": [ - 24, - 3 - ] - }, - { - "method_name": "_normalize", - "node_name": "defn", - "line": 54, - "span": [ - 54, - 4, - 56, - 22 - ], - "mass": 46, - "fingerprint": "function_definition(id id parameters((:( id ,:, typed_default_parameter(id ::: type(id |:| nil) =:= nil) ):)) ::: block(expression_statement(id =:= conditional_expression(call(attribute(id .:. id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\"))) return_statement(id id)))", - "child_fingerprints": [ - "expression_statement(id =:= conditional_expression(call(attribute(id .:. 
id) argument_list((:( ):))) id comparison_operator(id is not(id id) nil) id string(string_start:\" lit string_end:\")))", - "return_statement(id id)" - ], - "child_masses": [ - 24, - 3 - ] - }, - { - "method_name": "generator", - "node_name": "block", - "line": 59, - "span": [ - 59, - 8, - 60, - 23 - ], - "mass": 9, - "fingerprint": "block(id id id id ::: block(id id))", - "child_fingerprints": [], - "child_masses": [] - }, - { - "method_name": "generator", - "node_name": "block", - "line": 60, - "span": [ - 60, - 12, - 60, - 23 - ], - "mass": 3, - "fingerprint": "block(id id)", - "child_fingerprints": [], - "child_masses": [] - }, - { - "method_name": "generator", - "node_name": "defn", - "line": 58, - "span": [ - 58, - 4, - 60, - 23 - ], - "mass": 19, - "fingerprint": "function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id id id ::: block(id id)))", - "child_fingerprints": [ - "block(id id)" - ], - "child_masses": [ - 3 - ] - }, - { - "method_name": "process", - "node_name": "block", - "line": 13, - "span": [ - 13, - 8, - 52, - 41 - ], - "mass": 230, - "fingerprint": "block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. 
id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id)))", - "child_fingerprints": [ - "expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id))", - "expression_statement(id ::: id)", - "expression_statement(id =:= list([:[ ]:]))", - "expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\"))", - "with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit))", - "with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):)))))", - "for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. 
id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):)))))", - "expression_statement(id =:= lit)", - "while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit)))", - "assert_statement(id id)", - "return_statement(id conditional_expression(id id id id id))" - ], - "child_masses": [ - 12, - 4, - 6, - 8, - 14, - 26, - 91, - 4, - 53, - 3, - 8 - ] - }, - { - "method_name": "process", - "node_name": "block", - "line": 19, - "span": [ - 19, - 12, - 19, - 27 - ], - "mass": 7, - "fingerprint": "block(attribute(id .:. id) +=:+= lit)", - "child_fingerprints": [ - "attribute(id .:. id)", - "lit" - ], - "child_masses": [ - 4, - 1 - ] - }, - { - "method_name": "process", - "node_name": "block", - "line": 22, - "span": [ - 22, - 12, - 22, - 32 - ], - "mass": 11, - "fingerprint": "block(id =:= call(attribute(id .:. id) argument_list((:( ):))))", - "child_fingerprints": [ - "call(attribute(id .:. id) argument_list((:( ):)))" - ], - "child_masses": [ - 8 - ] - }, - { - "method_name": "process", - "node_name": "block", - "line": 25, - "span": [ - 25, - 12, - 37, - 31 - ], - "mass": 85, - "fingerprint": "block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. 
id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))", - "child_fingerprints": [ - "if_statement(id comparison_operator(id id nil) ::: id)", - "if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):))))", - "match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):))))))", - "expression_statement(attribute(id .:. id) argument_list((:( id ):)))" - ], - "child_masses": [ - 8, - 27, - 40, - 9 - ] - }, - { - "method_name": "process", - "node_name": "block", - "line": 29, - "span": [ - 29, - 16, - 29, - 30 - ], - "mass": 6, - "fingerprint": "block(id argument_list((:( id ):)))", - "child_fingerprints": [], - "child_masses": [] - }, - { - "method_name": "process", - "node_name": "block", - "line": 31, - "span": [ - 31, - 23, - 35, - 38 - ], - "mass": 36, - "fingerprint": "block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))", - "child_fingerprints": [ - "case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):))))", - "case_clause(id id ::: block(attribute(id .:. 
id) argument_list((:( id ):))))" - ], - "child_masses": [ - 22, - 13 - ] - }, - { - "method_name": "process", - "node_name": "block", - "line": 33, - "span": [ - 33, - 20, - 33, - 39 - ], - "mass": 9, - "fingerprint": "block(attribute(id .:. id) argument_list((:( id ):)))", - "child_fingerprints": [ - "attribute(id .:. id)" - ], - "child_masses": [ - 4 - ] - }, - { - "method_name": "process", - "node_name": "block", - "line": 35, - "span": [ - 35, - 20, - 35, - 38 - ], - "mass": 9, - "fingerprint": "block(attribute(id .:. id) argument_list((:( id ):)))", - "child_fingerprints": [ - "attribute(id .:. id)" - ], - "child_masses": [ - 4 - ] - }, - { - "method_name": "process", - "node_name": "block", - "line": 41, - "span": [ - 41, - 12, - 49, - 22 - ], - "mass": 41, - "fingerprint": "block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))", - "child_fingerprints": [ - "if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id)", - "try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id))", - "expression_statement(id +=:+= lit)" - ], - "child_masses": [ - 15, - 21, - 4 - ] - }, - { - "method_name": "process", - "node_name": "block", - "line": 45, - "span": [ - 45, - 16, - 45, - 41 - ], - "mass": 13, - "fingerprint": "block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):)))", - "child_fingerprints": [ - "attribute(id .:. 
id)" - ], - "child_masses": [ - 4 - ] - }, - { - "method_name": "process", - "node_name": "case_clause", - "line": 32, - "span": [ - 32, - 16, - 33, - 39 - ], - "mass": 22, - "fingerprint": "case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):))))", - "child_fingerprints": [ - "attribute(id .:. id)" - ], - "child_masses": [ - 4 - ] - }, - { - "method_name": "process", - "node_name": "case_clause", - "line": 34, - "span": [ - 34, - 16, - 35, - 38 - ], - "mass": 13, - "fingerprint": "case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):))))", - "child_fingerprints": [ - "attribute(id .:. id)" - ], - "child_masses": [ - 4 - ] - }, - { - "method_name": "process", - "node_name": "defn", - "line": 12, - "span": [ - 12, - 4, - 52, - 41 - ], - "mass": 258, - "fingerprint": "function_definition(id id parameters((:( id ,:, typed_parameter(id ::: type(string_start:\" lit string_end:\")) ,:, typed_parameter(id ::: type(id type_parameter([:[ id ]:]))) ,:, id ):)) ::: block(expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id)) expression_statement(id ::: id) expression_statement(id =:= list([:[ ]:])) expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\")) with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit)) with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):))))) for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. 
id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):))))) expression_statement(id =:= lit) while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit))) assert_statement(id id) return_statement(id conditional_expression(id id id id id))))", - "child_fingerprints": [ - "expression_statement(id ::: id =:= attribute(attribute(id .:. id) .:. id))", - "expression_statement(id ::: id)", - "expression_statement(id =:= list([:[ ]:]))", - "expression_statement(id =:= string(string_start:\" string_content(escape_sequence:\\\\) string_end:\"))", - "with_statement(id with_clause(id .:. id) ::: block(attribute(id .:. id) +=:+= lit))", - "with_statement(id with_clause(call(id argument_list((:( attribute(id .:. id) ):))) id id) ::: block(id =:= call(attribute(id .:. id) argument_list((:( ):)))))", - "for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. 
id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):)))))", - "expression_statement(id =:= lit)", - "while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit)))", - "assert_statement(id id)", - "return_statement(id conditional_expression(id id id id id))" - ], - "child_masses": [ - 12, - 4, - 6, - 8, - 14, - 26, - 91, - 4, - 53, - 3, - 8 - ] - }, - { - "method_name": "process", - "node_name": "for_statement", - "line": 24, - "span": [ - 24, - 8, - 37, - 31 - ], - "mass": 91, - "fingerprint": "for_statement(id id id id ::: block(if_statement(id comparison_operator(id id nil) ::: id) if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):)))) match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):)))))) expression_statement(attribute(id .:. id) argument_list((:( id ):)))))", - "child_fingerprints": [ - "if_statement(id comparison_operator(id id nil) ::: id)", - "if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. 
id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):))))", - "match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):))))))", - "expression_statement(attribute(id .:. id) argument_list((:( id ):)))" - ], - "child_masses": [ - 8, - 27, - 40, - 9 - ] - }, - { - "method_name": "process", - "node_name": "if_statement", - "line": 25, - "span": [ - 25, - 12, - 26, - 24 - ], - "mass": 8, - "fingerprint": "if_statement(id comparison_operator(id id nil) ::: id)", - "child_fingerprints": [ - "comparison_operator(id id nil)", - "id" - ], - "child_masses": [ - 4, - 1 - ] - }, - { - "method_name": "process", - "node_name": "if_statement", - "line": 28, - "span": [ - 28, - 12, - 29, - 30 - ], - "mass": 27, - "fingerprint": "if_statement(id boolean_operator(attribute(id .:. id) id call(attribute(id .:. 
id) argument_list((:( string(string_start:\" lit string_end:\") ):)))) ::: block(id argument_list((:( id ):))))", - "child_fingerprints": [], - "child_masses": [] - }, - { - "method_name": "process", - "node_name": "if_statement", - "line": 41, - "span": [ - 41, - 12, - 42, - 21 - ], - "mass": 15, - "fingerprint": "if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id)", - "child_fingerprints": [ - "comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\"))", - "id" - ], - "child_masses": [ - 11, - 1 - ] - }, - { - "method_name": "process", - "node_name": "match_statement", - "line": 31, - "span": [ - 31, - 12, - 35, - 38 - ], - "mass": 40, - "fingerprint": "match_statement(id id ::: block(case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):)))) case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):))))))", - "child_fingerprints": [ - "case_clause(id case_pattern(string(string_start:\" lit string_end:\") |:| string(string_start:\" lit string_end:\")) ::: block(attribute(id .:. id) argument_list((:( id ):))))", - "case_clause(id id ::: block(attribute(id .:. id) argument_list((:( id ):))))" - ], - "child_masses": [ - 22, - 13 - ] - }, - { - "method_name": "process", - "node_name": "while_statement", - "line": 40, - "span": [ - 40, - 8, - 49, - 22 - ], - "mass": 53, - "fingerprint": "while_statement(id comparison_operator(id <:< call(id argument_list((:( id ):)))) ::: block(if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id) try_statement(id ::: block(attribute(id .:. 
id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id)) expression_statement(id +=:+= lit)))", - "child_fingerprints": [ - "if_statement(id comparison_operator(subscript(id [:[ id ]:]) ==:== string(string_start:\" lit string_end:\")) ::: id)", - "try_statement(id ::: block(attribute(id .:. id) argument_list((:( subscript(id [:[ id ]:]) ):))) except_clause(id id ::: id))", - "expression_statement(id +=:+= lit)" - ], - "child_masses": [ - 15, - 21, - 4 - ] - }, - { - "method_name": "simple_with", - "node_name": "block", - "line": 63, - "span": [ - 63, - 8, - 64, - 16 - ], - "mass": 5, - "fingerprint": "block(id id ::: id)", - "child_fingerprints": [ - "id", - "id" - ], - "child_masses": [ - 1, - 1 - ] - }, - { - "method_name": "simple_with", - "node_name": "defn", - "line": 62, - "span": [ - 62, - 4, - 64, - 16 - ], - "mass": 15, - "fingerprint": "function_definition(id id parameters((:( id ,:, id ):)) ::: block(id id ::: id))", - "child_fingerprints": [ - "id", - "id" - ], - "child_masses": [ - 1, - 1 - ] - } + "predicate_bodies": [ + ] } ] diff --git a/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json b/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json index b756de0b4..75a78ed85 100644 --- a/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json +++ b/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json @@ -6,114 +6,118 @@ "language": "ruby", "functions": [ { - "name": "audit", + "line": 14, + "name": "self.build", "owner": "RubySyntaxFactsCore", - "line": 57, + "params": [ + "source" + ], "span": [ - 57, + 14, 2, - 62, + 16, 5 ], - "visibility": "private", - "params": [ - "name" - ] + "visibility": "public" }, { + "line": 19, "name": "initialize", "owner": "RubySyntaxFactsCore", - "line": 19, + "params": [ + "source" + ], "span": [ 19, 2, 23, 5 ], - "visibility": "public", - "params": [ - "source" - ] + "visibility": "public" }, { - "name": "inline_private", + "line": 26, + "name": "process", "owner": 
"RubySyntaxFactsCore", - "line": 64, + "params": [ + "user", + "items", + "callback" + ], "span": [ - 64, - 10, - 66, + 26, + 2, + 53, 5 ], - "visibility": "private", - "params": [ - "value" - ] + "visibility": "public" }, { - "name": "loaded?", + "line": 57, + "name": "audit", "owner": "RubySyntaxFactsCore", - "line": 72, + "params": [ + "name" + ], "span": [ - 72, + 57, 2, - 72, - 33 + 62, + 5 ], - "visibility": "private", - "params": [] + "visibility": "private" }, { - "name": "process", + "line": 64, + "name": "inline_private", "owner": "RubySyntaxFactsCore", - "line": 26, + "params": [ + "value" + ], "span": [ - 26, - 2, - 53, + 64, + 10, + 66, 5 ], - "visibility": "public", - "params": [ - "user", - "items", - "callback" - ] + "visibility": "private" }, { + "line": 68, "name": "ready?", "owner": "RubySyntaxFactsCore", - "line": 68, + "params": [ + + ], "span": [ 68, 2, 70, 5 ], - "visibility": "private", - "params": [] + "visibility": "private" }, { - "name": "self.build", + "line": 72, + "name": "loaded?", "owner": "RubySyntaxFactsCore", - "line": 14, + "params": [ + + ], "span": [ - 14, + 72, 2, - 16, - 5 + 72, + 33 ], - "visibility": "public", - "params": [ - "source" - ] + "visibility": "private" } ], "owners": [ { - "name": "Account", "kind": "class", "line": 3, + "name": "Account", "span": [ 3, 0, @@ -122,9 +126,9 @@ ] }, { - "name": "RubySyntaxFactsCore", "kind": "class", "line": 8, + "name": "RubySyntaxFactsCore", "span": [ 8, 0, @@ -135,743 +139,448 @@ ], "calls": [ { - "receiver": "%w[owner admin]", - "message": "freeze", - "function": "(top-level)", - "owner": "RubySyntaxFactsCore", - "line": 9, - "span": [ - 9, - 16, - 9, - 38 - ], - "conditional": false, - "arguments": [], - "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "Account", - "message": "new", - "function": "process", - "owner": "RubySyntaxFactsCore", - "line": 28, - "span": [ - 28, - 14, - 28, - 59 - ], - "conditional": false, "arguments": [ - 
"name: name", - "active: user.active?" + "0", + "Integer" ], + "block": false, + "conditional": false, "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "T", - "message": "let", "function": "initialize", - "owner": "RubySyntaxFactsCore", "line": 21, + "message": "let", + "owner": "RubySyntaxFactsCore", + "receiver": "T", + "safe_navigation": false, "span": [ 21, 13, 21, 30 + ] + }, + { + "arguments": [ + ":active", + "T::Boolean" ], + "block": false, "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 5, + "message": "prop", + "owner": "Account", + "receiver": "self", + "safe_navigation": false, + "span": [ + 5, + 2, + 5, + 26 + ] + }, + { "arguments": [ - "0", - "Integer" + ":count" ], + "block": false, + "conditional": false, "control": "always", + "function": "(top-level)", + "line": 12, + "message": "attr_reader", + "owner": "RubySyntaxFactsCore", + "receiver": "self", "safe_navigation": false, - "block": false + "span": [ + 12, + 2, + 12, + 20 + ] }, { - "receiver": "T", - "message": "let", + "arguments": [ + ":idle", + "Status" + ], + "block": false, + "conditional": false, + "control": "always", "function": "initialize", - "owner": "RubySyntaxFactsCore", "line": 22, + "message": "let", + "owner": "RubySyntaxFactsCore", + "receiver": "T", + "safe_navigation": false, "span": [ 22, 14, 22, 34 - ], - "conditional": false, + ] + }, + { "arguments": [ - ":idle", - "Status" + ":name", + "String" ], + "block": false, + "conditional": false, "control": "always", + "function": "(top-level)", + "line": 4, + "message": "const", + "owner": "Account", + "receiver": "self", "safe_navigation": false, - "block": false + "span": [ + 4, + 2, + 4, + 21 + ] }, { - "receiver": "T", - "message": "type_alias", - "function": "(top-level)", + "arguments": [ + ":ready" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 43, + "message": "publish", "owner": 
"RubySyntaxFactsCore", - "line": 10, + "receiver": "self", + "safe_navigation": false, "span": [ - 10, - 11, - 10, - 34 + 43, + 6, + 43, + 21 + ] + }, + { + "arguments": [ + ":record", + "name" ], + "block": false, "conditional": false, - "arguments": [], "control": "always", + "function": "audit", + "line": 59, + "message": "send", + "owner": "RubySyntaxFactsCore", + "receiver": "self", "safe_navigation": false, - "block": true + "span": [ + 59, + 4, + 59, + 23 + ] }, { - "receiver": "callback", - "message": "call", + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", "function": "process", + "line": 45, + "message": "warn", "owner": "RubySyntaxFactsCore", - "line": 30, + "receiver": "self", + "safe_navigation": false, "span": [ - 30, - 4, - 30, - 22 - ], - "conditional": false, + 45, + 6, + 45, + 23 + ] + }, + { "arguments": [ "account" ], + "block": false, + "conditional": false, "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "item", - "message": "children", "function": "process", + "line": 30, + "message": "call", "owner": "RubySyntaxFactsCore", - "line": 49, - "span": [ - 49, - 6, - 49, - 19 - ], - "conditional": true, - "arguments": [], - "control": "iterates", + "receiver": "callback", "safe_navigation": false, - "block": false - }, - { - "receiver": "items", - "message": "flat_map", - "function": "process", - "owner": "RubySyntaxFactsCore", - "line": 48, "span": [ - 48, + 30, 4, - 50, - 7 + 30, + 22 + ] + }, + { + "arguments": [ + "inline_private", + "(value)", + "helper(value)" ], + "block": false, "conditional": false, - "arguments": [], "control": "always", - "safe_navigation": false, - "block": true - }, - { - "receiver": "self", - "message": "attr_reader", "function": "(top-level)", + "line": 64, + "message": "private", "owner": "RubySyntaxFactsCore", - "line": 12, + "receiver": "self", + "safe_navigation": false, "span": [ - 12, + 64, 2, - 12, - 20 - ], - 
"conditional": false, + 66, + 5 + ] + }, + { "arguments": [ - ":count" + "name" ], + "block": false, + "conditional": false, "control": "always", + "function": "audit", + "line": 58, + "message": "puts", + "owner": "RubySyntaxFactsCore", + "receiver": "self", "safe_navigation": false, - "block": false + "span": [ + 58, + 4, + 58, + 14 + ] }, { - "receiver": "self", - "message": "audit", + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", "function": "process", - "owner": "RubySyntaxFactsCore", "line": 29, + "message": "audit", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ 29, 4, 29, 15 - ], - "conditional": false, + ] + }, + { "arguments": [ - "name" + "name: name", + "active: user.active?" ], + "block": false, + "conditional": false, "control": "always", + "function": "process", + "line": 28, + "message": "new", + "owner": "RubySyntaxFactsCore", + "receiver": "Account", "safe_navigation": false, - "block": false + "span": [ + 28, + 14, + 28, + 59 + ] }, { + "arguments": [ + "source" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "self.build", + "line": 15, + "message": "new", + "owner": "RubySyntaxFactsCore", "receiver": "self", - "message": "const", - "function": "(top-level)", - "owner": "Account", - "line": 4, + "safe_navigation": false, "span": [ + 15, 4, - 2, - 4, - 21 - ], - "conditional": false, + 15, + 15 + ] + }, + { "arguments": [ - ":name", - "String" + "source: Object" ], + "block": false, + "conditional": false, "control": "always", + "function": "(top-level)", + "line": 18, + "message": "params", + "owner": "RubySyntaxFactsCore", + "receiver": "self", "safe_navigation": false, - "block": false + "span": [ + 18, + 8, + 18, + 30 + ] }, { - "receiver": "self", - "message": "default", - "function": "process", - "owner": "RubySyntaxFactsCore", - "line": 38, - "span": [ - 38, - 6, - 38, - 19 - ], - "conditional": true, 
"arguments": [ "user" ], + "block": false, + "conditional": true, "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "escalate", "function": "process", - "owner": "RubySyntaxFactsCore", "line": 34, + "message": "escalate", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ 34, 6, 34, 20 - ], - "conditional": true, + ] + }, + { "arguments": [ "user" ], + "block": false, + "conditional": true, "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "fallback", "function": "process", - "owner": "RubySyntaxFactsCore", "line": 36, + "message": "fallback", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ 36, 6, 36, 20 - ], - "conditional": true, + ] + }, + { "arguments": [ "user" ], + "block": false, + "conditional": true, "control": "conditional", + "function": "process", + "line": 38, + "message": "default", + "owner": "RubySyntaxFactsCore", + "receiver": "self", "safe_navigation": false, - "block": false + "span": [ + 38, + 6, + 38, + 19 + ] }, { + "arguments": [ + "user: Object", + "items: Array", + "callback: Proc" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 25, + "message": "params", + "owner": "RubySyntaxFactsCore", "receiver": "self", - "message": "helper", + "safe_navigation": false, + "span": [ + 25, + 8, + 25, + 58 + ] + }, + { + "arguments": [ + "value" + ], + "block": false, + "conditional": false, + "control": "always", "function": "inline_private", - "owner": "RubySyntaxFactsCore", "line": 65, + "message": "helper", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ 65, 4, 65, 17 - ], - "conditional": false, + ] + }, + { "arguments": [ - "value" + ], + "block": false, + "conditional": false, "control": "always", - "safe_navigation": false, - 
"block": false - }, - { - "receiver": "self", - "message": "new", - "function": "self.build", - "owner": "RubySyntaxFactsCore", - "line": 15, - "span": [ - 15, - 4, - 15, - 15 - ], - "conditional": false, - "arguments": [ - "source" - ], - "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "params", - "function": "(top-level)", - "owner": "RubySyntaxFactsCore", - "line": 18, - "span": [ - 18, - 8, - 18, - 30 - ], - "conditional": false, - "arguments": [ - "source: Object" - ], - "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "params", - "function": "(top-level)", - "owner": "RubySyntaxFactsCore", - "line": 25, - "span": [ - 25, - 8, - 25, - 58 - ], - "conditional": false, - "arguments": [ - "user: Object", - "items: Array", - "callback: Proc" - ], - "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "private", - "function": "(top-level)", - "owner": "RubySyntaxFactsCore", - "line": 55, - "span": [ - 55, - 2, - 55, - 9 - ], - "conditional": false, - "arguments": [], - "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "private", - "function": "(top-level)", - "owner": "RubySyntaxFactsCore", - "line": 64, - "span": [ - 64, - 2, - 66, - 5 - ], - "conditional": false, - "arguments": [ - "inline_private", - "(value)", - "helper(value)" - ], - "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "prop", - "function": "(top-level)", - "owner": "Account", - "line": 5, - "span": [ - 5, - 2, - 5, - 26 - ], - "conditional": false, - "arguments": [ - ":active", - "T::Boolean" - ], - "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "publish", - "function": "process", - "owner": "RubySyntaxFactsCore", - "line": 43, - "span": [ 
- 43, - 6, - 43, - 21 - ], - "conditional": true, - "arguments": [ - ":ready" - ], - "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "puts", - "function": "audit", - "owner": "RubySyntaxFactsCore", - "line": 58, - "span": [ - 58, - 4, - 58, - 14 - ], - "conditional": false, - "arguments": [ - "name" - ], - "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "send", - "function": "audit", - "owner": "RubySyntaxFactsCore", - "line": 59, - "span": [ - 59, - 4, - 59, - 23 - ], - "conditional": false, - "arguments": [ - ":record", - "name" - ], - "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "sig", - "function": "(top-level)", - "owner": "RubySyntaxFactsCore", - "line": 18, - "span": [ - 18, - 2, - 18, - 37 - ], - "conditional": false, - "arguments": [], - "control": "always", - "safe_navigation": false, - "block": true - }, - { - "receiver": "self", - "message": "sig", - "function": "(top-level)", - "owner": "RubySyntaxFactsCore", - "line": 25, - "span": [ - 25, - 2, - 25, - 76 - ], - "conditional": false, - "arguments": [], - "control": "always", - "safe_navigation": false, - "block": true - }, - { - "receiver": "self", - "message": "warn", - "function": "process", - "owner": "RubySyntaxFactsCore", - "line": 45, - "span": [ - 45, - 6, - 45, - 23 - ], - "conditional": true, - "arguments": [ - "\"not ready\"" - ], - "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "user", - "message": "active?", - "function": "process", - "owner": "RubySyntaxFactsCore", - "line": 28, - "span": [ - 28, - 46, - 28, - 58 - ], - "conditional": false, - "arguments": [], - "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "user", - "message": "profile", - "function": "process", - "owner": "RubySyntaxFactsCore", - "line": 27, 
- "span": [ - 27, - 11, - 27, - 24 - ], - "conditional": false, - "arguments": [], - "control": "always", - "safe_navigation": true, - "block": false - }, - { - "receiver": "user", - "message": "ready?", - "function": "process", - "owner": "RubySyntaxFactsCore", - "line": 41, - "span": [ - 41, - 27, - 41, - 38 - ], - "conditional": true, - "arguments": [], - "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "user", - "message": "role", - "function": "process", - "owner": "RubySyntaxFactsCore", - "line": 32, - "span": [ - 32, - 9, - 32, - 18 - ], - "conditional": true, - "arguments": [], - "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "user&.profile", - "message": "name", - "function": "process", - "owner": "RubySyntaxFactsCore", - "line": 27, - "span": [ - 27, - 11, - 27, - 30 - ], - "conditional": false, - "arguments": [], - "control": "always", - "safe_navigation": true, - "block": false - } - ], - "state_reads": [ - { - "field": "$GLOBAL_STATE", - "receiver": "self", - "function": "audit", - "owner": "RubySyntaxFactsCore", - "line": 60, - "span": [ - 60, - 4, - 60, - 17 - ] - }, - { - "field": "@count", - "receiver": "self", - "function": "ready?", - "owner": "RubySyntaxFactsCore", - "line": 69, - "span": [ - 69, - 4, - 69, - 10 - ] - }, - { - "field": "@source", - "receiver": "self", - "function": "audit", - "owner": "RubySyntaxFactsCore", - "line": 61, - "span": [ - 61, - 4, - 61, - 11 - ] - }, - { - "field": "@status", - "receiver": "self", - "function": "loaded?", - "owner": "RubySyntaxFactsCore", - "line": 72, - "span": [ - 72, - 16, - 72, - 23 - ] - }, - { - "field": "@status", - "receiver": "self", - "function": "process", + "function": "(top-level)", + "line": 55, + "message": "private", "owner": "RubySyntaxFactsCore", - "line": 41, - "span": [ - 41, - 7, - 41, - 14 - ] - }, - { - "field": "@status", "receiver": "self", - "function": "process", - "owner": 
"RubySyntaxFactsCore", - "line": 52, - "span": [ - 52, - 4, - 52, - 11 - ] - }, - { - "field": "active?", - "receiver": "user", - "function": "process", - "owner": "RubySyntaxFactsCore", - "line": 28, - "span": [ - 28, - 46, - 28, - 58 - ] - }, - { - "field": "flat_map", - "receiver": "items", - "function": "process", - "owner": "RubySyntaxFactsCore", - "line": 48, + "safe_navigation": false, "span": [ - 48, - 4, - 50, - 7 + 55, + 2, + 55, + 9 ] }, { - "field": "freeze", - "receiver": "%w[owner admin]", + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", "function": "(top-level)", - "owner": "RubySyntaxFactsCore", "line": 9, + "message": "freeze", + "owner": "RubySyntaxFactsCore", + "receiver": "%w[owner admin]", + "safe_navigation": false, "span": [ 9, 16, @@ -880,11 +589,18 @@ ] }, { - "field": "name", - "receiver": "user&.profile", + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", "function": "process", - "owner": "RubySyntaxFactsCore", "line": 27, + "message": "name", + "owner": "RubySyntaxFactsCore", + "receiver": "user&.profile", + "safe_navigation": true, "span": [ 27, 11, @@ -893,11 +609,18 @@ ] }, { - "field": "profile", - "receiver": "user", + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", "function": "process", - "owner": "RubySyntaxFactsCore", "line": 27, + "message": "profile", + "owner": "RubySyntaxFactsCore", + "receiver": "user", + "safe_navigation": true, "span": [ 27, 11, @@ -906,171 +629,173 @@ ] }, { - "field": "ready?", - "receiver": "user", + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", "function": "process", + "line": 28, + "message": "active?", "owner": "RubySyntaxFactsCore", - "line": 41, + "receiver": "user", + "safe_navigation": false, "span": [ - 41, - 27, - 41, - 38 + 28, + 46, + 28, + 58 ] }, { - "field": "role", - "receiver": "user", + "arguments": [ + + ], + "block": false, + 
"conditional": true, + "control": "conditional", "function": "process", - "owner": "RubySyntaxFactsCore", "line": 32, + "message": "role", + "owner": "RubySyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, "span": [ 32, 9, 32, 18 ] - } - ], - "state_writes": [ - { - "field": "@count", - "receiver": "self", - "function": "initialize", - "owner": "RubySyntaxFactsCore", - "line": 21, - "span": [ - 21, - 4, - 21, - 30 - ] }, { - "field": "@count", - "receiver": "self", + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "conditional", "function": "process", + "line": 41, + "message": "ready?", "owner": "RubySyntaxFactsCore", - "line": 42, + "receiver": "user", + "safe_navigation": false, "span": [ - 42, - 6, - 42, - 17 + 41, + 27, + 41, + 38 ] }, { - "field": "@source", - "receiver": "self", - "function": "initialize", + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 49, + "message": "children", "owner": "RubySyntaxFactsCore", - "line": 20, + "receiver": "item", + "safe_navigation": false, "span": [ - 20, - 4, - 20, - 20 + 49, + 6, + 49, + 19 ] }, { - "field": "@status", - "receiver": "self", - "function": "initialize", + "arguments": [ + + ], + "block": true, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 10, + "message": "type_alias", "owner": "RubySyntaxFactsCore", - "line": 22, + "receiver": "T", + "safe_navigation": false, "span": [ - 22, - 4, - 22, + 10, + 11, + 10, 34 ] - } - ], - "decisions": [ - { - "kind": "case_dispatch", - "members": [ - "\"owner\"", - "ADMIN_ROLES", - "nil" - ], - "function": "process", - "line": 32, - "span": [ - 32, - 4, - 39, - 7 - ], - "predicate": "user.role", - "enclosing_span": [ - 32, - 4, - 39, - 7 - ] }, { - "kind": "conjunction", - "members": [ - "@status == :idle", - "user.ready?" 
- ], - "function": "process", - "line": 41, - "span": [ - 41, - 7, - 41, - 38 + "arguments": [ + ], - "predicate": "@status == :idle && user.ready?", - "enclosing_span": [ - 41, - 4, - 46, - 7 - ] - } - ], - "branch_decisions": [ - { - "function": "process", - "line": 32, + "block": true, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 18, + "message": "sig", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ - 32, - 4, - 39, - 7 + 18, + 2, + 18, + 37 + ] + }, + { + "arguments": [ + ], - "predicate": "user.role", - "state_refs": [ - "user.role" + "block": true, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 25, + "message": "sig", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 25, + 2, + 25, + 76 ] }, { + "arguments": [ + + ], + "block": true, + "conditional": false, + "control": "always", "function": "process", - "line": 41, + "line": 48, + "message": "flat_map", + "owner": "RubySyntaxFactsCore", + "receiver": "items", + "safe_navigation": false, "span": [ - 41, + 48, 4, - 46, + 50, 7 - ], - "predicate": "@status == :idle && user.ready?", - "state_refs": [ - "@status", - "user.ready?" 
] } ], - "dispatch_sites": [], - "semantic_effects": [ + "state_reads": [ { - "kind": "context_dependency", - "detail": "$GLOBAL_STATE", + "field": "$GLOBAL_STATE", "function": "audit", "line": 60, + "owner": "RubySyntaxFactsCore", + "receiver": "self", "span": [ 60, 4, @@ -1079,597 +804,371 @@ ] }, { - "kind": "dynamic_dispatch", - "detail": "callback.call", - "function": "process", - "line": 30, + "field": "@count", + "function": "ready?", + "line": 69, + "owner": "RubySyntaxFactsCore", + "receiver": "self", "span": [ - 30, + 69, 4, - 30, - 22 + 69, + 10 ] }, { - "kind": "dynamic_dispatch", - "detail": "send", + "field": "@source", "function": "audit", - "line": 59, + "line": 61, + "owner": "RubySyntaxFactsCore", + "receiver": "self", "span": [ - 59, + 61, 4, - 59, - 23 + 61, + 11 ] }, { - "kind": "hidden_io", - "detail": "puts", - "function": "audit", - "line": 58, + "field": "@status", + "function": "loaded?", + "line": 72, + "owner": "RubySyntaxFactsCore", + "receiver": "self", "span": [ - 58, - 4, - 58, - 14 + 72, + 16, + 72, + 23 ] }, { - "kind": "hidden_io", - "detail": "warn", + "field": "@status", "function": "process", - "line": 45, + "line": 41, + "owner": "RubySyntaxFactsCore", + "receiver": "self", "span": [ - 45, - 6, - 45, - 23 + 41, + 7, + 41, + 14 ] - } - ], - "predicate_bodies": [ + }, { - "name": "loaded?", + "field": "@status", + "function": "process", + "line": 52, "owner": "RubySyntaxFactsCore", - "body": "@status == :ready", - "line": 72, + "receiver": "self", "span": [ - 72, - 2, - 72, - 33 + 52, + 4, + 52, + 11 ] - } - ], - "local_complexity": [ - { - "id": "RubySyntaxFactsCore#audit", - "score": 0.0, - "signals": {} - }, - { - "id": "RubySyntaxFactsCore#initialize", - "score": 0.0, - "signals": {} - }, - { - "id": "RubySyntaxFactsCore#inline_private", - "score": 0.0, - "signals": {} - }, - { - "id": "RubySyntaxFactsCore#loaded?", - "score": 0.0, - "signals": {} }, { - "id": "RubySyntaxFactsCore#process", - "score": 4.0, - "signals": { - 
"boolean_ops": 2, - "branches": 1, - "cases": 2, - "loops": 1 - } - }, - { - "id": "RubySyntaxFactsCore#ready?", - "score": 0.0, - "signals": {} + "field": "active?", + "function": "process", + "line": 28, + "owner": "RubySyntaxFactsCore", + "receiver": "user", + "span": [ + 28, + 46, + 28, + 58 + ] }, { - "id": "RubySyntaxFactsCore#self.build", - "score": 0.0, - "signals": {} - } - ], - "clone_candidates": [ - { - "method_name": "(top-level)", - "node_name": "assignment", - "line": 10, + "field": "flat_map", + "function": "process", + "line": 48, + "owner": "RubySyntaxFactsCore", + "receiver": "items", "span": [ - 10, - 2, - 10, - 34 - ], - "mass": 11, - "fingerprint": "assignment(id =:= call(id .:. id block({:{ id }:})))", - "child_fingerprints": [ - "call(id .:. id block({:{ id }:}))" - ], - "child_masses": [ - 8 + 48, + 4, + 50, + 7 ] }, { - "method_name": "(top-level)", - "node_name": "assignment", + "field": "freeze", + "function": "(top-level)", "line": 9, + "owner": "RubySyntaxFactsCore", + "receiver": "%w[owner admin]", "span": [ 9, - 2, + 16, 9, 38 - ], - "mass": 11, - "fingerprint": "assignment(id =:= call(string_array(%w(:%w[ id id ):]) .:. id))", - "child_fingerprints": [ - "call(string_array(%w(:%w[ id id ):]) .:. id)" - ], - "child_masses": [ - 8 ] }, { - "method_name": "(top-level)", - "node_name": "block", - "line": 10, + "field": "name", + "function": "process", + "line": 27, + "owner": "RubySyntaxFactsCore", + "receiver": "user&.profile", "span": [ - 10, - 24, - 10, - 34 - ], - "mass": 4, - "fingerprint": "block({:{ id }:})", - "child_fingerprints": [ - "id" - ], - "child_masses": [ - 1 + 27, + 11, + 27, + 30 ] }, { - "method_name": "(top-level)", - "node_name": "block", - "line": 18, + "field": "profile", + "function": "process", + "line": 27, + "owner": "RubySyntaxFactsCore", + "receiver": "user", "span": [ - 18, - 6, - 18, - 37 - ], - "mass": 15, - "fingerprint": "block({:{ block_body(call(id argument_list((:( pair(id ::: id) ):))) .:. 
id) }:})", - "child_fingerprints": [ - "call(id argument_list((:( pair(id ::: id) ):)))" - ], - "child_masses": [ - 9 + 27, + 11, + 27, + 24 ] }, { - "method_name": "(top-level)", - "node_name": "block", - "line": 25, + "field": "ready?", + "function": "process", + "line": 41, + "owner": "RubySyntaxFactsCore", + "receiver": "user", "span": [ - 25, - 6, - 25, - 76 - ], - "mass": 29, - "fingerprint": "block({:{ block_body(call(id argument_list((:( pair(id ::: id) ,:, pair(id ::: id) ,:, pair(id ::: id) ):))) .:. id argument_list((:( id ):))) }:})", - "child_fingerprints": [ - "call(id argument_list((:( pair(id ::: id) ,:, pair(id ::: id) ,:, pair(id ::: id) ):)))" - ], - "child_masses": [ - 19 + 41, + 27, + 41, + 38 ] }, { - "method_name": "(top-level)", - "node_name": "class", - "line": 8, + "field": "role", + "function": "process", + "line": 32, + "owner": "RubySyntaxFactsCore", + "receiver": "user", "span": [ - 8, - 0, - 73, - 3 - ], - "mass": 342, - "fingerprint": "class(id id body_statement(assignment(id =:= call(string_array(%w(:%w[ id id ):]) .:. id)) assignment(id =:= call(id .:. id block({:{ id }:}))) call(id lit) singleton_method(id id .:. id method_parameters((:( id ):)) body_statement(id argument_list((:( id ):))) id) call(id block({:{ block_body(call(id argument_list((:( pair(id ::: id) ):))) .:. id) }:})) method(id id method_parameters((:( id ):)) body_statement(assignment(instance_variable:@source =:= id) assignment(instance_variable:@count =:= call(id .:. id argument_list((:( lit ,:, id ):)))) assignment(instance_variable:@status =:= call(id .:. id argument_list((:( lit ,:, id ):))))) id) call(id block({:{ block_body(call(id argument_list((:( pair(id ::: id) ,:, pair(id ::: id) ,:, pair(id ::: id) ):))) .:. id argument_list((:( id ):))) }:})) method(id id method_parameters((:( id ,:, id ,:, id ):)) body_statement(assignment(id =:= call(call(id &.:&. id) &.:&. id)) assignment(id =:= call(id .:. 
id argument_list((:( pair(id ::: id) ,:, pair(id ::: call(id .:. id)) ):)))) call(id argument_list((:( id ):))) call(id .:. argument_list((:( id ):))) case(id call(id .:. id) when(id pattern(\":\" lit \":\") ,:, pattern(*:* id) then(call(id argument_list((:( id ):))))) when(id id then(call(id argument_list((:( id ):))))) else(id call(id argument_list((:( id ):)))) id) if(id binary(binary(instance_variable:@status ==:== lit) &&:&& call(id .:. id)) then(operator_assignment(instance_variable:@count +=:+= lit) call(id argument_list((:( lit ):)))) else(id call(id argument_list((:( string(\":\" lit \":\") ):)))) id) call(id .:. id do_block(id block_parameters(|:| id |:|) body_statement(id .:. id) id)) instance_variable:@status) id) id method(id id method_parameters((:( id ):)) body_statement(call(id argument_list((:( id ):))) call(id argument_list((:( lit ,:, id ):))) global_variable:$GLOBAL_STATE instance_variable:@source) id) call(id argument_list(id id method_parameters((:( id ):)) body_statement(id argument_list((:( id ):))) id)) method(id id body_statement(instance_variable:@count >:> lit) id) method(id id =:= binary(instance_variable:@status ==:== lit))) id)", - "child_fingerprints": [ - "assignment(id =:= call(string_array(%w(:%w[ id id ):]) .:. id))", - "assignment(id =:= call(id .:. id block({:{ id }:})))", - "call(id lit)", - "singleton_method(id id .:. id method_parameters((:( id ):)) body_statement(id argument_list((:( id ):))) id)", - "call(id block({:{ block_body(call(id argument_list((:( pair(id ::: id) ):))) .:. id) }:}))", - "method(id id method_parameters((:( id ):)) body_statement(assignment(instance_variable:@source =:= id) assignment(instance_variable:@count =:= call(id .:. id argument_list((:( lit ,:, id ):)))) assignment(instance_variable:@status =:= call(id .:. id argument_list((:( lit ,:, id ):))))) id)", - "call(id block({:{ block_body(call(id argument_list((:( pair(id ::: id) ,:, pair(id ::: id) ,:, pair(id ::: id) ):))) .:. 
id argument_list((:( id ):))) }:}))", - "method(id id method_parameters((:( id ,:, id ,:, id ):)) body_statement(assignment(id =:= call(call(id &.:&. id) &.:&. id)) assignment(id =:= call(id .:. id argument_list((:( pair(id ::: id) ,:, pair(id ::: call(id .:. id)) ):)))) call(id argument_list((:( id ):))) call(id .:. argument_list((:( id ):))) case(id call(id .:. id) when(id pattern(\":\" lit \":\") ,:, pattern(*:* id) then(call(id argument_list((:( id ):))))) when(id id then(call(id argument_list((:( id ):))))) else(id call(id argument_list((:( id ):)))) id) if(id binary(binary(instance_variable:@status ==:== lit) &&:&& call(id .:. id)) then(operator_assignment(instance_variable:@count +=:+= lit) call(id argument_list((:( lit ):)))) else(id call(id argument_list((:( string(\":\" lit \":\") ):)))) id) call(id .:. id do_block(id block_parameters(|:| id |:|) body_statement(id .:. id) id)) instance_variable:@status) id)", - "method(id id method_parameters((:( id ):)) body_statement(call(id argument_list((:( id ):))) call(id argument_list((:( lit ,:, id ):))) global_variable:$GLOBAL_STATE instance_variable:@source) id)", - "call(id argument_list(id id method_parameters((:( id ):)) body_statement(id argument_list((:( id ):))) id))", - "method(id id body_statement(instance_variable:@count >:> lit) id)", - "method(id id =:= binary(instance_variable:@status ==:== lit))" - ], - "child_masses": [ - 11, - 11, - 3, - 16, - 17, - 39, - 31, - 151, - 25, - 16, - 8, - 8 + 32, + 9, + 32, + 18 + ] + } + ], + "state_writes": [ + { + "field": "@count", + "function": "initialize", + "line": 21, + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "span": [ + 21, + 4, + 21, + 30 ] }, { - "method_name": "audit", - "node_name": "defn", - "line": 57, + "field": "@count", + "function": "process", + "line": 42, + "owner": "RubySyntaxFactsCore", + "receiver": "self", "span": [ - 57, - 2, - 62, - 5 - ], - "mass": 25, - "fingerprint": "method(id id method_parameters((:( id ):)) 
body_statement(call(id argument_list((:( id ):))) call(id argument_list((:( lit ,:, id ):))) global_variable:$GLOBAL_STATE instance_variable:@source) id)", - "child_fingerprints": [ - "call(id argument_list((:( id ):)))", - "call(id argument_list((:( lit ,:, id ):)))", - "global_variable:$GLOBAL_STATE", - "instance_variable:@source" - ], - "child_masses": [ + 42, 6, - 8, - 1, - 1 + 42, + 17 ] }, { - "method_name": "initialize", - "node_name": "assignment", + "field": "@source", + "function": "initialize", "line": 20, + "owner": "RubySyntaxFactsCore", + "receiver": "self", "span": [ 20, 4, 20, 20 - ], - "mass": 4, - "fingerprint": "assignment(instance_variable:@source =:= id)", - "child_fingerprints": [ - "instance_variable:@source" - ], - "child_masses": [ - 1 - ] - }, - { - "method_name": "initialize", - "node_name": "assignment", - "line": 21, - "span": [ - 21, - 4, - 21, - 30 - ], - "mass": 13, - "fingerprint": "assignment(instance_variable:@count =:= call(id .:. id argument_list((:( lit ,:, id ):))))", - "child_fingerprints": [ - "instance_variable:@count", - "call(id .:. id argument_list((:( lit ,:, id ):)))" - ], - "child_masses": [ - 1, - 10 ] }, { - "method_name": "initialize", - "node_name": "assignment", + "field": "@status", + "function": "initialize", "line": 22, + "owner": "RubySyntaxFactsCore", + "receiver": "self", "span": [ 22, 4, 22, 34 - ], - "mass": 13, - "fingerprint": "assignment(instance_variable:@status =:= call(id .:. id argument_list((:( lit ,:, id ):))))", - "child_fingerprints": [ - "instance_variable:@status", - "call(id .:. id argument_list((:( lit ,:, id ):)))" - ], - "child_masses": [ - 1, - 10 ] - }, + } + ], + "decisions": [ { - "method_name": "initialize", - "node_name": "defn", - "line": 19, - "span": [ - 19, - 2, - 23, - 5 - ], - "mass": 39, - "fingerprint": "method(id id method_parameters((:( id ):)) body_statement(assignment(instance_variable:@source =:= id) assignment(instance_variable:@count =:= call(id .:. 
id argument_list((:( lit ,:, id ):)))) assignment(instance_variable:@status =:= call(id .:. id argument_list((:( lit ,:, id ):))))) id)", - "child_fingerprints": [ - "assignment(instance_variable:@source =:= id)", - "assignment(instance_variable:@count =:= call(id .:. id argument_list((:( lit ,:, id ):))))", - "assignment(instance_variable:@status =:= call(id .:. id argument_list((:( lit ,:, id ):))))" - ], - "child_masses": [ + "enclosing_span": [ + 32, 4, - 13, - 13 - ] - }, - { - "method_name": "inline_private", - "node_name": "defn", - "line": 64, - "span": [ - 64, - 10, - 66, - 5 - ], - "mass": 14, - "fingerprint": "argument_list(id id method_parameters((:( id ):)) body_statement(id argument_list((:( id ):))) id)", - "child_fingerprints": [], - "child_masses": [] - }, - { - "method_name": "loaded?", - "node_name": "defn", - "line": 72, - "span": [ - 72, - 2, - 72, - 33 + 39, + 7 ], - "mass": 8, - "fingerprint": "method(id id =:= binary(instance_variable:@status ==:== lit))", - "child_fingerprints": [ - "instance_variable:@status", - "lit" + "function": "process", + "kind": "case_dispatch", + "line": 32, + "members": [ + "\"owner\"", + "ADMIN_ROLES", + "nil" ], - "child_masses": [ - 1, - 1 - ] - }, - { - "method_name": "process", - "node_name": "assignment", - "line": 27, + "predicate": "user.role", "span": [ - 27, + 32, 4, - 27, - 30 - ], - "mass": 10, - "fingerprint": "assignment(id =:= call(call(id &.:&. id) &.:&. id))", - "child_fingerprints": [ - "call(call(id &.:&. id) &.:&. id)" - ], - "child_masses": [ + 39, 7 ] }, { - "method_name": "process", - "node_name": "assignment", - "line": 28, - "span": [ - 28, + "enclosing_span": [ + 41, 4, - 28, - 59 + 46, + 7 ], - "mass": 22, - "fingerprint": "assignment(id =:= call(id .:. id argument_list((:( pair(id ::: id) ,:, pair(id ::: call(id .:. id)) ):))))", - "child_fingerprints": [ - "call(id .:. id argument_list((:( pair(id ::: id) ,:, pair(id ::: call(id .:. 
id)) ):)))" + "function": "process", + "kind": "conjunction", + "line": 41, + "members": [ + "@status == :idle", + "user.ready?" ], - "child_masses": [ - 19 + "predicate": "@status == :idle && user.ready?", + "span": [ + 41, + 7, + 41, + 38 ] - }, + } + ], + "branch_decisions": [ { - "method_name": "process", - "node_name": "case", + "function": "process", "line": 32, + "predicate": "user.role", "span": [ 32, 4, 39, 7 ], - "mass": 42, - "fingerprint": "case(id call(id .:. id) when(id pattern(\":\" lit \":\") ,:, pattern(*:* id) then(call(id argument_list((:( id ):))))) when(id id then(call(id argument_list((:( id ):))))) else(id call(id argument_list((:( id ):)))) id)", - "child_fingerprints": [ - "call(id .:. id)", - "when(id pattern(\":\" lit \":\") ,:, pattern(*:* id) then(call(id argument_list((:( id ):)))))", - "when(id id then(call(id argument_list((:( id ):)))))", - "else(id call(id argument_list((:( id ):))))" - ], - "child_masses": [ - 4, - 17, - 10, - 8 + "state_refs": [ + "user.role" ] }, { - "method_name": "process", - "node_name": "defn", - "line": 26, + "function": "process", + "line": 41, + "predicate": "@status == :idle && user.ready?", "span": [ - 26, - 2, - 53, - 5 - ], - "mass": 151, - "fingerprint": "method(id id method_parameters((:( id ,:, id ,:, id ):)) body_statement(assignment(id =:= call(call(id &.:&. id) &.:&. id)) assignment(id =:= call(id .:. id argument_list((:( pair(id ::: id) ,:, pair(id ::: call(id .:. id)) ):)))) call(id argument_list((:( id ):))) call(id .:. argument_list((:( id ):))) case(id call(id .:. id) when(id pattern(\":\" lit \":\") ,:, pattern(*:* id) then(call(id argument_list((:( id ):))))) when(id id then(call(id argument_list((:( id ):))))) else(id call(id argument_list((:( id ):)))) id) if(id binary(binary(instance_variable:@status ==:== lit) &&:&& call(id .:. 
id)) then(operator_assignment(instance_variable:@count +=:+= lit) call(id argument_list((:( lit ):)))) else(id call(id argument_list((:( string(\":\" lit \":\") ):)))) id) call(id .:. id do_block(id block_parameters(|:| id |:|) body_statement(id .:. id) id)) instance_variable:@status) id)", - "child_fingerprints": [ - "assignment(id =:= call(call(id &.:&. id) &.:&. id))", - "assignment(id =:= call(id .:. id argument_list((:( pair(id ::: id) ,:, pair(id ::: call(id .:. id)) ):))))", - "call(id argument_list((:( id ):)))", - "call(id .:. argument_list((:( id ):)))", - "case(id call(id .:. id) when(id pattern(\":\" lit \":\") ,:, pattern(*:* id) then(call(id argument_list((:( id ):))))) when(id id then(call(id argument_list((:( id ):))))) else(id call(id argument_list((:( id ):)))) id)", - "if(id binary(binary(instance_variable:@status ==:== lit) &&:&& call(id .:. id)) then(operator_assignment(instance_variable:@count +=:+= lit) call(id argument_list((:( lit ):)))) else(id call(id argument_list((:( string(\":\" lit \":\") ):)))) id)", - "call(id .:. id do_block(id block_parameters(|:| id |:|) body_statement(id .:. id) id))", - "instance_variable:@status" + 41, + 4, + 46, + 7 ], - "child_masses": [ - 10, - 22, - 6, - 7, - 42, - 35, - 15, - 1 + "state_refs": [ + "@status", + "user.ready?" ] - }, + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ { - "method_name": "process", - "node_name": "do_block", - "line": 48, + "detail": "$GLOBAL_STATE", + "function": "audit", + "kind": "context_dependency", + "line": 60, "span": [ - 48, - 19, - 50, - 7 - ], - "mass": 11, - "fingerprint": "do_block(id block_parameters(|:| id |:|) body_statement(id .:. 
id) id)", - "child_fingerprints": [], - "child_masses": [] + 60, + 4, + 60, + 17 + ] }, { - "method_name": "process", - "node_name": "if", - "line": 41, + "detail": "callback.call", + "function": "process", + "kind": "dynamic_dispatch", + "line": 30, "span": [ - 41, + 30, 4, - 46, - 7 - ], - "mass": 35, - "fingerprint": "if(id binary(binary(instance_variable:@status ==:== lit) &&:&& call(id .:. id)) then(operator_assignment(instance_variable:@count +=:+= lit) call(id argument_list((:( lit ):)))) else(id call(id argument_list((:( string(\":\" lit \":\") ):)))) id)", - "child_fingerprints": [ - "binary(binary(instance_variable:@status ==:== lit) &&:&& call(id .:. id))", - "then(operator_assignment(instance_variable:@count +=:+= lit) call(id argument_list((:( lit ):))))", - "else(id call(id argument_list((:( string(\":\" lit \":\") ):))))" - ], - "child_masses": [ - 10, - 11, - 11 + 30, + 22 ] }, { - "method_name": "process", - "node_name": "operator_assignment", - "line": 42, + "detail": "puts", + "function": "audit", + "kind": "hidden_io", + "line": 58, "span": [ - 42, - 6, - 42, - 17 - ], - "mass": 4, - "fingerprint": "operator_assignment(instance_variable:@count +=:+= lit)", - "child_fingerprints": [ - "instance_variable:@count", - "lit" - ], - "child_masses": [ - 1, - 1 + 58, + 4, + 58, + 14 ] }, { - "method_name": "ready?", - "node_name": "defn", - "line": 68, + "detail": "send", + "function": "audit", + "kind": "dynamic_dispatch", + "line": 59, "span": [ - 68, - 2, - 70, - 5 - ], - "mass": 8, - "fingerprint": "method(id id body_statement(instance_variable:@count >:> lit) id)", - "child_fingerprints": [ - "instance_variable:@count", - "lit" - ], - "child_masses": [ - 1, - 1 + 59, + 4, + 59, + 23 ] }, { - "method_name": "self.build", - "node_name": "defn", - "line": 14, + "detail": "warn", + "function": "process", + "kind": "hidden_io", + "line": 45, "span": [ - 14, - 2, - 16, - 5 - ], - "mass": 16, - "fingerprint": "singleton_method(id id .:. 
id method_parameters((:( id ):)) body_statement(id argument_list((:( id ):))) id)", - "child_fingerprints": [], - "child_masses": [] - }, + 45, + 6, + 45, + 23 + ] + } + ], + "predicate_bodies": [ { - "method_name": "self.build", - "node_name": "defs", - "line": 14, + "body": "@status == :ready", + "line": 72, + "name": "loaded?", + "owner": "RubySyntaxFactsCore", "span": [ - 14, + 72, 2, - 16, - 5 - ], - "mass": 16, - "fingerprint": "singleton_method(id id .:. id method_parameters((:( id ):)) body_statement(id argument_list((:( id ):))) id)", - "child_fingerprints": [], - "child_masses": [] + 72, + 33 + ] } ] } diff --git a/gems/decomplex/examples/syntax-facts/oracles/rust-core.json b/gems/decomplex/examples/syntax-facts/oracles/rust-core.json index 3eb8d14cb..d8d73466b 100644 --- a/gems/decomplex/examples/syntax-facts/oracles/rust-core.json +++ b/gems/decomplex/examples/syntax-facts/oracles/rust-core.json @@ -6,75 +6,75 @@ "language": "rust", "functions": [ { - "name": "audit", - "owner": "RustSyntaxFactsCore", - "line": 46, - "span": [ - 46, - 4, - 50, - 5 - ], - "visibility": "private", - "params": [ - "&self", - "name" - ] - }, - { + "line": 12, "name": "new", "owner": "RustSyntaxFactsCore", - "line": 12, + "params": [ + "status" + ], "span": [ 12, 4, 14, 5 ], - "visibility": "public", - "params": [ - "status" - ] + "visibility": "public" }, { + "line": 16, "name": "process", "owner": "RustSyntaxFactsCore", - "line": 16, + "params": [ + "&mut self", + "user", + "items", + "callback" + ], "span": [ 16, 4, 44, 5 ], - "visibility": "public", + "visibility": "public" + }, + { + "line": 46, + "name": "audit", + "owner": "RustSyntaxFactsCore", "params": [ - "&mut self", - "user", - "items", - "callback" - ] + "&self", + "name" + ], + "span": [ + 46, + 4, + 50, + 5 + ], + "visibility": "private" }, { + "line": 52, "name": "ready", "owner": "RustSyntaxFactsCore", - "line": 52, + "params": [ + "&self" + ], "span": [ 52, 4, 54, 5 ], - "visibility": "private", - 
"params": [ - "&self" - ] + "visibility": "private" } ], "owners": [ { - "name": "RustSyntaxFactsCore", "kind": "impl", "line": 11, + "name": "RustSyntaxFactsCore", "span": [ 11, 0, @@ -83,9 +83,9 @@ ] }, { - "name": "RustSyntaxFactsCore", "kind": "struct", "line": 1, + "name": "RustSyntaxFactsCore", "span": [ 1, 0, @@ -96,357 +96,375 @@ ], "calls": [ { - "receiver": "Account", - "message": "new", - "function": "process", - "owner": "RustSyntaxFactsCore", - "line": 23, - "span": [ - 23, - 22, - 23, - 63 - ], - "conditional": false, "arguments": [ - "name.clone()", - "user.active()" + "&account" ], + "block": false, + "conditional": false, "control": "always", + "function": "process", + "line": 24, + "message": "callback", + "owner": "RustSyntaxFactsCore", + "receiver": "self", "safe_navigation": false, - "block": false + "span": [ + 24, + 8, + 24, + 26 + ] }, { - "receiver": "item", - "message": "children", + "arguments": [ + "Status::Busy" + ], + "block": false, + "conditional": true, + "control": "conditional", "function": "process", + "line": 34, + "message": "publish", "owner": "RustSyntaxFactsCore", - "line": 40, + "receiver": "self", + "safe_navigation": false, "span": [ - 40, + 34, 12, - 40, - 27 - ], - "conditional": true, - "arguments": [], - "control": "iterates", - "safe_navigation": false, - "block": false + 34, + 38 + ] }, { - "receiver": "name", - "message": "clone", + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", "function": "process", + "line": 36, + "message": "warn", "owner": "RustSyntaxFactsCore", - "line": 23, + "receiver": "self", + "safe_navigation": false, "span": [ - 23, - 35, - 23, - 47 + 36, + 12, + 36, + 34 + ] + }, + { + "arguments": [ + "\"record\"", + "name" ], + "block": false, "conditional": false, - "arguments": [], "control": "always", + "function": "audit", + "line": 48, + "message": "send", + "owner": "RustSyntaxFactsCore", + "receiver": "self", "safe_navigation": 
false, - "block": false + "span": [ + 48, + 8, + 48, + 33 + ] }, { - "receiver": "self", - "message": "Some", + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 43, + "message": "Some", + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ 43, 8, 43, 18 - ], - "conditional": false, - "arguments": [ - "name" - ], - "control": "always", - "safe_navigation": false, - "block": false + ] }, { - "receiver": "self", - "message": "callback", - "function": "process", - "owner": "RustSyntaxFactsCore", - "line": 24, - "span": [ - 24, - 8, - 24, - 26 - ], - "conditional": false, "arguments": [ - "&account" + "name.clone()", + "user.active()" ], + "block": false, + "conditional": false, "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "default_case", "function": "process", + "line": 23, + "message": "new", "owner": "RustSyntaxFactsCore", - "line": 29, + "receiver": "Account", + "safe_navigation": false, "span": [ - 29, - 17, - 29, - 40 - ], - "conditional": true, + 23, + 22, + 23, + 63 + ] + }, + { "arguments": [ "user" ], + "block": false, + "conditional": true, "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "escalate", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 27, + "message": "escalate", + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ 27, 41, 27, 60 - ], - "conditional": true, + ] + }, + { "arguments": [ "user" ], + "block": false, + "conditional": true, "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "fallback", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 28, + "message": "fallback", + "owner": "RustSyntaxFactsCore", + "receiver": "self", + 
"safe_navigation": false, "span": [ 28, 27, 28, 46 - ], - "conditional": true, + ] + }, + { "arguments": [ "user" ], + "block": false, + "conditional": true, "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "publish", "function": "process", + "line": 29, + "message": "default_case", "owner": "RustSyntaxFactsCore", - "line": 34, - "span": [ - 34, - 12, - 34, - 38 - ], - "conditional": true, - "arguments": [ - "Status::Busy" - ], - "control": "conditional", + "receiver": "self", "safe_navigation": false, - "block": false + "span": [ + 29, + 17, + 29, + 40 + ] }, { - "receiver": "self", - "message": "send", - "function": "audit", - "owner": "RustSyntaxFactsCore", - "line": 48, - "span": [ - 48, - 8, - 48, - 33 - ], - "conditional": false, "arguments": [ - "\"record\"", - "name" + ], + "block": false, + "conditional": false, "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "status", "function": "audit", - "owner": "RustSyntaxFactsCore", "line": 49, + "message": "status", + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ 49, 8, 49, 21 + ] + }, + { + "arguments": [ + ], + "block": false, "conditional": false, - "arguments": [], "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "warn", "function": "process", + "line": 22, + "message": "name", "owner": "RustSyntaxFactsCore", - "line": 36, - "span": [ - 36, - 12, - 36, - 34 - ], - "conditional": true, - "arguments": [ - "\"not ready\"" - ], - "control": "conditional", + "receiver": "user.profile()", "safe_navigation": false, - "block": false + "span": [ + 22, + 19, + 22, + 40 + ] }, { - "receiver": "user", - "message": "active", - "function": "process", - "owner": "RustSyntaxFactsCore", - "line": 23, - "span": [ - 23, - 49, - 23, - 62 + "arguments": [ + ], + "block": false, "conditional": false, - 
"arguments": [], "control": "always", - "safe_navigation": false, - "block": false - }, - { - "receiver": "user", - "message": "profile", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 22, + "message": "profile", + "owner": "RustSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, "span": [ 22, 19, 22, 33 + ] + }, + { + "arguments": [ + ], + "block": false, "conditional": false, - "arguments": [], "control": "always", + "function": "process", + "line": 22, + "message": "to_string", + "owner": "RustSyntaxFactsCore", + "receiver": "user.profile().name()", "safe_navigation": false, - "block": false + "span": [ + 22, + 19, + 22, + 52 + ] }, { - "receiver": "user", - "message": "ready", + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", "function": "process", + "line": 23, + "message": "active", "owner": "RustSyntaxFactsCore", - "line": 32, + "receiver": "user", + "safe_navigation": false, "span": [ - 32, - 50, - 32, + 23, + 49, + 23, 62 + ] + }, + { + "arguments": [ + ], - "conditional": true, - "arguments": [], - "control": "conditional", + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 23, + "message": "clone", + "owner": "RustSyntaxFactsCore", + "receiver": "name", "safe_navigation": false, - "block": false + "span": [ + 23, + 35, + 23, + 47 + ] }, { - "receiver": "user", - "message": "role", + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "conditional", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 26, + "message": "role", + "owner": "RustSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, "span": [ 26, 14, 26, 25 + ] + }, + { + "arguments": [ + ], + "block": false, "conditional": true, - "arguments": [], "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "user.profile()", - "message": "name", "function": "process", + "line": 32, + 
"message": "ready", "owner": "RustSyntaxFactsCore", - "line": 22, - "span": [ - 22, - 19, - 22, - 40 - ], - "conditional": false, - "arguments": [], - "control": "always", + "receiver": "user", "safe_navigation": false, - "block": false + "span": [ + 32, + 50, + 32, + 62 + ] }, { - "receiver": "user.profile().name()", - "message": "to_string", - "function": "process", - "owner": "RustSyntaxFactsCore", - "line": 22, - "span": [ - 22, - 19, - 22, - 52 + "arguments": [ + ], - "conditional": false, - "arguments": [], - "control": "always", + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 40, + "message": "children", + "owner": "RustSyntaxFactsCore", + "receiver": "item", "safe_navigation": false, - "block": false + "span": [ + 40, + 12, + 40, + 27 + ] } ], "state_reads": [ { "field": "active", - "receiver": "user", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 23, + "owner": "RustSyntaxFactsCore", + "receiver": "user", "span": [ 23, 49, @@ -456,10 +474,10 @@ }, { "field": "children", - "receiver": "item", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 40, + "owner": "RustSyntaxFactsCore", + "receiver": "item", "span": [ 40, 12, @@ -469,10 +487,10 @@ }, { "field": "clone", - "receiver": "name", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 23, + "owner": "RustSyntaxFactsCore", + "receiver": "name", "span": [ 23, 35, @@ -482,10 +500,10 @@ }, { "field": "count", - "receiver": "self", "function": "ready", - "owner": "RustSyntaxFactsCore", "line": 53, + "owner": "RustSyntaxFactsCore", + "receiver": "self", "span": [ 53, 8, @@ -495,10 +513,10 @@ }, { "field": "default_case", - "receiver": "self", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 29, + "owner": "RustSyntaxFactsCore", + "receiver": "self", "span": [ 29, 17, @@ -508,10 +526,10 @@ }, { "field": "escalate", - "receiver": "self", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 
27, + "owner": "RustSyntaxFactsCore", + "receiver": "self", "span": [ 27, 41, @@ -521,10 +539,10 @@ }, { "field": "fallback", - "receiver": "self", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 28, + "owner": "RustSyntaxFactsCore", + "receiver": "self", "span": [ 28, 27, @@ -534,10 +552,10 @@ }, { "field": "name", - "receiver": "user.profile()", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 22, + "owner": "RustSyntaxFactsCore", + "receiver": "user.profile()", "span": [ 22, 19, @@ -547,10 +565,10 @@ }, { "field": "profile", - "receiver": "user", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 22, + "owner": "RustSyntaxFactsCore", + "receiver": "user", "span": [ 22, 19, @@ -560,10 +578,10 @@ }, { "field": "publish", - "receiver": "self", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 34, + "owner": "RustSyntaxFactsCore", + "receiver": "self", "span": [ 34, 12, @@ -573,10 +591,10 @@ }, { "field": "ready", - "receiver": "user", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 32, + "owner": "RustSyntaxFactsCore", + "receiver": "user", "span": [ 32, 50, @@ -586,10 +604,10 @@ }, { "field": "role", - "receiver": "user", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 26, + "owner": "RustSyntaxFactsCore", + "receiver": "user", "span": [ 26, 14, @@ -599,10 +617,10 @@ }, { "field": "send", - "receiver": "self", "function": "audit", - "owner": "RustSyntaxFactsCore", "line": 48, + "owner": "RustSyntaxFactsCore", + "receiver": "self", "span": [ 48, 8, @@ -612,10 +630,10 @@ }, { "field": "status", - "receiver": "self", "function": "audit", - "owner": "RustSyntaxFactsCore", "line": 49, + "owner": "RustSyntaxFactsCore", + "receiver": "self", "span": [ 49, 8, @@ -625,10 +643,10 @@ }, { "field": "to_string", - "receiver": "user.profile().name()", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 22, + "owner": "RustSyntaxFactsCore", + "receiver": 
"user.profile().name()", "span": [ 22, 19, @@ -638,10 +656,10 @@ }, { "field": "warn", - "receiver": "self", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 36, + "owner": "RustSyntaxFactsCore", + "receiver": "self", "span": [ 36, 12, @@ -653,10 +671,10 @@ "state_writes": [ { "field": "count", - "receiver": "self", "function": "process", - "owner": "RustSyntaxFactsCore", "line": 33, + "owner": "RustSyntaxFactsCore", + "receiver": "self", "span": [ 33, 12, @@ -667,21 +685,21 @@ ], "decisions": [ { - "kind": "case_dispatch", - "members": [ - "Role::Guest", - "Role::Owner | Role::Admin" - ], - "function": "process", - "line": 26, - "span": [ + "enclosing_span": [ 26, 8, 30, 9 ], + "function": "process", + "kind": "case_dispatch", + "line": 26, + "members": [ + "Role::Guest", + "Role::Owner | Role::Admin" + ], "predicate": "user.role()", - "enclosing_span": [ + "span": [ 26, 8, 30, @@ -689,25 +707,25 @@ ] }, { + "enclosing_span": [ + 32, + 8, + 37, + 9 + ], + "function": "process", "kind": "conjunction", + "line": 32, "members": [ "matches!(self.status, Status::Idle)", "user.ready()" ], - "function": "process", - "line": 32, + "predicate": "matches!(self.status, Status::Idle) && user.ready()", "span": [ 32, 11, 32, 62 - ], - "predicate": "matches!(self.status, Status::Idle) && user.ready()", - "enclosing_span": [ - 32, - 8, - 37, - 9 ] } ], @@ -715,13 +733,13 @@ { "function": "process", "line": 26, + "predicate": "user.role()", "span": [ 26, 8, 30, 9 ], - "predicate": "user.role()", "state_refs": [ "user.role" ] @@ -729,24 +747,26 @@ { "function": "process", "line": 32, + "predicate": "matches!(self.status, Status::Idle) && user.ready()", "span": [ 32, 8, 37, 9 ], - "predicate": "matches!(self.status, Status::Idle) && user.ready()", "state_refs": [ "user.ready" ] } ], - "dispatch_sites": [], + "dispatch_sites": [ + + ], "semantic_effects": [ { - "kind": "callback_inversion", "detail": "callback", "function": "process", + "kind": "callback_inversion", 
"line": 24, "span": [ 24, @@ -756,279 +776,8 @@ ] } ], - "predicate_bodies": [], - "local_complexity": [ - { - "id": "RustSyntaxFactsCore#audit", - "score": 0.0, - "signals": {} - }, - { - "id": "RustSyntaxFactsCore#new", - "score": 0.0, - "signals": {} - }, - { - "id": "RustSyntaxFactsCore#process", - "score": 5.6, - "signals": { - "boolean_ops": 2, - "branches": 1, - "cases": 1, - "loops": 2, - "nested": 1 - } - }, - { - "id": "RustSyntaxFactsCore#ready", - "score": 0.0, - "signals": {} - } - ], - "clone_candidates": [ - { - "method_name": "audit", - "node_name": "block", - "line": 46, - "span": [ - 46, - 32, - 50, - 5 - ], - "mass": 43, - "fingerprint": "block({:{ expression_statement(macro_invocation(id !:! token_tree((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;) }:})", - "child_fingerprints": [ - "expression_statement(macro_invocation(id !:! token_tree((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;)", - "expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;)", - "expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;)" - ], - "child_masses": [ - 14, - 16, - 10 - ] - }, - { - "method_name": "audit", - "node_name": "defn", - "line": 46, - "span": [ - 46, - 4, - 50, - 5 - ], - "mass": 59, - "fingerprint": "function_item(id id parameters((:( self_parameter(&:& id) ,:, parameter(id ::: reference_type(&:& id)) ):)) block({:{ expression_statement(macro_invocation(id !:! token_tree((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;) expression_statement(call_expression(field_expression(id .:. 
id) arguments((:( ):))) ;:;) }:}))", - "child_fingerprints": [ - "expression_statement(macro_invocation(id !:! token_tree((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;)", - "expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ,:, id ):))) ;:;)", - "expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;)" - ], - "child_masses": [ - 14, - 16, - 10 - ] - }, - { - "method_name": "new", - "node_name": "block", - "line": 12, - "span": [ - 12, - 39, - 14, - 5 - ], - "mass": 14, - "fingerprint": "block({:{ struct_expression(id field_initializer_list({:{ id ,:, field_initializer(id ::: lit) }:})) }:})", - "child_fingerprints": [ - "struct_expression(id field_initializer_list({:{ id ,:, field_initializer(id ::: lit) }:}))" - ], - "child_masses": [ - 11 - ] - }, - { - "method_name": "new", - "node_name": "defn", - "line": 12, - "span": [ - 12, - 4, - 14, - 5 - ], - "mass": 27, - "fingerprint": "function_item(id id id parameters((:( parameter(id ::: id) ):)) ->:-> id block({:{ struct_expression(id field_initializer_list({:{ id ,:, field_initializer(id ::: lit) }:})) }:}))", - "child_fingerprints": [ - "struct_expression(id field_initializer_list({:{ id ,:, field_initializer(id ::: lit) }:}))" - ], - "child_masses": [ - 11 - ] - }, - { - "method_name": "process", - "node_name": "block", - "line": 21, - "span": [ - 21, - 24, - 44, - 5 - ], - "mass": 228, - "fingerprint": "block({:{ let_declaration(id id =:= call_expression(field_expression(call_expression(field_expression(call_expression(field_expression(id .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) ;:;) let_declaration(id id =:= call_expression(scoped_identifier(id ::::: id) arguments((:( call_expression(field_expression(id .:. id) arguments((:( ):))) ,:, call_expression(field_expression(id .:. 
id) arguments((:( ):))) ):))) ;:;) expression_statement(call_expression(id arguments((:( reference_expression(&:& id) ):))) ;:;) expression_statement(id call_expression(field_expression(id .:. id) arguments((:( ):))) match_block({:{ match_arm(match_pattern(scoped_identifier(id ::::: id) |:| scoped_identifier(id ::::: id)) =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) match_arm(match_pattern(id ::::: id) =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) match_arm(id =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) }:})) expression_statement(id binary_expression(macro_invocation(id !:! token_tree((:( id .:. id ,:, id ::::: id ):))) &&:&& call_expression(field_expression(id .:. id) arguments((:( ):)))) block({:{ expression_statement(compound_assignment_expr(field_expression(id .:. id) +=:+= lit) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( scoped_identifier(id ::::: id) ):))) ;:;) }:}) else_clause(id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ):))) ;:;) }:}))) expression_statement(id id id id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;) }:})) call_expression(id arguments((:( id ):))) }:})", - "child_fingerprints": [ - "let_declaration(id id =:= call_expression(field_expression(call_expression(field_expression(call_expression(field_expression(id .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) ;:;)", - "let_declaration(id id =:= call_expression(scoped_identifier(id ::::: id) arguments((:( call_expression(field_expression(id .:. id) arguments((:( ):))) ,:, call_expression(field_expression(id .:. 
id) arguments((:( ):))) ):))) ;:;)", - "expression_statement(call_expression(id arguments((:( reference_expression(&:& id) ):))) ;:;)", - "expression_statement(id call_expression(field_expression(id .:. id) arguments((:( ):))) match_block({:{ match_arm(match_pattern(scoped_identifier(id ::::: id) |:| scoped_identifier(id ::::: id)) =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) match_arm(match_pattern(id ::::: id) =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) match_arm(id =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) }:}))", - "expression_statement(id binary_expression(macro_invocation(id !:! token_tree((:( id .:. id ,:, id ::::: id ):))) &&:&& call_expression(field_expression(id .:. id) arguments((:( ):)))) block({:{ expression_statement(compound_assignment_expr(field_expression(id .:. id) +=:+= lit) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( scoped_identifier(id ::::: id) ):))) ;:;) }:}) else_clause(id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ):))) ;:;) }:})))", - "expression_statement(id id id id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;) }:}))", - "call_expression(id arguments((:( id ):)))" - ], - "child_masses": [ - 27, - 30, - 10, - 64, - 70, - 18, - 6 - ] - }, - { - "method_name": "process", - "node_name": "block", - "line": 32, - "span": [ - 32, - 63, - 35, - 9 - ], - "mass": 26, - "fingerprint": "block({:{ expression_statement(compound_assignment_expr(field_expression(id .:. id) +=:+= lit) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( scoped_identifier(id ::::: id) ):))) ;:;) }:})", - "child_fingerprints": [ - "expression_statement(compound_assignment_expr(field_expression(id .:. 
id) +=:+= lit) ;:;)", - "expression_statement(call_expression(field_expression(id .:. id) arguments((:( scoped_identifier(id ::::: id) ):))) ;:;)" - ], - "child_masses": [ - 9, - 14 - ] - }, - { - "method_name": "process", - "node_name": "block", - "line": 35, - "span": [ - 35, - 15, - 37, - 9 - ], - "mass": 17, - "fingerprint": "block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ):))) ;:;) }:})", - "child_fingerprints": [ - "expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ):))) ;:;)" - ], - "child_masses": [ - 14 - ] - }, - { - "method_name": "process", - "node_name": "block", - "line": 39, - "span": [ - 39, - 26, - 41, - 9 - ], - "mass": 13, - "fingerprint": "block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;) }:})", - "child_fingerprints": [ - "expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;)" - ], - "child_masses": [ - 10 - ] - }, - { - "method_name": "process", - "node_name": "defn", - "line": 16, - "span": [ - 16, - 4, - 44, - 5 - ], - "mass": 276, - "fingerprint": "function_item(id id id parameters((:( self_parameter(&:& id id) ,:, parameter(id ::: reference_type(&:& id)) ,:, parameter(id ::: generic_type(id type_arguments(<:< id >:>))) ,:, parameter(id ::: function_type(id parameters((:( reference_type(&:& id) ):)))) ,:, ):)) ->:-> generic_type(id type_arguments(<:< id >:>)) block({:{ let_declaration(id id =:= call_expression(field_expression(call_expression(field_expression(call_expression(field_expression(id .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) ;:;) let_declaration(id id =:= call_expression(scoped_identifier(id ::::: id) arguments((:( call_expression(field_expression(id .:. id) arguments((:( ):))) ,:, call_expression(field_expression(id .:. 
id) arguments((:( ):))) ):))) ;:;) expression_statement(call_expression(id arguments((:( reference_expression(&:& id) ):))) ;:;) expression_statement(id call_expression(field_expression(id .:. id) arguments((:( ):))) match_block({:{ match_arm(match_pattern(scoped_identifier(id ::::: id) |:| scoped_identifier(id ::::: id)) =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) match_arm(match_pattern(id ::::: id) =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) match_arm(id =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) }:})) expression_statement(id binary_expression(macro_invocation(id !:! token_tree((:( id .:. id ,:, id ::::: id ):))) &&:&& call_expression(field_expression(id .:. id) arguments((:( ):)))) block({:{ expression_statement(compound_assignment_expr(field_expression(id .:. id) +=:+= lit) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( scoped_identifier(id ::::: id) ):))) ;:;) }:}) else_clause(id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ):))) ;:;) }:}))) expression_statement(id id id id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;) }:})) call_expression(id arguments((:( id ):))) }:}))", - "child_fingerprints": [ - "let_declaration(id id =:= call_expression(field_expression(call_expression(field_expression(call_expression(field_expression(id .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) .:. id) arguments((:( ):))) ;:;)", - "let_declaration(id id =:= call_expression(scoped_identifier(id ::::: id) arguments((:( call_expression(field_expression(id .:. id) arguments((:( ):))) ,:, call_expression(field_expression(id .:. 
id) arguments((:( ):))) ):))) ;:;)", - "expression_statement(call_expression(id arguments((:( reference_expression(&:& id) ):))) ;:;)", - "expression_statement(id call_expression(field_expression(id .:. id) arguments((:( ):))) match_block({:{ match_arm(match_pattern(scoped_identifier(id ::::: id) |:| scoped_identifier(id ::::: id)) =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) match_arm(match_pattern(id ::::: id) =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) match_arm(id =>:=> call_expression(field_expression(id .:. id) arguments((:( id ):))) ,:,) }:}))", - "expression_statement(id binary_expression(macro_invocation(id !:! token_tree((:( id .:. id ,:, id ::::: id ):))) &&:&& call_expression(field_expression(id .:. id) arguments((:( ):)))) block({:{ expression_statement(compound_assignment_expr(field_expression(id .:. id) +=:+= lit) ;:;) expression_statement(call_expression(field_expression(id .:. id) arguments((:( scoped_identifier(id ::::: id) ):))) ;:;) }:}) else_clause(id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( string_literal(\":\" lit \":\") ):))) ;:;) }:})))", - "expression_statement(id id id id block({:{ expression_statement(call_expression(field_expression(id .:. id) arguments((:( ):))) ;:;) }:}))", - "call_expression(id arguments((:( id ):)))" - ], - "child_masses": [ - 27, - 30, - 10, - 64, - 70, - 18, - 6 - ] - }, - { - "method_name": "ready", - "node_name": "block", - "line": 52, - "span": [ - 52, - 28, - 54, - 5 - ], - "mass": 10, - "fingerprint": "block({:{ binary_expression(field_expression(id .:. id) >:> lit) }:})", - "child_fingerprints": [ - "binary_expression(field_expression(id .:. 
id) >:> lit)" - ], - "child_masses": [ - 7 - ] - }, - { - "method_name": "ready", - "node_name": "defn", - "line": 52, - "span": [ - 52, - 4, - 54, - 5 - ], - "mass": 21, - "fingerprint": "function_item(id id parameters((:( self_parameter(&:& id) ):)) ->:-> id block({:{ binary_expression(field_expression(id .:. id) >:> lit) }:}))", - "child_fingerprints": [ - "binary_expression(field_expression(id .:. id) >:> lit)" - ], - "child_masses": [ - 7 - ] - } + "predicate_bodies": [ + ] } ] diff --git a/gems/decomplex/examples/syntax-facts/oracles/swift-core.json b/gems/decomplex/examples/syntax-facts/oracles/swift-core.json new file mode 100644 index 000000000..fec3e06d3 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/swift-core.json @@ -0,0 +1,595 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/swift/core.swift", + "language": "swift", + "functions": [ + { + "line": 16, + "name": "process", + "owner": "SwiftSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 16, + 2, + 42, + 3 + ], + "visibility": null + }, + { + "line": 44, + "name": "audit", + "owner": "SwiftSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 44, + 2, + 48, + 3 + ], + "visibility": "private" + }, + { + "line": 50, + "name": "ready", + "owner": "SwiftSyntaxFactsCore", + "params": [ + + ], + "span": [ + 50, + 2, + 52, + 3 + ], + "visibility": null + } + ], + "owners": [ + { + "kind": "class", + "line": 1, + "name": "Status", + "span": [ + 1, + 0, + 4, + 1 + ] + }, + { + "kind": "class", + "line": 6, + "name": "SwiftSyntaxFactsCore", + "span": [ + 6, + 0, + 53, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + ".busy" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 32, + "message": "publish", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 32, + 6, + 32, + 25 + ] + }, 
+ { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 34, + "message": "print", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 34, + 6, + 34, + 24 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 46, + "message": "send", + "owner": "SwiftSyntaxFactsCore", + "receiver": "sink", + "safe_navigation": false, + "span": [ + 46, + 4, + 46, + 29 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 19, + "message": "callback", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 19, + 4, + 19, + 21 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 45, + "message": "print", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 45, + 4, + 45, + 15 + ] + }, + { + "arguments": [ + "name: name", + "active: user.active" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 18, + "message": "Account", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 18, + 18, + 18, + 58 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 23, + "message": "escalate", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 23, + 6, + 23, + 25 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 25, + "message": "fallback", + "owner": 
"SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 25, + 6, + 25, + 19 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 27, + "message": "defaultCase", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 27, + 6, + 27, + 28 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 38, + "message": "children", + "owner": "SwiftSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 38, + 6, + 38, + 21 + ] + } + ], + "state_reads": [ + { + "field": "active", + "function": "process", + "line": 18, + "owner": "SwiftSyntaxFactsCore", + "receiver": "user", + "span": [ + 18, + 46, + 18, + 57 + ] + }, + { + "field": "children", + "function": "process", + "line": 38, + "owner": "SwiftSyntaxFactsCore", + "receiver": "item", + "span": [ + 38, + 6, + 38, + 19 + ] + }, + { + "field": "defaultCase", + "function": "process", + "line": 27, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 27, + 6, + 27, + 22 + ] + }, + { + "field": "escalate", + "function": "process", + "line": 23, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 23, + 6, + 23, + 19 + ] + }, + { + "field": "fallback", + "function": "process", + "line": 25, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 25, + 6, + 25, + 19 + ] + }, + { + "field": "name", + "function": "process", + "line": 17, + "owner": "SwiftSyntaxFactsCore", + "receiver": "user.profile", + "span": [ + 17, + 15, + 17, + 33 + ] + }, + { + "field": "profile", + "function": "process", + "line": 17, + "owner": "SwiftSyntaxFactsCore", + "receiver": "user", + "span": [ + 17, + 15, + 17, + 27 + ] + }, + { + "field": "publish", + "function": "process", + "line": 32, + "owner": "SwiftSyntaxFactsCore", + 
"receiver": "self", + "span": [ + 32, + 6, + 32, + 18 + ] + }, + { + "field": "ready", + "function": "process", + "line": 30, + "owner": "SwiftSyntaxFactsCore", + "receiver": "user", + "span": [ + 30, + 31, + 30, + 41 + ] + }, + { + "field": "role", + "function": "process", + "line": 21, + "owner": "SwiftSyntaxFactsCore", + "receiver": "user", + "span": [ + 21, + 11, + 21, + 20 + ] + }, + { + "field": "send", + "function": "audit", + "line": 46, + "owner": "SwiftSyntaxFactsCore", + "receiver": "sink", + "span": [ + 46, + 4, + 46, + 13 + ] + }, + { + "field": "status", + "function": "process", + "line": 30, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 30, + 7, + 30, + 18 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "process", + "line": 31, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 31, + 6, + 31, + 21 + ] + }, + { + "field": "sink", + "function": "(top-level)", + "line": 13, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 13, + 4, + 13, + 20 + ] + }, + { + "field": "status", + "function": "(top-level)", + "line": 12, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 12, + 4, + 12, + 24 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 21, + 4, + 28, + 5 + ], + "function": "process", + "kind": "case_dispatch", + "line": 21, + "members": [ + "\"admin\"", + "\"guest\"", + "\"owner\"" + ], + "predicate": "user.role", + "span": [ + 21, + 4, + 28, + 5 + ] + }, + { + "enclosing_span": [ + 30, + 4, + 35, + 5 + ], + "function": "process", + "kind": "conjunction", + "line": 30, + "members": [ + ".idle", + "user.ready" + ], + "predicate": ".idle && user.ready", + "span": [ + 30, + 22, + 30, + 41 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 21, + "predicate": "user.role", + "span": [ + 21, + 4, + 28, + 5 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 30, + "predicate": 
"self.status == .idle && user.ready", + "span": [ + 30, + 4, + 35, + 5 + ], + "state_refs": [ + "status", + "user.ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "print", + "function": "audit", + "kind": "hidden_io", + "line": 45, + "span": [ + 45, + 4, + 45, + 15 + ] + }, + { + "detail": "print", + "function": "process", + "kind": "hidden_io", + "line": 34, + "span": [ + 34, + 6, + 34, + 24 + ] + } + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/typescript-core.json b/gems/decomplex/examples/syntax-facts/oracles/typescript-core.json new file mode 100644 index 000000000..e8d47251d --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/typescript-core.json @@ -0,0 +1,646 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/typescript/core.ts", + "language": "typescript", + "functions": [ + { + "line": 19, + "name": "constructor", + "owner": "TypeScriptSyntaxFactsCore", + "params": [ + "status", + "sink" + ], + "span": [ + 19, + 2, + 21, + 3 + ], + "visibility": "public" + }, + { + "line": 23, + "name": "process", + "owner": "TypeScriptSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 23, + 2, + 52, + 3 + ], + "visibility": "public" + }, + { + "line": 54, + "name": "audit", + "owner": "TypeScriptSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 54, + 2, + 58, + 3 + ], + "visibility": "private" + }, + { + "line": 60, + "name": "ready", + "owner": "TypeScriptSyntaxFactsCore", + "params": [ + + ], + "span": [ + 60, + 2, + 62, + 3 + ], + "visibility": "public" + }, + { + "line": 65, + "name": "normalizeValue", + "owner": "core", + "params": [ + "input" + ], + "span": [ + 65, + 7, + 67, + 1 + ], + "visibility": "public" + } + ], + "owners": [ + { + "kind": "class", + "line": 15, + "name": "TypeScriptSyntaxFactsCore", + "span": [ + 15, + 7, + 63, + 1 + ] + } + 
], + "calls": [ + { + "arguments": [ + "\"busy\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 42, + "message": "publish", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 42, + 6, + 42, + 26 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 44, + "message": "warn", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "console", + "safe_navigation": false, + "span": [ + 44, + 6, + 44, + 31 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 56, + "message": "send", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "this.sink", + "safe_navigation": false, + "span": [ + 56, + 4, + 56, + 34 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 26, + "message": "callback", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 26, + 4, + 26, + 21 + ] + }, + { + "arguments": [ + "items[index]" + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 48, + "message": "audit", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 48, + 6, + 48, + 30 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 55, + "message": "log", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "console", + "safe_navigation": false, + "span": [ + 55, + 4, + 55, + 21 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 31, + "message": 
"escalate", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 31, + 8, + 31, + 27 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 34, + "message": "fallback", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 34, + 8, + 34, + 27 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 37, + "message": "defaultCase", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 37, + 8, + 37, + 30 + ] + } + ], + "state_reads": [ + { + "field": "active", + "function": "process", + "line": 25, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 25, + 45, + 25, + 56 + ] + }, + { + "field": "audit", + "function": "process", + "line": 48, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 48, + 6, + 48, + 16 + ] + }, + { + "field": "count", + "function": "ready", + "line": 61, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 61, + 11, + 61, + 21 + ] + }, + { + "field": "defaultCase", + "function": "process", + "line": 37, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 37, + 8, + 37, + 24 + ] + }, + { + "field": "escalate", + "function": "process", + "line": 31, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 31, + 8, + 31, + 21 + ] + }, + { + "field": "fallback", + "function": "process", + "line": 34, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 34, + 8, + 34, + 21 + ] + }, + { + "field": "log", + "function": "audit", + "line": 55, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "console", + "span": [ + 55, + 4, + 55, + 15 + ] + }, + { + "field": "name", + "function": 
"process", + "line": 24, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "user.profile", + "span": [ + 24, + 17, + 24, + 35 + ] + }, + { + "field": "profile", + "function": "process", + "line": 24, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 24, + 17, + 24, + 29 + ] + }, + { + "field": "publish", + "function": "process", + "line": 42, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 42, + 6, + 42, + 18 + ] + }, + { + "field": "ready", + "function": "process", + "line": 40, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 40, + 34, + 40, + 44 + ] + }, + { + "field": "role", + "function": "process", + "line": 28, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 28, + 12, + 28, + 21 + ] + }, + { + "field": "send", + "function": "audit", + "line": 56, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "this.sink", + "span": [ + 56, + 4, + 56, + 18 + ] + }, + { + "field": "sink", + "function": "audit", + "line": 56, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 56, + 4, + 56, + 13 + ] + }, + { + "field": "status", + "function": "audit", + "line": 57, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 57, + 11, + 57, + 22 + ] + }, + { + "field": "status", + "function": "process", + "line": 40, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 40, + 8, + 40, + 19 + ] + }, + { + "field": "warn", + "function": "process", + "line": 44, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "console", + "span": [ + 44, + 6, + 44, + 18 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "process", + "line": 41, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 41, + 6, + 41, + 21 + ] + }, + { + "field": "status", + "function": "constructor", + "line": 20, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 20, 
+ 4, + 20, + 24 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 28, + 4, + 38, + 5 + ], + "function": "process", + "kind": "case_dispatch", + "line": 28, + "members": [ + "\"admin\"", + "\"guest\"", + "\"owner\"" + ], + "predicate": "user.role", + "span": [ + 28, + 4, + 38, + 5 + ] + }, + { + "enclosing_span": [ + 40, + 4, + 45, + 5 + ], + "function": "process", + "kind": "conjunction", + "line": 40, + "members": [ + "this.status === \"idle\"", + "user.ready" + ], + "predicate": "this.status === \"idle\" && user.ready", + "span": [ + 40, + 8, + 40, + 44 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 28, + "predicate": "(user.role)", + "span": [ + 28, + 4, + 38, + 5 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 40, + "predicate": "(this.status === \"idle\" && user.ready)", + "span": [ + 40, + 4, + 45, + 5 + ], + "state_refs": [ + "this.status", + "user.ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "console.log", + "function": "audit", + "kind": "hidden_io", + "line": 55, + "span": [ + 55, + 4, + 55, + 21 + ] + }, + { + "detail": "console.warn", + "function": "process", + "kind": "hidden_io", + "line": 44, + "span": [ + 44, + 6, + 44, + 31 + ] + } + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/zig-core.json b/gems/decomplex/examples/syntax-facts/oracles/zig-core.json index 936f2ff2b..9518741c5 100644 --- a/gems/decomplex/examples/syntax-facts/oracles/zig-core.json +++ b/gems/decomplex/examples/syntax-facts/oracles/zig-core.json @@ -6,221 +6,223 @@ "language": "zig", "functions": [ { - "name": "audit", + "line": 11, + "name": "process", "owner": "ZigSyntaxFactsCore", - "line": 38, + "params": [ + "self", + "user", + "items", + "callback" + ], "span": [ - 38, + 11, 4, - 41, + 36, 5 ], - "visibility": "private", - "params": [ - "self", - "name" - ] + "visibility": "public" }, { - "name": "init", 
+ "line": 38, + "name": "audit", "owner": "ZigSyntaxFactsCore", - "line": 7, + "params": [ + "self", + "name" + ], "span": [ - 7, + 38, 4, - 9, + 41, 5 ], - "visibility": "public", - "params": [ - "status" - ] + "visibility": "private" }, { - "name": "process", + "line": 43, + "name": "ready", "owner": "ZigSyntaxFactsCore", - "line": 11, + "params": [ + "self" + ], "span": [ - 11, + 43, 4, - 36, + 45, 5 ], - "visibility": "public", - "params": [ - "self", - "user", - "items", - "callback" - ] + "visibility": "private" }, { - "name": "ready", + "line": 7, + "name": "init", "owner": "ZigSyntaxFactsCore", - "line": 43, + "params": [ + "status" + ], "span": [ - 43, + 7, 4, - 45, + 9, 5 ], - "visibility": "private", - "params": [ - "self" - ] + "visibility": "public" } ], "owners": [ { - "name": "Item", "kind": "struct", - "line": 53, + "line": 3, + "name": "ZigSyntaxFactsCore", "span": [ - 53, - 13, - 55, + 3, + 31, + 46, 1 ] }, { - "name": "ZigSyntaxFactsCore", "kind": "struct", - "line": 3, + "line": 53, + "name": "Item", "span": [ - 3, - 31, - 46, + 53, + 13, + 55, 1 ] } ], "calls": [ { - "receiver": "item", - "message": "children", + "arguments": [ + ".busy" + ], + "block": false, + "conditional": true, + "control": "conditional", "function": "process", + "line": 25, + "message": "publish", "owner": "ZigSyntaxFactsCore", - "line": 31, + "receiver": "self", + "safe_navigation": false, "span": [ - 31, - 16, - 31, + 25, + 12, + 25, 31 - ], - "conditional": true, - "arguments": [], - "control": "iterates", - "safe_navigation": false, - "block": false + ] }, { - "receiver": "self", - "message": "callback", + "arguments": [ + "user" + ], + "block": false, + "conditional": false, + "control": "always", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 15, + "message": "callback", + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ 15, 8, 15, 22 - ], - "conditional": false, - "arguments": [ - "user" - ], - "control": 
"always", - "safe_navigation": false, - "block": false + ] }, { - "receiver": "self", - "message": "defaultCase", - "function": "process", - "owner": "ZigSyntaxFactsCore", - "line": 20, - "span": [ - 20, - 20, - 20, - 42 - ], - "conditional": true, "arguments": [ "user" ], + "block": false, + "conditional": true, "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "escalate", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 18, + "message": "escalate", + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ 18, 30, 18, 49 - ], - "conditional": true, + ] + }, + { "arguments": [ "user" ], + "block": false, + "conditional": true, "control": "conditional", - "safe_navigation": false, - "block": false - }, - { - "receiver": "self", - "message": "fallback", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 19, + "message": "fallback", + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, "span": [ 19, 22, 19, 41 - ], - "conditional": true, + ] + }, + { "arguments": [ "user" ], + "block": false, + "conditional": true, "control": "conditional", + "function": "process", + "line": 20, + "message": "defaultCase", + "owner": "ZigSyntaxFactsCore", + "receiver": "self", "safe_navigation": false, - "block": false + "span": [ + 20, + 20, + 20, + 42 + ] }, { - "receiver": "self", - "message": "publish", + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", "function": "process", + "line": 31, + "message": "children", "owner": "ZigSyntaxFactsCore", - "line": 25, + "receiver": "item", + "safe_navigation": false, "span": [ - 25, - 12, - 25, + 31, + 16, + 31, 31 - ], - "conditional": true, - "arguments": [ - ".busy" - ], - "control": "conditional", - "safe_navigation": false, - "block": false + ] } ], "state_reads": [ { "field": "admin", - "receiver": ".literal", "function": "process", - 
"owner": "ZigSyntaxFactsCore", "line": 18, + "owner": "ZigSyntaxFactsCore", + "receiver": ".literal", "span": [ 18, 20, @@ -230,10 +232,10 @@ }, { "field": "busy", - "receiver": ".literal", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 25, + "owner": "ZigSyntaxFactsCore", + "receiver": ".literal", "span": [ 25, 25, @@ -243,10 +245,10 @@ }, { "field": "children", - "receiver": "item", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 31, + "owner": "ZigSyntaxFactsCore", + "receiver": "item", "span": [ 31, 16, @@ -256,10 +258,10 @@ }, { "field": "count", - "receiver": "self", "function": "ready", - "owner": "ZigSyntaxFactsCore", "line": 44, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", "span": [ 44, 15, @@ -269,10 +271,10 @@ }, { "field": "defaultCase", - "receiver": "self", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 20, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", "span": [ 20, 20, @@ -282,10 +284,10 @@ }, { "field": "escalate", - "receiver": "self", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 18, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", "span": [ 18, 30, @@ -295,10 +297,10 @@ }, { "field": "fallback", - "receiver": "self", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 19, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", "span": [ 19, 22, @@ -308,10 +310,10 @@ }, { "field": "guest", - "receiver": ".literal", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 19, + "owner": "ZigSyntaxFactsCore", + "receiver": ".literal", "span": [ 19, 12, @@ -321,10 +323,10 @@ }, { "field": "idle", - "receiver": ".literal", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 23, + "owner": "ZigSyntaxFactsCore", + "receiver": ".literal", "span": [ 23, 27, @@ -334,10 +336,10 @@ }, { "field": "name", - "receiver": "user.profile", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 12, + "owner": "ZigSyntaxFactsCore", + 
"receiver": "user.profile", "span": [ 12, 21, @@ -347,10 +349,10 @@ }, { "field": "owner", - "receiver": ".literal", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 18, + "owner": "ZigSyntaxFactsCore", + "receiver": ".literal", "span": [ 18, 12, @@ -360,10 +362,10 @@ }, { "field": "profile", - "receiver": "user", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 12, + "owner": "ZigSyntaxFactsCore", + "receiver": "user", "span": [ 12, 21, @@ -373,10 +375,10 @@ }, { "field": "publish", - "receiver": "self", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 25, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", "span": [ 25, 12, @@ -386,10 +388,10 @@ }, { "field": "ready", - "receiver": "user", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 23, + "owner": "ZigSyntaxFactsCore", + "receiver": "user", "span": [ 23, 37, @@ -399,10 +401,10 @@ }, { "field": "role", - "receiver": "user", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 17, + "owner": "ZigSyntaxFactsCore", + "receiver": "user", "span": [ 17, 16, @@ -412,10 +414,10 @@ }, { "field": "status", - "receiver": "self", "function": "audit", - "owner": "ZigSyntaxFactsCore", "line": 40, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", "span": [ 40, 12, @@ -425,10 +427,10 @@ }, { "field": "status", - "receiver": "self", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 23, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", "span": [ 23, 12, @@ -440,10 +442,10 @@ "state_writes": [ { "field": "count", - "receiver": ".literal", "function": "init", - "owner": "ZigSyntaxFactsCore", "line": 8, + "owner": "ZigSyntaxFactsCore", + "receiver": ".literal", "span": [ 8, 53, @@ -453,10 +455,10 @@ }, { "field": "count", - "receiver": "self", "function": "process", - "owner": "ZigSyntaxFactsCore", "line": 24, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", "span": [ 24, 12, @@ -466,10 +468,10 @@ }, { "field": "status", - 
"receiver": ".literal", "function": "init", - "owner": "ZigSyntaxFactsCore", "line": 8, + "owner": "ZigSyntaxFactsCore", + "receiver": ".literal", "span": [ 8, 35, @@ -480,21 +482,21 @@ ], "decisions": [ { - "kind": "case_dispatch", - "members": [ - ".guest", - ".owner" - ], - "function": "process", - "line": 17, - "span": [ + "enclosing_span": [ 17, 8, 21, 9 ], + "function": "process", + "kind": "case_dispatch", + "line": 17, + "members": [ + ".guest", + ".owner" + ], "predicate": "user.role", - "enclosing_span": [ + "span": [ 17, 8, 21, @@ -502,25 +504,25 @@ ] }, { + "enclosing_span": [ + 23, + 8, + 28, + 9 + ], + "function": "process", "kind": "conjunction", + "line": 23, "members": [ "self.status == .idle", "user.ready" ], - "function": "process", - "line": 23, + "predicate": "self.status == .idle and user.ready", "span": [ 23, 12, 23, 47 - ], - "predicate": "self.status == .idle and user.ready", - "enclosing_span": [ - 23, - 8, - 28, - 9 ] } ], @@ -528,13 +530,13 @@ { "function": "process", "line": 17, + "predicate": "user.role", "span": [ 17, 8, 21, 9 ], - "predicate": "user.role", "state_refs": [ "user.role" ] @@ -542,13 +544,13 @@ { "function": "process", "line": 23, + "predicate": "self.status == .idle and user.ready", "span": [ 23, 8, 28, 9 ], - "predicate": "self.status == .idle and user.ready", "state_refs": [ ".literal.idle", "status", @@ -556,405 +558,14 @@ ] } ], - "dispatch_sites": [], - "semantic_effects": [], - "predicate_bodies": [], - "local_complexity": [ - { - "id": "ZigSyntaxFactsCore#audit", - "score": 0.0, - "signals": {} - }, - { - "id": "ZigSyntaxFactsCore#init", - "score": 0.0, - "signals": { - "early_exits": 1 - } - }, - { - "id": "ZigSyntaxFactsCore#process", - "score": 5.6, - "signals": { - "boolean_ops": 2, - "branches": 1, - "cases": 1, - "early_exits": 1, - "loops": 2, - "nested": 1 - } - }, - { - "id": "ZigSyntaxFactsCore#ready", - "score": 0.0, - "signals": { - "early_exits": 1 - } - } + "dispatch_sites": [ + ], - 
"clone_candidates": [ - { - "method_name": "(top-level)", - "node_name": "enum_declaration", - "line": 48, - "span": [ - 48, - 15, - 51, - 1 - ], - "mass": 8, - "fingerprint": "enum_declaration(id {:{ id ,:, id ,:, }:})", - "child_fingerprints": [ - "id", - "id" - ], - "child_masses": [ - 1, - 1 - ] - }, - { - "method_name": "(top-level)", - "node_name": "struct_declaration", - "line": 3, - "span": [ - 3, - 31, - 46, - 1 - ], - "mass": 365, - "fingerprint": "struct_declaration(id {:{ container_field(id ::: id) ,:, container_field(id ::: id) ,:, function_declaration(id id id parameters((:( parameter(id ::: id) ):)) id block({:{ expression_statement(return_expression(id struct_initializer(id initializer_list({:{ assignment_expression(field_expression(.:. id) =:= id) ,:, assignment_expression(field_expression(.:. id) =:= lit) }:}))) ;:;) }:})) function_declaration(id id id parameters((:( parameter(id ::: pointer_type(*:* id)) ,:, parameter(id ::: id) ,:, parameter(id ::: slice_type([:[ ]:] id id)) ,:, parameter(id ::: id) ):)) nullable_type(?:? slice_type([:[ ]:] id id)) block({:{ variable_declaration(id id =:= field_expression(field_expression(id .:. id) .:. id) ;:;) variable_declaration(id id ::: nullable_type(?:? slice_type([:[ ]:] id id)) =:= nil ;:;) expression_statement(call_expression(id (:( id ):)) ;:;) switch_expression(id (:( field_expression(id .:. id) ):) {:{ switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, }:}) if_statement(id (:( binary_expression(binary_expression(field_expression(id .:. id) ==:== field_expression(.:. id)) id field_expression(id .:. id)) ):) block_expression({:{ variable_declaration(field_expression(id .:. 
id) +=:+= lit ;:;) expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;) }:}) else_clause(id labeled_statement({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ }:})) ):)) ;:;) }:}))) labeled_statement(id (:( id ):) payload(|:| id |:|) block_expression({:{ variable_declaration(id =:= call_expression(field_expression(id .:. id) (:( ):)) ;:;) }:})) variable_declaration(id =:= id ;:;) expression_statement(return_expression(id id) ;:;) }:})) function_declaration(id id parameters((:( parameter(id ::: pointer_type(*:* id)) ,:, parameter(id ::: slice_type([:[ ]:] id id)) ):)) id block({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ id }:})) ):)) ;:;) variable_declaration(id =:= field_expression(id .:. id) ;:;) }:})) function_declaration(id id parameters((:( parameter(id ::: pointer_type(*:* id)) ):)) id block({:{ expression_statement(return_expression(id binary_expression(field_expression(id .:. id) >:> lit)) ;:;) }:})) }:})", - "child_fingerprints": [ - "container_field(id ::: id)", - "container_field(id ::: id)", - "function_declaration(id id id parameters((:( parameter(id ::: id) ):)) id block({:{ expression_statement(return_expression(id struct_initializer(id initializer_list({:{ assignment_expression(field_expression(.:. id) =:= id) ,:, assignment_expression(field_expression(.:. id) =:= lit) }:}))) ;:;) }:}))", - "function_declaration(id id id parameters((:( parameter(id ::: pointer_type(*:* id)) ,:, parameter(id ::: id) ,:, parameter(id ::: slice_type([:[ ]:] id id)) ,:, parameter(id ::: id) ):)) nullable_type(?:? slice_type([:[ ]:] id id)) block({:{ variable_declaration(id id =:= field_expression(field_expression(id .:. id) .:. 
id) ;:;) variable_declaration(id id ::: nullable_type(?:? slice_type([:[ ]:] id id)) =:= nil ;:;) expression_statement(call_expression(id (:( id ):)) ;:;) switch_expression(id (:( field_expression(id .:. id) ):) {:{ switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, }:}) if_statement(id (:( binary_expression(binary_expression(field_expression(id .:. id) ==:== field_expression(.:. id)) id field_expression(id .:. id)) ):) block_expression({:{ variable_declaration(field_expression(id .:. id) +=:+= lit ;:;) expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;) }:}) else_clause(id labeled_statement({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ }:})) ):)) ;:;) }:}))) labeled_statement(id (:( id ):) payload(|:| id |:|) block_expression({:{ variable_declaration(id =:= call_expression(field_expression(id .:. id) (:( ):)) ;:;) }:})) variable_declaration(id =:= id ;:;) expression_statement(return_expression(id id) ;:;) }:}))", - "function_declaration(id id parameters((:( parameter(id ::: pointer_type(*:* id)) ,:, parameter(id ::: slice_type([:[ ]:] id id)) ):)) id block({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ id }:})) ):)) ;:;) variable_declaration(id =:= field_expression(id .:. id) ;:;) }:}))", - "function_declaration(id id parameters((:( parameter(id ::: pointer_type(*:* id)) ):)) id block({:{ expression_statement(return_expression(id binary_expression(field_expression(id .:. 
id) >:> lit)) ;:;) }:}))" - ], - "child_masses": [ - 4, - 4, - 37, - 231, - 56, - 27 - ] - }, - { - "method_name": "(top-level)", - "node_name": "struct_declaration", - "line": 53, - "span": [ - 53, - 13, - 55, - 1 - ], - "mass": 13, - "fingerprint": "struct_declaration(id {:{ container_field(id ::: slice_type([:[ ]:] id id)) ,:, }:})", - "child_fingerprints": [ - "container_field(id ::: slice_type([:[ ]:] id id))" - ], - "child_masses": [ - 8 - ] - }, - { - "method_name": "audit", - "node_name": "block", - "line": 38, - "span": [ - 38, - 63, - 41, - 5 - ], - "mass": 34, - "fingerprint": "block({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ id }:})) ):)) ;:;) variable_declaration(id =:= field_expression(id .:. id) ;:;) }:})", - "child_fingerprints": [ - "expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ id }:})) ):)) ;:;)", - "variable_declaration(id =:= field_expression(id .:. id) ;:;)" - ], - "child_masses": [ - 23, - 8 - ] - }, - { - "method_name": "audit", - "node_name": "defn", - "line": 38, - "span": [ - 38, - 4, - 41, - 5 - ], - "mass": 56, - "fingerprint": "function_declaration(id id parameters((:( parameter(id ::: pointer_type(*:* id)) ,:, parameter(id ::: slice_type([:[ ]:] id id)) ):)) id block({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ id }:})) ):)) ;:;) variable_declaration(id =:= field_expression(id .:. id) ;:;) }:}))", - "child_fingerprints": [ - "expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. 
initializer_list({:{ id }:})) ):)) ;:;)", - "variable_declaration(id =:= field_expression(id .:. id) ;:;)" - ], - "child_masses": [ - 23, - 8 - ] - }, - { - "method_name": "init", - "node_name": "block", - "line": 7, - "span": [ - 7, - 51, - 9, - 5 - ], - "mass": 25, - "fingerprint": "block({:{ expression_statement(return_expression(id struct_initializer(id initializer_list({:{ assignment_expression(field_expression(.:. id) =:= id) ,:, assignment_expression(field_expression(.:. id) =:= lit) }:}))) ;:;) }:})", - "child_fingerprints": [ - "expression_statement(return_expression(id struct_initializer(id initializer_list({:{ assignment_expression(field_expression(.:. id) =:= id) ,:, assignment_expression(field_expression(.:. id) =:= lit) }:}))) ;:;)" - ], - "child_masses": [ - 22 - ] - }, - { - "method_name": "init", - "node_name": "defn", - "line": 7, - "span": [ - 7, - 4, - 9, - 5 - ], - "mass": 37, - "fingerprint": "function_declaration(id id id parameters((:( parameter(id ::: id) ):)) id block({:{ expression_statement(return_expression(id struct_initializer(id initializer_list({:{ assignment_expression(field_expression(.:. id) =:= id) ,:, assignment_expression(field_expression(.:. id) =:= lit) }:}))) ;:;) }:}))", - "child_fingerprints": [ - "expression_statement(return_expression(id struct_initializer(id initializer_list({:{ assignment_expression(field_expression(.:. id) =:= id) ,:, assignment_expression(field_expression(.:. id) =:= lit) }:}))) ;:;)" - ], - "child_masses": [ - 22 - ] - }, - { - "method_name": "process", - "node_name": "block", - "line": 11, - "span": [ - 11, - 113, - 36, - 5 - ], - "mass": 192, - "fingerprint": "block({:{ variable_declaration(id id =:= field_expression(field_expression(id .:. id) .:. id) ;:;) variable_declaration(id id ::: nullable_type(?:? slice_type([:[ ]:] id id)) =:= nil ;:;) expression_statement(call_expression(id (:( id ):)) ;:;) switch_expression(id (:( field_expression(id .:. id) ):) {:{ switch_case(field_expression(.:. 
id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, }:}) if_statement(id (:( binary_expression(binary_expression(field_expression(id .:. id) ==:== field_expression(.:. id)) id field_expression(id .:. id)) ):) block_expression({:{ variable_declaration(field_expression(id .:. id) +=:+= lit ;:;) expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;) }:}) else_clause(id labeled_statement({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ }:})) ):)) ;:;) }:}))) labeled_statement(id (:( id ):) payload(|:| id |:|) block_expression({:{ variable_declaration(id =:= call_expression(field_expression(id .:. id) (:( ):)) ;:;) }:})) variable_declaration(id =:= id ;:;) expression_statement(return_expression(id id) ;:;) }:})", - "child_fingerprints": [ - "variable_declaration(id id =:= field_expression(field_expression(id .:. id) .:. id) ;:;)", - "variable_declaration(id id ::: nullable_type(?:? slice_type([:[ ]:] id id)) =:= nil ;:;)", - "expression_statement(call_expression(id (:( id ):)) ;:;)", - "switch_expression(id (:( field_expression(id .:. id) ):) {:{ switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, }:})", - "if_statement(id (:( binary_expression(binary_expression(field_expression(id .:. id) ==:== field_expression(.:. id)) id field_expression(id .:. 
id)) ):) block_expression({:{ variable_declaration(field_expression(id .:. id) +=:+= lit ;:;) expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;) }:}) else_clause(id labeled_statement({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ }:})) ):)) ;:;) }:})))", - "labeled_statement(id (:( id ):) payload(|:| id |:|) block_expression({:{ variable_declaration(id =:= call_expression(field_expression(id .:. id) (:( ):)) ;:;) }:}))", - "variable_declaration(id =:= id ;:;)", - "expression_statement(return_expression(id id) ;:;)" - ], - "child_masses": [ - 12, - 14, - 7, - 54, - 69, - 23, - 5, - 5 - ] - }, - { - "method_name": "process", - "node_name": "defn", - "line": 11, - "span": [ - 11, - 4, - 36, - 5 - ], - "mass": 231, - "fingerprint": "function_declaration(id id id parameters((:( parameter(id ::: pointer_type(*:* id)) ,:, parameter(id ::: id) ,:, parameter(id ::: slice_type([:[ ]:] id id)) ,:, parameter(id ::: id) ):)) nullable_type(?:? slice_type([:[ ]:] id id)) block({:{ variable_declaration(id id =:= field_expression(field_expression(id .:. id) .:. id) ;:;) variable_declaration(id id ::: nullable_type(?:? slice_type([:[ ]:] id id)) =:= nil ;:;) expression_statement(call_expression(id (:( id ):)) ;:;) switch_expression(id (:( field_expression(id .:. id) ):) {:{ switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, }:}) if_statement(id (:( binary_expression(binary_expression(field_expression(id .:. id) ==:== field_expression(.:. id)) id field_expression(id .:. 
id)) ):) block_expression({:{ variable_declaration(field_expression(id .:. id) +=:+= lit ;:;) expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;) }:}) else_clause(id labeled_statement({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ }:})) ):)) ;:;) }:}))) labeled_statement(id (:( id ):) payload(|:| id |:|) block_expression({:{ variable_declaration(id =:= call_expression(field_expression(id .:. id) (:( ):)) ;:;) }:})) variable_declaration(id =:= id ;:;) expression_statement(return_expression(id id) ;:;) }:}))", - "child_fingerprints": [ - "variable_declaration(id id =:= field_expression(field_expression(id .:. id) .:. id) ;:;)", - "variable_declaration(id id ::: nullable_type(?:? slice_type([:[ ]:] id id)) =:= nil ;:;)", - "expression_statement(call_expression(id (:( id ):)) ;:;)", - "switch_expression(id (:( field_expression(id .:. id) ):) {:{ switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, }:})", - "if_statement(id (:( binary_expression(binary_expression(field_expression(id .:. id) ==:== field_expression(.:. id)) id field_expression(id .:. id)) ):) block_expression({:{ variable_declaration(field_expression(id .:. id) +=:+= lit ;:;) expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;) }:}) else_clause(id labeled_statement({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. 
initializer_list({:{ }:})) ):)) ;:;) }:})))", - "labeled_statement(id (:( id ):) payload(|:| id |:|) block_expression({:{ variable_declaration(id =:= call_expression(field_expression(id .:. id) (:( ):)) ;:;) }:}))", - "variable_declaration(id =:= id ;:;)", - "expression_statement(return_expression(id id) ;:;)" - ], - "child_masses": [ - 12, - 14, - 7, - 54, - 69, - 23, - 5, - 5 - ] - }, - { - "method_name": "process", - "node_name": "if_statement", - "line": 23, - "span": [ - 23, - 8, - 28, - 9 - ], - "mass": 69, - "fingerprint": "if_statement(id (:( binary_expression(binary_expression(field_expression(id .:. id) ==:== field_expression(.:. id)) id field_expression(id .:. id)) ):) block_expression({:{ variable_declaration(field_expression(id .:. id) +=:+= lit ;:;) expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;) }:}) else_clause(id labeled_statement({:{ expression_statement(call_expression(field_expression(field_expression(id .:. id) .:. id) (:( string(\":\" lit \":\") ,:, anonymous_struct_initializer(.:. initializer_list({:{ }:})) ):)) ;:;) }:})))", - "child_fingerprints": [ - "variable_declaration(field_expression(id .:. id) +=:+= lit ;:;)", - "expression_statement(call_expression(field_expression(id .:. id) (:( field_expression(.:. id) ):)) ;:;)" - ], - "child_masses": [ - 8, - 12 - ] - }, - { - "method_name": "process", - "node_name": "switch_case", - "line": 18, - "span": [ - 18, - 12, - 18, - 49 - ], - "mass": 17, - "fingerprint": "switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):)))", - "child_fingerprints": [ - "field_expression(.:. id)", - "field_expression(.:. id)", - "call_expression(field_expression(id .:. 
id) (:( id ):))" - ], - "child_masses": [ - 3, - 3, - 8 - ] - }, - { - "method_name": "process", - "node_name": "switch_case", - "line": 19, - "span": [ - 19, - 12, - 19, - 41 - ], - "mass": 13, - "fingerprint": "switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):)))", - "child_fingerprints": [ - "field_expression(.:. id)", - "call_expression(field_expression(id .:. id) (:( id ):))" - ], - "child_masses": [ - 3, - 8 - ] - }, - { - "method_name": "process", - "node_name": "switch_case", - "line": 20, - "span": [ - 20, - 12, - 20, - 42 - ], - "mass": 11, - "fingerprint": "switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):)))", - "child_fingerprints": [ - "call_expression(field_expression(id .:. id) (:( id ):))" - ], - "child_masses": [ - 8 - ] - }, - { - "method_name": "process", - "node_name": "switch_expression", - "line": 17, - "span": [ - 17, - 8, - 21, - 9 - ], - "mass": 54, - "fingerprint": "switch_expression(id (:( field_expression(id .:. id) ):) {:{ switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, switch_case(id =>:=> call_expression(field_expression(id .:. id) (:( id ):))) ,:, }:})", - "child_fingerprints": [ - "field_expression(id .:. id)", - "switch_case(field_expression(.:. id) ,:, field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):)))", - "switch_case(field_expression(.:. id) =>:=> call_expression(field_expression(id .:. id) (:( id ):)))", - "switch_case(id =>:=> call_expression(field_expression(id .:. 
id) (:( id ):)))" - ], - "child_masses": [ - 4, - 17, - 13, - 11 - ] - }, - { - "method_name": "ready", - "node_name": "block", - "line": 43, - "span": [ - 43, - 45, - 45, - 5 - ], - "mass": 14, - "fingerprint": "block({:{ expression_statement(return_expression(id binary_expression(field_expression(id .:. id) >:> lit)) ;:;) }:})", - "child_fingerprints": [ - "expression_statement(return_expression(id binary_expression(field_expression(id .:. id) >:> lit)) ;:;)" - ], - "child_masses": [ - 11 - ] - }, - { - "method_name": "ready", - "node_name": "defn", - "line": 43, - "span": [ - 43, - 4, - 45, - 5 - ], - "mass": 27, - "fingerprint": "function_declaration(id id parameters((:( parameter(id ::: pointer_type(*:* id)) ):)) id block({:{ expression_statement(return_expression(id binary_expression(field_expression(id .:. id) >:> lit)) ;:;) }:}))", - "child_fingerprints": [ - "expression_statement(return_expression(id binary_expression(field_expression(id .:. id) >:> lit)) ;:;)" - ], - "child_masses": [ - 11 - ] - } + "semantic_effects": [ + + ], + "predicate_bodies": [ + ] } ] diff --git a/gems/decomplex/examples/syntax-facts/php/core.php b/gems/decomplex/examples/syntax-facts/php/core.php new file mode 100644 index 000000000..ed925a380 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/php/core.php @@ -0,0 +1,60 @@ +status = $status; + $this->sink = $sink; + } + + public function process($user, array $items, callable $callback): ?string + { + $name = $user?->profile?->name; + $account = new Account($name, $user->active); + $callback($account); + + switch ($user->role) { + case "owner": + case "admin": + $this->escalate($user); + break; + case "guest": + $this->fallback($user); + break; + default: + $this->defaultCase($user); + break; + } + + if ($this->status === "idle" && $user->ready) { + $this->count += 1; + $this->publish("busy"); + } else { + print "not ready"; + } + + foreach ($items as $item) { + $item->children(); + } + + return $name ?? 
null; + } + + private function audit(string $name): string + { + print($name); + $this->sink->send("record", $name); + return $this->status; + } + + public function ready(): bool + { + return $this->count > 0; + } +} + diff --git a/gems/decomplex/examples/syntax-facts/swift/core.swift b/gems/decomplex/examples/syntax-facts/swift/core.swift new file mode 100644 index 000000000..769e94e4d --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/swift/core.swift @@ -0,0 +1,54 @@ +enum Status { + case idle + case busy +} + +class SwiftSyntaxFactsCore { + private var status: Status + private var count = 0 + private let sink: Sink + + init(status: Status, sink: Sink) { + self.status = status + self.sink = sink + } + + func process(user: User, items: [Item], callback: (Account) -> Void) -> String? { + let name = user.profile?.name + let account = Account(name: name, active: user.active) + callback(account) + + switch user.role { + case "owner", "admin": + self.escalate(user) + case "guest": + self.fallback(user) + default: + self.defaultCase(user) + } + + if self.status == .idle && user.ready { + self.count += 1 + self.publish(.busy) + } else { + print("not ready") + } + + for item in items { + item.children() + } + + return name ?? 
"missing" + } + + private func audit(name: String) -> Status { + print(name) + sink.send("record", name) + return status + } + + func ready() -> Bool { + return count > 0 + } +} + diff --git a/gems/decomplex/examples/syntax-facts/typescript/core.ts b/gems/decomplex/examples/syntax-facts/typescript/core.ts new file mode 100644 index 000000000..1e1c6fdbb --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/typescript/core.ts @@ -0,0 +1,72 @@ +type Status = "idle" | "busy"; + +interface User { + role: string; + ready: boolean; + active: boolean; + profile?: { name?: string }; +} + +interface Account { + name: string | undefined; + active: boolean; +} + +export class TypeScriptSyntaxFactsCore { + private status: Status; + private count = 0; + + constructor(status: Status, private sink: Sink) { + this.status = status; + } + + process(user: User, items: string[], callback: (account: Account) => void): string | undefined { + const name = user.profile?.name; + const account: Account = { name, active: user.active }; + callback(account); + + switch (user.role) { + case "owner": + case "admin": + this.escalate(user); + break; + case "guest": + this.fallback(user); + break; + default: + this.defaultCase(user); + } + + if (this.status === "idle" && user.ready) { + this.count += 1; + this.publish("busy"); + } else { + console.warn("not ready"); + } + + for (const index in items) { + this.audit(items[index]); + } + + return name ?? undefined; + } + + private audit(name: string): Status { + console.log(name); + this.sink.send("record", name); + return this.status; + } + + ready(): boolean { + return this.count > 0; + } +} + +export function normalizeValue(input?: string): string | undefined { + return input ?? 
undefined; +} + +interface Sink { + send(kind: string, value: string): void; +} + diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index eda609bfa..86d80b635 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -1252,11 +1252,15 @@ def record_conjunction_decision(document, node, stack, out) function: current_function(stack), line: conjunction_span(node)[0], span: conjunction_span(node), - predicate: normalize_text(node.text), + predicate: conjunction_predicate(node), enclosing_span: decision_enclosing_span(node) ) end + def conjunction_predicate(node) + normalize_text(node.text) + end + def decision_enclosing_span(node) parent = parent_node(node) seen = Set.new @@ -1465,11 +1469,15 @@ def record_branch_decision(document, node, stack, out, immutable_readers:, immut function: current_function(stack), line: line(node), span: span(node), - predicate: normalize_text(cond.text), + predicate: branch_predicate(cond), state_refs: refs ) end + def branch_predicate(node) + normalize_text(node.text) + end + def record_branch_arm(document, node, stack, out) return if generated_prelude?(document, node) @@ -1615,6 +1623,9 @@ def case_arm_patterns(child) elsif switch_case_arm_node_kinds.include?(child.kind) return [] if child.text.to_s.lstrip.start_with?("else") + patterns = child.named_children.select { |node| case_pattern_node_kinds.include?(node.kind) } + return case_pattern_texts(patterns) unless patterns.empty? 
+ value = named_field(child, "value") || named_field(child, "pattern") || child.named_children.find { |candidate| candidate.kind == "when_condition" } || child.named_children.find { |candidate| candidate.kind == "switch_pattern" } || @@ -2221,7 +2232,7 @@ def call_argument_nodes(node) def adjacent_argument_call_target(node) return nil if generic_member_name?(node) && !member_message_identifier?(node) - return nil if call_node_kinds.include?(parent_node(node)&.kind) + return nil if call_node_ancestor?(node) callee = node args = nil @@ -2246,6 +2257,22 @@ def adjacent_argument_call_target(node) nil end + def call_node_ancestor?(node) + parent = parent_node(node) + seen = Set.new + while parent + key = node_key(parent) + return false if seen.include?(key) + return true if call_node_kinds.include?(parent.kind) + + seen << key + parent = parent_node(parent) + end + false + rescue NoMethodError + false + end + def target_from_callee(callee) if field_like_node?(callee) object = named_field(callee, "object") || named_field(callee, "receiver") || diff --git a/gems/decomplex/lib/decomplex/syntax/adapters.rb b/gems/decomplex/lib/decomplex/syntax/adapters.rb index 124452f83..ed7648ed5 100644 --- a/gems/decomplex/lib/decomplex/syntax/adapters.rb +++ b/gems/decomplex/lib/decomplex/syntax/adapters.rb @@ -13,7 +13,29 @@ def c_family_function_params(node) params = declarator&.named_children&.find { |child| child.kind == "parameter_list" } return nil unless params - params.named_children.filter_map { |param| parameter_name(param) }.uniq + params.named_children.filter_map { |param| c_family_parameter_name(param) || parameter_name(param) }.uniq + end + + def c_family_parameter_name(param) + declarator = param.named_children.reverse.find { |child| child.kind.end_with?("_declarator") } + name = c_family_declarator_name_node(declarator) + return name.text if name + + direct = param.named_children.select do |child| + parameter_identifier_node_kinds.include?(child.kind) + end.last + 
direct&.text + end + + def c_family_declarator_name_node(node) + return nil unless ts_node?(node) + return node if parameter_identifier_node_kinds.include?(node.kind) + + node.named_children.reverse_each do |child| + nested = c_family_declarator_name_node(child) + return nested if nested + end + nil end def boolean_expression_list?(node, operator) diff --git a/gems/decomplex/lib/decomplex/syntax/cpp.rb b/gems/decomplex/lib/decomplex/syntax/cpp.rb index 96bb4629b..4276a9cbb 100644 --- a/gems/decomplex/lib/decomplex/syntax/cpp.rb +++ b/gems/decomplex/lib/decomplex/syntax/cpp.rb @@ -78,6 +78,16 @@ def implicit_state_accesses? true end + def field_declaration_name_node(node) + declarator = node.named_children.reverse.find { |child| child.kind.end_with?("_declarator") } + name = declarator&.named_children&.reverse&.find do |child| + (identifier_node_kinds + field_identifier_node_kinds).include?(child.kind) + end + return name if name + + super + end + private def control_context(node) diff --git a/gems/decomplex/lib/decomplex/syntax/csharp.rb b/gems/decomplex/lib/decomplex/syntax/csharp.rb index b697b5f8c..d8937acb5 100644 --- a/gems/decomplex/lib/decomplex/syntax/csharp.rb +++ b/gems/decomplex/lib/decomplex/syntax/csharp.rb @@ -72,6 +72,29 @@ def implicit_state_accesses? 
true end + def field_declaration_name_node(node) + declaration = node.named_children.find { |child| child.kind == "variable_declaration" } + declarator = declaration&.named_children&.find { |child| child.kind == "variable_declarator" } + return named_field(declarator, "name") || declarator if declarator + + super + end + + def state_read_target(node) + if node.kind == "argument" + object = named_field(node, "expression") + field = named_field(node, "name") + field_text = member_field_text(field) + return nil unless object && field_text + return nil if namespace_receiver?(object.text) + return nil if NOISE_MESSAGES.include?(field_text) + + return { receiver: normalize_text(object.text), field: field_text } + end + + super + end + private def control_context(node) diff --git a/gems/decomplex/lib/decomplex/syntax/go.rb b/gems/decomplex/lib/decomplex/syntax/go.rb index 7b2a3fbd7..c6b0b96f7 100644 --- a/gems/decomplex/lib/decomplex/syntax/go.rb +++ b/gems/decomplex/lib/decomplex/syntax/go.rb @@ -22,7 +22,7 @@ module Syntax class GoSyntaxAdapter < TreeSitterLanguageAdapter FUNCTION_NODE_KINDS = %w[function_declaration method_declaration].freeze CALL_NODE_KINDS = %w[call_expression].freeze - ADJACENT_CALL_NODE_KINDS = %w[selector_expression identifier field_identifier].freeze + ADJACENT_CALL_NODE_KINDS = %w[selector_expression identifier].freeze GENERIC_OWNER_NODE_KINDS = %w[type_spec].freeze PARAMETER_LIST_NODE_KINDS = %w[parameter_list].freeze METHOD_PARAMETER_LIST_NODE_KINDS = %w[parameter_list].freeze @@ -90,6 +90,17 @@ def function_params(node) params.named_children.filter_map { |param| parameter_name(param) }.uniq end + def call_target(document, node) + return generic_call_target(document, node) if call_node_kinds.include?(node.kind) + return go_adjacent_call_target(node) if adjacent_call_node_kinds.include?(node.kind) + + nil + end + + def state_read_target(node) + go_literal_element_member_target(node) || super + end + def generic_function_body_statements(node) 
body = generic_function_body_node(node) return super unless body @@ -161,6 +172,23 @@ def go_adjacent_call_statement?(node) argument_list_node_kinds.include?(named.last.kind) end + def go_adjacent_call_target(node) + target = adjacent_argument_call_target(node) + return nil unless target + + args = next_sibling(node) || next_sibling(parent_node(node)) + source = go_adjacent_call_source_node(node, args) + target.merge(source_node: source) + end + + def go_adjacent_call_source_node(node, args) + parent = parent_node(node) + return node unless parent && args + + call_text = "#{node.text}#{args.text}" + parent.text.to_s.include?(call_text) ? parent : node + end + def go_keyed_element_key?(node) parent = parent_node(node) return false unless parent&.kind == "keyed_element" @@ -168,6 +196,17 @@ def go_keyed_element_key?(node) parent.named_children.first == node end + def go_literal_element_member_target(node) + return nil unless node.kind == "literal_element" + return nil if go_keyed_element_key?(node) + + receiver, field = node.named_children + return nil unless receiver && field + return nil unless generic_identifier?(receiver) && field_identifier_node_kinds.include?(field.kind) + + { receiver: normalize_text(receiver.text), field: field.text } + end + def go_var_spec_name_nodes(node) go_var_spec_nodes(node).flat_map do |spec| names = spec.named_children.take_while { |child| child.kind == "identifier" } diff --git a/gems/decomplex/lib/decomplex/syntax/kotlin.rb b/gems/decomplex/lib/decomplex/syntax/kotlin.rb index ce9414faa..be46b957e 100644 --- a/gems/decomplex/lib/decomplex/syntax/kotlin.rb +++ b/gems/decomplex/lib/decomplex/syntax/kotlin.rb @@ -69,6 +69,24 @@ class KotlinSyntaxAdapter < TreeSitterLanguageAdapter NAVIGATION_SUFFIX_NODE_KINDS = %w[navigation_suffix].freeze FIELD_LIKE_NODE_KINDS = %w[navigation_expression directly_assignable_expression].freeze BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def state_read_target(node) + 
kotlin_value_argument_state_target(node) || super + end + + private + + def kotlin_value_argument_state_target(node) + return nil unless ts_node?(node) && node.kind == "value_argument" + + suffix = node.named_children.find { |child| navigation_suffix_node_kinds.include?(child.kind) } + receiver = node.named_children.find { |child| child != suffix } + field = member_field_text(suffix) + return nil unless receiver && field + return nil if namespace_receiver?(receiver.text) + + { receiver: normalize_text(receiver.text), field: field } + end end end end diff --git a/gems/decomplex/lib/decomplex/syntax/lua.rb b/gems/decomplex/lib/decomplex/syntax/lua.rb index 8cd0de951..4d66e0cc6 100644 --- a/gems/decomplex/lib/decomplex/syntax/lua.rb +++ b/gems/decomplex/lib/decomplex/syntax/lua.rb @@ -22,7 +22,7 @@ module Syntax class LuaSyntaxAdapter < TreeSitterLanguageAdapter FUNCTION_NODE_KINDS = %w[function_declaration].freeze CALL_NODE_KINDS = %w[function_call method_call].freeze - ADJACENT_CALL_NODE_KINDS = %w[dot_index_expression identifier expression_list variable_list].freeze + ADJACENT_CALL_NODE_KINDS = %w[dot_index_expression method_index_expression identifier expression_list variable_list].freeze PARAMETER_LIST_NODE_KINDS = %w[parameters].freeze FUNCTION_BODY_NODE_KINDS = %w[block].freeze NESTED_STATEMENT_WRAPPER_NODE_KINDS = %w[block].freeze @@ -68,13 +68,17 @@ def receiver_owner_name(node) end def call_target(document, node) - lua_expression_list_call_target(node) || + lua_method_call_target(node) || + lua_expression_list_call_target(node) || lua_adjacent_member_call_target(node) || super end def state_read_target(node) - lua_single_return_member_target(node) || super + target = lua_expression_list_member_target(node) || lua_single_return_member_target(node) || super + return nil if target && target[:receiver] == "_" && target[:field] == "_" + + target end def generated_prelude?(document, node) @@ -124,7 +128,50 @@ def lua_expression_list_call_target(node) nil end + def 
lua_method_call_target(node) + if node.kind == "function_call" + callee = node.named_children.find { |child| child.kind == "method_index_expression" } + args = node.named_children.find { |child| child.kind == "arguments" } + return nil unless callee && args + + return lua_method_target(callee, args) + end + + return nil if call_node_ancestor?(node) + return nil unless node.kind == "method_index_expression" + + args = next_sibling(node) + return nil unless args&.kind == "arguments" + + lua_method_target(node, args) + rescue StandardError + nil + end + + def lua_method_target(callee, args) + receiver = callee.named_children.first + message = callee.named_children.last + return nil unless receiver && message + + { + receiver: normalize_text(receiver.text), + message: normalize_text(message.text), + arguments: args.named_children.map { |child| normalize_text(child.text) } + } + end + + def lua_expression_list_member_target(node) + return nil unless node.kind == "expression_list" + + children = node.named_children + return nil unless children.size == 2 + return nil unless field_like_node?(children.first) && identifier_node_kinds.include?(children.last.kind) + + { receiver: normalize_text(children.first.text), field: children.last.text } + end + def lua_adjacent_member_call_target(node) + return nil if call_node_ancestor?(node) return nil unless node.kind == "identifier" args = next_sibling(node) diff --git a/gems/decomplex/lib/decomplex/syntax/php.rb b/gems/decomplex/lib/decomplex/syntax/php.rb index 6805ab832..f3624c247 100644 --- a/gems/decomplex/lib/decomplex/syntax/php.rb +++ b/gems/decomplex/lib/decomplex/syntax/php.rb @@ -55,7 +55,10 @@ class PhpSyntaxAdapter < TreeSitterLanguageAdapter SELF_CALL_IDENTIFIER_NODE_KINDS = %w[name variable_name].freeze SELF_RECEIVER_NAMES = %w[$this this self].freeze ACCESSOR_CALL_NODE_KINDS = [].freeze - FIELD_LIKE_NODE_KINDS = %w[member_access_expression member_call_expression class_constant_access_expression].freeze + 
FIELD_LIKE_NODE_KINDS = %w[ + member_access_expression nullsafe_member_access_expression member_call_expression + class_constant_access_expression + ].freeze BLOCK_ARGUMENT_NODE_KINDS = [].freeze def function_name(node) @@ -94,7 +97,7 @@ def call_target(document, node) def state_read_target(node) return nil if php_assignment_lhs?(node) - super + php_argument_member_target(node) || super end def state_declaration(node) @@ -153,9 +156,8 @@ def php_call_target(node) case node.kind when "member_call_expression" - access = names.find { |child| child.kind == "member_access_expression" } - receiver = access ? php_member_receiver(access) : names.first - message = access ? php_member_name(access) : names[1] + receiver = php_member_receiver(node) || names.first + message = php_member_name(node) || names[1] return nil unless receiver && message { @@ -189,10 +191,18 @@ def php_print_target(node) { receiver: "self", message: "print", - arguments: node.named_children.map { |child| php_normalize_source(child.text) } + arguments: node.named_children.map { |child| php_print_argument_text(child) } } end + def conjunction_predicate(node) + php_normalize_source(super) + end + + def branch_predicate(node) + php_normalize_source(super) + end + def php_property_declaration(node) return nil unless node.kind == "property_declaration" @@ -312,6 +322,25 @@ def php_argument_texts(args) Array(args&.named_children).map { |child| php_normalize_source(child.text) } end + def php_print_argument_text(node) + value = php_unwrap_parenthesized(node) + php_normalize_source(value&.text || node.text) + end + + def php_argument_member_target(node) + return nil unless ts_node?(node) && node.kind == "argument" + return nil unless node.text.to_s.include?("->") || node.text.to_s.include?("::") + return nil if node.text.to_s.include?("(") + + parts = php_normalize_source(node.text).split(".") + return nil unless parts.size >= 2 + + { + receiver: php_normalize_receiver(parts[0...-1].join(".")), + field: 
php_identifier_text_value(parts.last) + } + end + def php_member_receiver(node) return nil unless ts_node?(node) @@ -341,7 +370,7 @@ def php_identifier_text_value(text) end def php_normalize_receiver(receiver) - value = php_identifier_text_value(receiver) + value = php_normalize_source(php_identifier_text_value(receiver)) value == "this" ? "self" : value end diff --git a/gems/decomplex/lib/decomplex/syntax/python.rb b/gems/decomplex/lib/decomplex/syntax/python.rb index 1b5c256cd..5cf827a4e 100644 --- a/gems/decomplex/lib/decomplex/syntax/python.rb +++ b/gems/decomplex/lib/decomplex/syntax/python.rb @@ -99,6 +99,8 @@ def parameter_name(param) end def call_target(document, node) + return nil if node.kind == "identifier" && parent_node(node)&.kind == "attribute" + python_adjacent_call_target(node) || super end @@ -159,6 +161,10 @@ def record_state_param_origin(document, node, stack, out) end end + def state_write_source_node(node) + assignment_lhs?(node) ? (parent_node(node) || node) : super + end + def local_methods(document) document.function_defs.map do |function_def| statements = python_function_body_statements(function_def.body, document) @@ -217,7 +223,9 @@ def python_function_body_statements(node, document) end def python_adjacent_call_target(node) + return nil if call_node_ancestor?(node) return python_adjacent_member_call_target(node) if node.kind == "attribute" + return nil if parent_node(node)&.kind == "attribute" return nil unless node.kind == "identifier" args = next_sibling(node) @@ -234,6 +242,7 @@ def python_adjacent_call_target(node) end def python_adjacent_member_call_target(node) + return nil if call_node_ancestor?(node) args = next_sibling(node) return nil unless args&.kind == "argument_list" diff --git a/gems/decomplex/lib/decomplex/syntax_oracle.rb b/gems/decomplex/lib/decomplex/syntax_oracle.rb index 3ba88cf7f..da59b25d6 100644 --- a/gems/decomplex/lib/decomplex/syntax_oracle.rb +++ b/gems/decomplex/lib/decomplex/syntax_oracle.rb @@ -12,14 
+12,16 @@ module SyntaxOracle def project(files, engine: "ruby", language: nil) paths = Array(files).map(&:to_s) - case engine.to_s - when "ruby" - project_files(paths, language: language) - when "rust" - rust_project_files(paths, language: language) - else - raise ArgumentError, "unsupported syntax oracle engine: #{engine}" - end + projection = + case engine.to_s + when "ruby" + project_files(paths, language: language) + when "rust" + rust_project_files(paths, language: language) + else + raise ArgumentError, "unsupported syntax oracle engine: #{engine}" + end + canonical_projection(projection) end def canonical_json(files, engine: "ruby", language: nil) @@ -52,12 +54,7 @@ def project_document(document) "branch_decisions" => branch_decision_rows(document), "dispatch_sites" => rows(document.dispatch_sites, %i[variant_set arm_members outside function line span]), "semantic_effects" => rows(document.semantic_effect_sites, %i[kind detail function line span]), - "predicate_bodies" => rows(document.predicate_defs, %i[name owner body line span]), - "local_complexity" => local_complexity_rows(document), - "clone_candidates" => rows( - document.clone_candidates, - %i[method_name node_name line span mass fingerprint child_fingerprints child_masses] - ) + "predicate_bodies" => rows(document.predicate_defs, %i[name owner body line span]) } end @@ -66,6 +63,29 @@ def rust_project_files(files, language:) JSON.parse(Native::Command.run("syntax-facts", "--language", lang.to_s, *files)) end + def canonical_projection(projection) + { + "format" => projection.fetch("format"), + "documents" => Array(projection.fetch("documents")).map { |document| canonical_document(document) } + } + end + + def canonical_document(document) + sections = %w[ + functions owners calls state_reads state_writes decisions branch_decisions + dispatch_sites semantic_effects predicate_bodies + ] + out = { + "file" => document.fetch("file"), + "language" => document.fetch("language") + } + sections.each do 
|section| + rows = Array(document.fetch(section)).map { |row| normalize_value(row) } + out[section] = rows.sort_by { |row| JSON.generate(row) } + end + out + end + def rows(items, keys) Array(items).map do |item| keys.each_with_object({}) do |key, out| diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index ce82989a8..36672ba5c 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -121,6 +121,8 @@ pub struct Document { #[serde(default)] pub call_sites: Vec, #[serde(default)] + pub state_declarations: Vec, + #[serde(default)] pub state_reads: Vec, #[serde(default)] pub state_writes: Vec, @@ -202,6 +204,16 @@ pub struct CallSite { pub block: bool, } +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct StateDeclaration { + pub field: String, + pub owner: String, + pub r#type: Option, + pub file: String, + pub line: usize, + pub span: Span, +} + #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct StateWrite { pub field: String, @@ -230,6 +242,8 @@ pub struct PredicateAlias { pub body: String, pub file: String, pub defn: String, + #[serde(default)] + pub owner: String, pub line: usize, pub span: Span, } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs index 3beb79adf..fe63908ee 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs @@ -1,7 +1,7 @@ use super::super::tree_sitter_adapter::{ first_named_child, first_named_child_except, first_named_child_with_kind, first_named_text, - named_children, normalize_type_owner, previous_sibling_text, strip_assignment_suffix, - AssignmentTarget, CallTarget, Target, + named_children, normalize_type_owner, strip_assignment_suffix, AssignmentTarget, CallTarget, + Target, }; use super::super::{CallSite, CloneCandidate, 
Document, FunctionDef, Language}; use crate::decomplex::ast::{node_text, normalize_text, span, RawNode}; @@ -225,10 +225,26 @@ pub(crate) trait LanguageProfile { EMPTY_NODE_KINDS } + fn local_variable_declarator_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + fn multi_name_variable_declaration_node_kinds(&self) -> &[&str] { EMPTY_NODE_KINDS } + fn field_declaration_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn declaration_site_parent_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn assignment_state_declaration_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + fn declaration_assignment_node_kinds(&self) -> &[&str] { EMPTY_NODE_KINDS } @@ -354,6 +370,10 @@ pub(crate) trait LanguageProfile { false } + fn implicit_state_accesses(&self) -> bool { + false + } + fn path_action_node_kinds(&self) -> &[&str] { EMPTY_NODE_KINDS } @@ -423,6 +443,10 @@ pub(crate) trait LanguageProfile { self.default_owner_name_from_declaration(node, source) } + fn owner_def_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + self.owner_name_from_declaration(node, source) + } + fn owner_kind(&self, node: Node<'_>) -> String { if self.class_owner_node_kinds().contains(&node.kind()) { "class".to_string() @@ -585,11 +609,25 @@ pub(crate) trait LanguageProfile { } let (receiver, message) = self.target_from_callee(callee, source)?; - Some(CallTarget::new( - receiver, - message, - self.call_argument_texts(node, source), - )) + let mut target = CallTarget::new(receiver, message, self.call_argument_texts(node, source)); + if let Some(receiver) = self.first_argument_receiver_call_receiver(node, source, &target) { + target.receiver = receiver; + } + Some(target) + } + + fn first_argument_receiver_call_receiver( + &self, + node: Node<'_>, + source: &str, + target: &CallTarget<'_>, + ) -> Option { + if !self.first_argument_receiver() || target.receiver != "self" { + return None; + } + let first_arg = 
self.call_argument_nodes(node).first().copied()?; + let arg_target = self.state_read_target(first_arg, source)?; + Some(format!("{}.{}", arg_target.receiver, arg_target.field)) } fn target_from_callee(&self, callee: Node<'_>, source: &str) -> Option<(String, String)> { @@ -687,6 +725,10 @@ pub(crate) trait LanguageProfile { self.default_state_target(lhs, source) } + fn state_declaration(&self, node: Node<'_>, source: &str) -> Option<(String, Option)> { + self.default_state_declaration(node, source) + } + fn state_read_target(&self, node: Node<'_>, source: &str) -> Option { self.default_state_read_target(node, source) } @@ -714,15 +756,14 @@ pub(crate) trait LanguageProfile { } fn default_state_target(&self, lhs: Node<'_>, source: &str) -> Option { - if previous_sibling_text(lhs, source).as_deref() == Some(":") { - return None; - } - if self.expression_list_node_kinds().contains(&lhs.kind()) { let children = named_children(lhs); if children.len() == 1 { return self.default_state_target(children[0], source); } + if !self.member_expression_list(lhs, source) { + return None; + } } if self.accessor_call_node_kinds().contains(&lhs.kind()) { @@ -762,6 +803,100 @@ pub(crate) trait LanguageProfile { None } + fn member_expression_list(&self, node: Node<'_>, source: &str) -> bool { + if node.child_by_field_name("operand").is_some() + && node.child_by_field_name("field").is_some() + { + return true; + } + if !self + .field_like_dot_wrapper_node_kinds() + .contains(&node.kind()) + { + return false; + } + let text = node_text(node, source); + text.contains('.') || text.contains("->") || text.contains("::") || text.contains("?.") + } + + fn default_state_declaration( + &self, + node: Node<'_>, + source: &str, + ) -> Option<(String, Option)> { + if self + .assignment_state_declaration_node_kinds() + .contains(&node.kind()) + { + if let Some((field, r#type)) = self.assignment_state_declaration(node, source) { + return Some((field, r#type)); + } + } + if 
!self.field_declaration_node_kinds().contains(&node.kind()) { + return None; + } + let name = self.field_declaration_name_node(node, source)?; + let field = node_text(name, source).to_string(); + let r#type = declared_type_text(node, name, source); + Some((field, r#type)) + } + + fn field_declaration_name_node<'tree>( + &self, + node: Node<'tree>, + source: &str, + ) -> Option> { + node.child_by_field_name("name") + .or_else(|| self.declarator_name_node(node, source)) + .or_else(|| { + named_children(node) + .into_iter() + .find(|child| self.field_identifier_node_kinds().contains(&child.kind())) + }) + .or_else(|| { + named_children(node).into_iter().rev().find(|child| { + self.identifier_node_kinds().contains(&child.kind()) + || self.field_identifier_node_kinds().contains(&child.kind()) + }) + }) + } + + fn declarator_name_node<'tree>(&self, node: Node<'tree>, _source: &str) -> Option> { + let mut pending = named_children(node); + let mut seen = HashSet::new(); + while let Some(current) = pending.pop() { + let key = format!("{:?}\0{}", span(current), current.kind()); + if !seen.insert(key) { + continue; + } + if self.identifier_node_kinds().contains(¤t.kind()) + || self.field_identifier_node_kinds().contains(¤t.kind()) + { + return Some(current); + } + pending.extend(named_children(current)); + } + None + } + + fn assignment_state_declaration( + &self, + node: Node<'_>, + source: &str, + ) -> Option<(String, Option)> { + let assignment = self.assignment_target(node)?; + let target = self.state_target(assignment.lhs, source)?; + if !matches!(target.receiver.as_str(), "self" | "this") { + return None; + } + let rhs = node + .child_by_field_name("right") + .or_else(|| node.child_by_field_name("value")) + .or_else(|| named_children(node).get(1).copied()); + let r#type = rhs.and_then(|node| inferred_assignment_type(node, source)); + r#type.map(|type_name| (target.field, Some(type_name))) + } + fn assignment_target<'tree>(&self, node: Node<'tree>) -> Option> { 
self.default_assignment_target(node) } @@ -1202,6 +1337,52 @@ fn clone_body_node(node: &RawNode) -> Option<&RawNode> { .find(|child| CLONE_BODY_KINDS.contains(&child.kind.as_str())) } +fn declared_type_text(node: Node<'_>, name: Node<'_>, source: &str) -> Option { + if let Some(r#type) = node.child_by_field_name("type") { + let text = normalize_text(node_text(r#type, source)); + if !text.is_empty() { + return Some(text); + } + } + + let text = node_text(node, source); + let name_text = node_text(name, source); + let before_name = text.split(name_text).next().unwrap_or("").trim(); + let candidate = before_name + .split_whitespace() + .filter(|token| { + !matches!( + *token, + "public" | "private" | "protected" | "static" | "final" | "const" + ) + }) + .last() + .unwrap_or("") + .trim_matches(['*', '&']); + (!candidate.is_empty()).then(|| candidate.to_string()) +} + +fn inferred_assignment_type(node: Node<'_>, source: &str) -> Option { + let text = normalize_text(node_text(node, source)); + for prefix in ["new ", ""] { + let value = text.strip_prefix(prefix).unwrap_or(&text); + let candidate = value + .split(['(', '{', '<', ' ', ':']) + .next() + .unwrap_or("") + .trim(); + if candidate + .chars() + .next() + .map(|ch| ch.is_ascii_uppercase()) + .unwrap_or(false) + { + return Some(candidate.to_string()); + } + } + None +} + fn clone_fingerprint_for_profile( profile: &P, node: &RawNode, diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs index d18862f9a..ddb85ffe2 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs @@ -1,6 +1,7 @@ use super::super::tree_sitter_adapter::normalize_type_owner; use super::super::Language; use super::base::LanguageProfile; +use crate::decomplex::ast::node_text; use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct CProfile; @@ -14,6 +15,14 @@ impl LanguageProfile for 
CProfile { tree_sitter_c::LANGUAGE.into() } + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + if node_text(node, source).trim_start().starts_with("static ") { + Some("private".to_string()) + } else { + Some("public".to_string()) + } + } + fn first_argument_receiver(&self) -> bool { true } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs index ee996e330..82f7df4b3 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs @@ -1,6 +1,7 @@ use super::super::Language; use super::base::LanguageProfile; -use tree_sitter::Language as TreeSitterLanguage; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct CppProfile; @@ -13,6 +14,10 @@ impl LanguageProfile for CppProfile { tree_sitter_cpp::LANGUAGE.into() } + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + cpp_previous_access_specifier(node, source).or_else(|| Some("private".to_string())) + } + fn function_node_kinds(&self) -> &[&str] { &["function_definition"] } @@ -78,6 +83,32 @@ impl LanguageProfile for CppProfile { &["declaration", "init_declarator"] } + fn local_variable_declarator_node_kinds(&self) -> &[&str] { + &["init_declarator"] + } + + fn field_declaration_node_kinds(&self) -> &[&str] { + &["field_declaration"] + } + + fn declaration_site_parent_node_kinds(&self) -> &[&str] { + &[ + "parameter_declaration", + "init_declarator", + "function_declarator", + "class_specifier", + "struct_specifier", + ] + } + + fn assignment_state_declaration_node_kinds(&self) -> &[&str] { + &["assignment_expression"] + } + + fn implicit_state_accesses(&self) -> bool { + true + } + fn receiver_type_node_kinds(&self) -> &[&str] { &[ "type_identifier", @@ -147,3 +178,17 @@ impl LanguageProfile for CppProfile { &["field_expression"] } } + +fn 
cpp_previous_access_specifier(node: Node<'_>, source: &str) -> Option { + let mut sibling = node.prev_sibling(); + while let Some(current) = sibling { + if current.kind() == "access_specifier" { + let text = node_text(current, source); + if matches!(text, "public" | "private" | "protected") { + return Some(text.to_string()); + } + } + sibling = current.prev_sibling(); + } + None +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs index 87a4946c7..18307f0fb 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs @@ -1,6 +1,7 @@ use super::super::Language; use super::base::LanguageProfile; -use tree_sitter::Language as TreeSitterLanguage; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct CSharpProfile; @@ -13,6 +14,19 @@ impl LanguageProfile for CSharpProfile { tree_sitter_c_sharp::LANGUAGE.into() } + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + for child in super::super::tree_sitter_adapter::named_children(node) { + if child.kind() != "modifier" { + continue; + } + let text = node_text(child, source); + if matches!(text, "public" | "private" | "protected") { + return Some(text.to_string()); + } + } + Some("private".to_string()) + } + fn function_node_kinds(&self) -> &[&str] { &["method_declaration"] } @@ -73,10 +87,35 @@ impl LanguageProfile for CSharpProfile { ] } + fn local_variable_declarator_node_kinds(&self) -> &[&str] { + &["variable_declarator"] + } + fn variable_declaration_node_kinds(&self) -> &[&str] { &["variable_declaration"] } + fn field_declaration_node_kinds(&self) -> &[&str] { + &["field_declaration"] + } + + fn declaration_site_parent_node_kinds(&self) -> &[&str] { + &[ + "parameter", + "variable_declarator", + "method_declaration", + "class_declaration", + ] + } + + fn 
assignment_state_declaration_node_kinds(&self) -> &[&str] { + &["assignment_expression"] + } + + fn implicit_state_accesses(&self) -> bool { + true + } + fn comparison_node_kinds(&self) -> &[&str] { &["binary_expression"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs index bcbe24dfd..72b43f2ef 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs @@ -1,4 +1,4 @@ -use super::super::tree_sitter_adapter::{named_children, normalize_type_owner, CallTarget}; +use super::super::tree_sitter_adapter::{named_children, normalize_type_owner, CallTarget, Target}; use super::super::Language; use super::base::LanguageProfile; use crate::decomplex::ast::{node_text, normalize_text}; @@ -37,6 +37,15 @@ impl LanguageProfile for GoProfile { None } + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + let name = self.function_name(node, source)?; + if name.chars().next().map(char::is_uppercase).unwrap_or(false) { + Some("public".to_string()) + } else { + Some("private".to_string()) + } + } + fn generic_owner_node_kinds(&self) -> &[&str] { &["type_spec"] } @@ -233,6 +242,27 @@ impl LanguageProfile for GoProfile { _ => None, } } + + fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { + if self.expression_list_node_kinds().contains(&lhs.kind()) { + let children = named_children(lhs); + if children.len() == 1 { + return self.state_target(children[0], source); + } + } + if self.indexed_lhs_node_kinds().contains(&lhs.kind()) { + let object = named_children(lhs).into_iter().next()?; + return self.default_state_target(object, source); + } + self.default_state_target(lhs, source) + } + + fn state_read_target(&self, node: Node<'_>, source: &str) -> Option { + if go_augmented_assignment_lhs(node) { + return None; + } + self.default_state_read_target(node, source) + } } fn go_method_receiver(node: Node<'_>, 
source: &str) -> Option<(String, String)> { @@ -274,3 +304,38 @@ fn go_statement_arguments(node: Node<'_>, source: &str) -> Option> { .collect(), ) } + +fn go_augmented_assignment_lhs(node: Node<'_>) -> bool { + let mut current = node; + while let Some(parent) = current.parent() { + if parent.kind() == "assignment_statement" { + let lhs = named_children(parent).into_iter().next(); + let operator = go_assignment_operator(parent); + return lhs.map(|lhs| go_contains_node(lhs, node)).unwrap_or(false) + && !matches!(operator.as_deref(), Some("=" | ":=")); + } + current = parent; + } + false +} + +fn go_assignment_operator(node: Node<'_>) -> Option { + let mut cursor = node.walk(); + let operator = node + .children(&mut cursor) + .find(|child| !child.is_named() && child.kind().ends_with('=')) + .map(|child| child.kind().to_string()); + operator +} + +fn go_contains_node(root: Node<'_>, target: Node<'_>) -> bool { + if root.kind() == target.kind() + && root.start_byte() == target.start_byte() + && root.end_byte() == target.end_byte() + { + return true; + } + named_children(root) + .into_iter() + .any(|child| go_contains_node(child, target)) +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs index 7ec8910f6..3ce0b5244 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs @@ -16,6 +16,25 @@ impl LanguageProfile for JavaProfile { tree_sitter_java::LANGUAGE.into() } + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + for child in named_children(node) { + if child.kind() != "modifiers" { + continue; + } + let text = node_text(child, source); + if text.split_whitespace().any(|token| token == "public") { + return Some("public".to_string()); + } + if text.split_whitespace().any(|token| token == "private") { + return Some("private".to_string()); + } + if text.split_whitespace().any(|token| token 
== "protected") { + return Some("protected".to_string()); + } + } + None + } + fn function_node_kinds(&self) -> &[&str] { &["method_declaration"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs index 64458ed62..3cb0aa323 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs @@ -1,6 +1,7 @@ use super::super::Language; use super::base::LanguageProfile; -use tree_sitter::Language as TreeSitterLanguage; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct JavaScriptProfile; @@ -13,6 +14,29 @@ impl LanguageProfile for JavaScriptProfile { tree_sitter_javascript::LANGUAGE.into() } + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + let name = self.function_name(node, source).unwrap_or_default(); + if name.starts_with('#') { + return Some("private".to_string()); + } + for child in super::super::tree_sitter_adapter::named_children(node) { + if !matches!(child.kind(), "accessibility_modifier" | "modifier") { + continue; + } + let text = node_text(child, source); + if text.split_whitespace().any(|token| token == "private") { + return Some("private".to_string()); + } + if text.split_whitespace().any(|token| token == "protected") { + return Some("protected".to_string()); + } + if text.split_whitespace().any(|token| token == "public") { + return Some("public".to_string()); + } + } + Some("public".to_string()) + } + fn function_node_kinds(&self) -> &[&str] { &["function_declaration", "method_definition"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs index 51dc785db..2fcb63356 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs @@ -1,4 
+1,4 @@ -use super::super::tree_sitter_adapter::named_children; +use super::super::tree_sitter_adapter::{named_children, CallTarget}; use super::super::Language; use super::base::LanguageProfile; use crate::decomplex::ast::node_text; @@ -15,6 +15,21 @@ impl LanguageProfile for KotlinProfile { tree_sitter_kotlin_ng::LANGUAGE.into() } + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + for child in named_children(node) { + if child.kind() != "modifiers" { + continue; + } + if node_text(child, source) + .split_whitespace() + .any(|token| token == "private") + { + return Some("private".to_string()); + } + } + None + } + fn function_node_kinds(&self) -> &[&str] { &["function_declaration"] } @@ -134,7 +149,7 @@ impl LanguageProfile for KotlinProfile { } fn case_pattern_node_kinds(&self) -> &[&str] { - &["when_condition", "pattern"] + &["when_condition", "pattern", "string_literal"] } fn case_subject_node_kinds(&self) -> &[&str] { @@ -177,4 +192,44 @@ impl LanguageProfile for KotlinProfile { fn field_like_node_kinds(&self) -> &[&str] { &["navigation_expression", "directly_assignable_expression"] } + + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + if node.kind() != "call_expression" { + return None; + } + let mut target = self.default_call_target(node, source)?; + if kotlin_single_call_control_body(node) { + target.source_node = named_children(node).into_iter().next(); + } + Some(target) + } +} + +fn kotlin_single_call_control_body(node: Node<'_>) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() == "when_entry" { + return true; + } + if parent.kind() != "block" { + return false; + } + if named_children(parent) + .into_iter() + .filter(|child| child.is_named()) + .count() + != 1 + { + return false; + } + parent + .parent() + .map(|ancestor| { + matches!( + ancestor.kind(), + "if_expression" | "for_statement" | "control_structure_body" + ) + }) + .unwrap_or(false) } diff --git 
a/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs index d03469f3f..2b991e7d3 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs @@ -1,4 +1,4 @@ -use super::super::tree_sitter_adapter::named_children; +use super::super::tree_sitter_adapter::{named_children, CallTarget, Target}; use super::super::Language; use super::base::LanguageProfile; use crate::decomplex::ast::{line, node_text}; @@ -28,6 +28,10 @@ impl LanguageProfile for LuaProfile { .or_else(|| self.default_owner_name_from_declaration(node, source)) } + fn owner_def_name_from_declaration(&self, _node: Node<'_>, _source: &str) -> Option { + None + } + fn parameter_list_node_kinds(&self) -> &[&str] { &["parameters"] } @@ -115,6 +119,106 @@ impl LanguageProfile for LuaProfile { let first_line = source.lines().next().unwrap_or(""); first_line.contains("_tl_compat") && first_line.contains("compat53.module") } + + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + if !self.call_node_kinds().contains(&node.kind()) { + return None; + } + let callee = named_children(node).into_iter().next()?; + let mut target = if callee.kind() == "method_index_expression" { + lua_method_call_target(callee, node, self.call_argument_texts(node, source), source)? + } else { + self.default_call_target(node, source)? 
+ }; + if lua_callee_source_span(node) { + target.source_node = Some(callee); + } + Some(target) + } + + fn state_read_target(&self, node: Node<'_>, source: &str) -> Option { + let target = self.default_state_read_target(node, source)?; + if target.receiver == "_" && target.field == "_" { + None + } else { + Some(target) + } + } + + fn assignment_lhs_node(&self, node: Node<'_>) -> bool { + let candidate = if node + .parent() + .map(|parent| parent.kind() == "variable_list") + .unwrap_or(false) + { + node.parent().unwrap() + } else { + node + }; + let Some(parent) = candidate.parent() else { + return false; + }; + if parent.kind() != "assignment_statement" { + return false; + } + named_children(parent) + .into_iter() + .next() + .map(|lhs| same_node(lhs, candidate)) + .unwrap_or(false) + } +} + +fn lua_method_call_target<'tree>( + callee: Node<'tree>, + node: Node<'tree>, + arguments: Vec, + source: &str, +) -> Option> { + let children = named_children(callee); + let receiver = children.first().copied()?; + let message = children.last().copied()?; + let mut target = CallTarget::new( + node_text(receiver, source).to_string(), + node_text(message, source).to_string(), + arguments, + ); + target.source_node = Some(node); + Some(target) +} + +fn lua_callee_source_span(node: Node<'_>) -> bool { + if node + .parent() + .map(|parent| parent.kind() == "expression_list") + .unwrap_or(false) + { + return true; + } + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "block" { + return false; + } + if named_children(parent).len() != 1 { + return false; + } + parent + .parent() + .map(|ancestor| { + matches!( + ancestor.kind(), + "if_statement" | "elseif_statement" | "else_statement" | "for_statement" + ) + }) + .unwrap_or(false) +} + +fn same_node(left: Node<'_>, right: Node<'_>) -> bool { + left.kind() == right.kind() + && left.start_byte() == right.start_byte() + && left.end_byte() == right.end_byte() } fn lua_method_name(node: Node<'_>, 
source: &str) -> Option { diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs index 70ac14759..3a549b7c1 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs @@ -28,7 +28,7 @@ impl LanguageProfile for PhpProfile { } fn parameter_identifier_node_kinds(&self) -> &[&str] { - &["name", "variable_name", "simple_parameter"] + &["name", "variable_name"] } fn function_body_node_kinds(&self) -> &[&str] { @@ -122,6 +122,7 @@ impl LanguageProfile for PhpProfile { fn field_like_node_kinds(&self) -> &[&str] { &[ "member_access_expression", + "nullsafe_member_access_expression", "member_call_expression", "class_constant_access_expression", ] @@ -159,6 +160,15 @@ impl LanguageProfile for PhpProfile { self.default_owner_name_from_declaration(node, source) } + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + named_children(node) + .into_iter() + .find(|child| child.kind() == "visibility_modifier") + .map(|modifier| node_text(modifier, source).to_string()) + .filter(|modifier| matches!(modifier.as_str(), "public" | "private" | "protected")) + .or_else(|| Some("public".to_string())) + } + fn assignment_target<'tree>(&self, node: Node<'tree>) -> Option> { self.default_assignment_target(node) } @@ -171,11 +181,12 @@ impl LanguageProfile for PhpProfile { return Some(CallTarget::new( "self".to_string(), "print".to_string(), - self.call_argument_texts(node, source), + php_print_argument_texts(node, source), )); } let mut target = self.default_call_target(node, source)?; target.receiver = php_normalize_receiver(&target.receiver); + target.message = php_identifier_text_value(&target.message); Some(target) } @@ -194,6 +205,14 @@ impl LanguageProfile for PhpProfile { }) } + fn state_read_target(&self, node: Node<'_>, source: &str) -> Option { + let target = self.default_state_read_target(node, source)?; + 
Some(Target { + receiver: php_normalize_receiver(&target.receiver), + field: php_identifier_text_value(&target.field), + }) + } + fn member_field_text(&self, field: Node<'_>, source: &str) -> Option { php_name_text(field, source) } @@ -226,7 +245,7 @@ fn php_identifier_text_value(text: &str) -> String { } fn php_normalize_receiver(receiver: &str) -> String { - let value = php_identifier_text_value(receiver); + let value = php_normalize_source(&php_identifier_text_value(receiver)); if value == "this" { "self".to_string() } else { @@ -234,6 +253,25 @@ fn php_normalize_receiver(receiver: &str) -> String { } } +fn php_print_argument_texts(node: Node<'_>, source: &str) -> Vec { + named_children(node) + .into_iter() + .flat_map(|child| { + if child.kind() == "parenthesized_expression" { + let children = named_children(child); + if children.is_empty() { + vec![child] + } else { + children + } + } else { + vec![child] + } + }) + .map(|argument| normalize_text(&php_normalize_source(node_text(argument, source)))) + .collect() +} + fn php_normalize_source(source: &str) -> String { let mut out = String::new(); let mut chars = source.chars().peekable(); diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs index e865225d4..fbae478f0 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs @@ -1,4 +1,4 @@ -use super::super::tree_sitter_adapter::{AssignmentTarget, Target}; +use super::super::tree_sitter_adapter::Target; use super::super::Language; use super::base::{default_clone_candidate_node, LanguageProfile}; use crate::decomplex::ast::RawNode; @@ -139,14 +139,6 @@ impl LanguageProfile for PythonProfile { Some(target) } - fn state_write_source_node<'tree>( - &self, - _node: Node<'tree>, - assignment: &AssignmentTarget<'tree>, - ) -> Node<'tree> { - assignment.lhs - } - fn clone_candidate_node(&self, node: &RawNode) 
-> bool { if python_assignment_wrapper_node(node) { return false; diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs index 2ba33fe7d..3165da531 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs @@ -154,7 +154,7 @@ impl LanguageProfile for RubyProfile { if ruby_brace_block_parameter_receiver(node, &target.receiver, source) { return None; } - if target.arguments.is_empty() { + if target.arguments.is_empty() && !ruby_call_has_block(node) { if let Some(span) = ruby_narrow_no_arg_call_span(node, source, &target.receiver, &target.message) { @@ -265,9 +265,16 @@ impl LanguageProfile for RubyProfile { if ruby_direct_flat_map_block_statement(node, source) { return None; } - ruby_state_variable_target(node, source) + if ruby_sorbet_signature_payload_node(node, source) { + return None; + } + let target = ruby_state_variable_target(node, source) .or_else(|| ruby_bare_state_reader_target(node, source)) - .or_else(|| self.default_state_read_target(node, source)) + .or_else(|| self.default_state_read_target(node, source))?; + if ruby_state_block_parameter_receiver(node, &target.receiver, source) { + return None; + } + Some(target) } fn assignment_target<'tree>(&self, node: Node<'tree>) -> Option> { @@ -344,7 +351,7 @@ fn ruby_call_target<'tree>(node: Node<'tree>, source: &str) -> Option, source: &str) -> Vec { let Some(args) = args else { return Vec::new(); }; + if let Some(arguments) = ruby_inline_def_argument_texts(args, source) { + return arguments; + } let values = named_children(args) .into_iter() .map(|child| normalize_text(node_text(child, source))) @@ -583,6 +593,38 @@ fn ruby_argument_texts(node: Node<'_>, source: &str) -> Vec { .collect() } +fn ruby_inline_def_argument_texts(args: Node<'_>, source: &str) -> Option> { + let children = named_children(args); + if children.len() != 1 || 
first_child_kind(children[0]) != Some("def") { + return None; + } + let method = children[0]; + let name = method + .child_by_field_name("name") + .or_else(|| { + named_children(method) + .into_iter() + .find(|child| matches!(child.kind(), "identifier" | "field_identifier")) + }) + .map(|node| normalize_text(node_text(node, source)))?; + let params = named_children(method) + .into_iter() + .find(|child| child.kind() == "method_parameters") + .map(|node| normalize_text(node_text(node, source))); + let body = named_children(method) + .into_iter() + .find(|child| child.kind() == "body_statement") + .map(|node| normalize_text(node_text(node, source))); + let mut out = vec![name]; + if let Some(params) = params.filter(|value| !value.is_empty()) { + out.push(params); + } + if let Some(body) = body.filter(|value| !value.is_empty()) { + out.push(body); + } + Some(out) +} + fn ruby_safe_navigation_call(node: Node<'_>, source: &str) -> bool { let mut cursor = node.walk(); let found = node @@ -715,6 +757,40 @@ fn ruby_brace_block_parameter_receiver(node: Node<'_>, receiver: &str, source: & false } +fn ruby_state_block_parameter_receiver(node: Node<'_>, receiver: &str, source: &str) -> bool { + if ruby_brace_block_parameter_receiver(node, receiver, source) { + return true; + } + if receiver.contains('.') || receiver.contains('[') || receiver == "self" { + return false; + } + let mut current = node.parent(); + while let Some(parent) = current { + if parent.kind() == "do_block" { + return ruby_block_parameters(parent, source) + .into_iter() + .any(|param| param == receiver); + } + if parent.kind() == "body_statement" + && parent + .parent() + .map(|grandparent| grandparent.kind() == "do_block") + .unwrap_or(false) + { + current = parent.parent(); + continue; + } + if matches!( + parent.kind(), + "method" | "singleton_method" | "body_statement" + ) { + return false; + } + current = parent.parent(); + } + false +} + fn ruby_block_parameters(block: Node<'_>, source: &str) -> Vec { 
named_children(block) .into_iter() @@ -774,7 +850,8 @@ fn ruby_narrow_no_arg_call_span( fn ruby_valid_call_target(target: &CallTarget<'_>) -> bool { if invalid_call_text(&target.receiver) || invalid_call_text(&target.message) - || target.receiver.split_whitespace().count() > 1 + || (target.receiver.split_whitespace().count() > 1 + && !ruby_literal_receiver_text(&target.receiver)) { return false; } @@ -786,6 +863,13 @@ fn ruby_valid_call_target(target: &CallTarget<'_>) -> bool { .is_match(target.message.as_str()) } +fn ruby_literal_receiver_text(text: &str) -> bool { + let value = text.trim(); + (value.starts_with("%w[") || value.starts_with("%i[")) + && value.ends_with(']') + && !value.contains('\n') +} + fn invalid_call_text(text: &str) -> bool { text.chars() .any(|ch| matches!(ch, '"' | '\'' | '\n' | '\r')) @@ -802,6 +886,9 @@ fn ruby_bare_state_reader_target(node: Node<'_>, source: &str) -> Option if node.kind() != "identifier" || !ruby_simple_call_text(node_text(node, source)) { return None; } + if matches!(node_text(node, source), "private" | "protected" | "public") { + return None; + } let parent = node.parent()?; if ruby_declaration_name(node, parent, source) { return None; @@ -835,6 +922,39 @@ fn ruby_bare_state_reader_target(node: Node<'_>, source: &str) -> Option }) } +fn ruby_sorbet_signature_payload_node(node: Node<'_>, source: &str) -> bool { + let mut current = Some(node); + while let Some(candidate) = current { + if candidate.kind() == "block" { + let Some(parent) = candidate.parent() else { + return false; + }; + if parent.kind() == "call" { + let message = parent + .child_by_field_name("method") + .or_else(|| named_children(parent).into_iter().next()) + .map(|method| node_text(method, source).to_string()); + return message.as_deref() == Some("sig"); + } + return false; + } + if matches!( + candidate.kind(), + "method" | "singleton_method" | "class" | "module" + ) { + return false; + } + current = candidate.parent(); + } + false +} + +fn 
ruby_call_has_block(node: Node<'_>) -> bool { + named_children(node) + .into_iter() + .any(|child| matches!(child.kind(), "do_block" | "block")) +} + fn ruby_direct_flat_map_block_statement(node: Node<'_>, source: &str) -> bool { if node.kind() != "call" { return false; diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs index 09f66e7ec..5870978a6 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs @@ -1,6 +1,7 @@ use super::super::Language; use super::base::LanguageProfile; -use tree_sitter::Language as TreeSitterLanguage; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct RustProfile; @@ -17,6 +18,14 @@ impl LanguageProfile for RustProfile { &["function_item"] } + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + if node_text(node, source).trim_start().starts_with("pub ") { + Some("public".to_string()) + } else { + Some("private".to_string()) + } + } + fn impl_owner_node_kinds(&self) -> &[&str] { &["impl_item"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs index a755ecb39..78ac1a451 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs @@ -1,3 +1,4 @@ +use super::super::tree_sitter_adapter::{named_children, CallTarget}; use super::super::Language; use super::base::LanguageProfile; use crate::decomplex::ast::{node_text, normalize_text}; @@ -14,6 +15,21 @@ impl LanguageProfile for SwiftProfile { tree_sitter_swift::LANGUAGE.into() } + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + for child in named_children(node) { + if child.kind() != "modifiers" { + continue; + } + if node_text(child, source) + .split_whitespace() + 
.any(|token| token == "private") + { + return Some("private".to_string()); + } + } + None + } + fn function_node_kinds(&self) -> &[&str] { &["function_declaration"] } @@ -148,6 +164,75 @@ impl LanguageProfile for SwiftProfile { &["navigation_expression"] } + fn assignment_lhs_node(&self, node: Node<'_>) -> bool { + let candidate = if node + .parent() + .map(|parent| parent.kind() == "directly_assignable_expression") + .unwrap_or(false) + { + node.parent().unwrap() + } else { + node + }; + let Some(parent) = candidate.parent() else { + return false; + }; + if parent.kind() != "assignment" { + return false; + } + named_children(parent) + .into_iter() + .next() + .map(|lhs| same_node(lhs, candidate)) + .unwrap_or(false) + } + + fn state_read_target( + &self, + node: Node<'_>, + source: &str, + ) -> Option { + if self.assignment_lhs_node(node) { + return None; + } + self.default_state_read_target(node, source) + } + + fn call_argument_nodes<'tree>(&self, node: Node<'tree>) -> Vec> { + let Some(args) = named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "call_suffix" | "value_arguments")) + else { + return Vec::new(); + }; + let value_arguments = if args.kind() == "call_suffix" { + named_children(args) + .into_iter() + .find(|child| child.kind() == "value_arguments") + } else { + Some(args) + }; + value_arguments + .map(|arguments| { + named_children(arguments) + .into_iter() + .filter(|child| child.kind() == "value_argument") + .collect() + }) + .unwrap_or_default() + } + + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + if node.kind() != "call_expression" { + return None; + } + let mut target = self.default_call_target(node, source)?; + if swift_single_line_switch_call(node) { + target.source_node = named_children(node).into_iter().next(); + } + Some(target) + } + fn call_argument_texts(&self, node: Node<'_>, source: &str) -> Vec { self.call_argument_nodes(node) .into_iter() @@ -164,3 +249,22 @@ impl LanguageProfile 
for SwiftProfile { .collect() } } + +fn same_node(left: Node<'_>, right: Node<'_>) -> bool { + left.kind() == right.kind() + && left.start_byte() == right.start_byte() + && left.end_byte() == right.end_byte() +} + +fn swift_single_line_switch_call(node: Node<'_>) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "statements" || parent.start_position().row != parent.end_position().row { + return false; + } + parent + .parent() + .map(|ancestor| ancestor.kind() == "switch_entry") + .unwrap_or(false) +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs index 35ed6b80a..6c3d50df4 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs @@ -1,6 +1,7 @@ use super::super::Language; use super::base::LanguageProfile; -use tree_sitter::Language as TreeSitterLanguage; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct TypeScriptProfile; @@ -13,6 +14,29 @@ impl LanguageProfile for TypeScriptProfile { tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into() } + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + let name = self.function_name(node, source).unwrap_or_default(); + if name.starts_with('#') { + return Some("private".to_string()); + } + for child in super::super::tree_sitter_adapter::named_children(node) { + if !matches!(child.kind(), "accessibility_modifier" | "modifier") { + continue; + } + let text = node_text(child, source); + if text.split_whitespace().any(|token| token == "private") { + return Some("private".to_string()); + } + if text.split_whitespace().any(|token| token == "protected") { + return Some("protected".to_string()); + } + if text.split_whitespace().any(|token| token == "public") { + return Some("public".to_string()); + } + } + 
Some("public".to_string()) + } + fn function_node_kinds(&self) -> &[&str] { &["function_declaration", "method_definition"] } diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs index 09ceb42a9..cfd1ebce5 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs @@ -1,4 +1,4 @@ -use super::super::tree_sitter_adapter::named_children; +use super::super::tree_sitter_adapter::{named_children, AssignmentTarget, Target}; use super::super::Language; use super::base::LanguageProfile; use crate::decomplex::ast::node_text; @@ -19,6 +19,14 @@ impl LanguageProfile for ZigProfile { &["function_declaration"] } + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + if node_text(node, source).trim_start().starts_with("pub ") { + Some("public".to_string()) + } else { + Some("private".to_string()) + } + } + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { if node.kind() == "struct_declaration" { return node @@ -34,6 +42,10 @@ impl LanguageProfile for ZigProfile { self.default_owner_name_from_declaration(node, source) } + fn struct_owner_node_kinds(&self) -> &[&str] { + &["struct_declaration"] + } + fn parameter_list_node_kinds(&self) -> &[&str] { &["parameters"] } @@ -83,12 +95,7 @@ impl LanguageProfile for ZigProfile { } fn branch_node_kinds(&self) -> &[&str] { - &[ - "if_statement", - "switch_expression", - "for_statement", - "labeled_statement", - ] + &["if_statement", "switch_expression"] } fn case_node_kinds(&self) -> &[&str] { @@ -122,4 +129,39 @@ impl LanguageProfile for ZigProfile { fn field_like_node_kinds(&self) -> &[&str] { &["field_expression"] } + + fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { + zig_literal_field_target(lhs, source).or_else(|| self.default_state_target(lhs, source)) + } + + fn state_read_target(&self, node: Node<'_>, source: &str) -> 
Option { + zig_literal_field_target(node, source) + .or_else(|| self.default_state_read_target(node, source)) + } + + fn state_write_source_node<'tree>( + &self, + node: Node<'tree>, + assignment: &AssignmentTarget<'tree>, + ) -> Node<'tree> { + let mut cursor = node.walk(); + if node.children(&mut cursor).any(|child| child.kind() == "+=") { + assignment.lhs + } else { + assignment.source + } + } +} + +fn zig_literal_field_target(node: Node<'_>, source: &str) -> Option { + if node.kind() != "field_expression" || !node_text(node, source).trim_start().starts_with('.') { + return None; + } + let field = named_children(node) + .into_iter() + .find(|child| child.kind() == "identifier")?; + Some(Target { + receiver: ".literal".to_string(), + field: node_text(field, source).to_string(), + }) } diff --git a/gems/decomplex/rust/src/decomplex/syntax/complexity.rs b/gems/decomplex/rust/src/decomplex/syntax/complexity.rs index de8afcae8..e70cf7de6 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/complexity.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/complexity.rs @@ -77,12 +77,7 @@ impl LocalComplexityScorer { } else { 0.0 }; - let child_cost = if bare_early_exit_wrapper(node) { - 0.0 - } else { - self.score_children(node, nesting, signals) - }; - return exit_cost + child_cost; + return exit_cost + self.score_children(node, nesting, signals); } if boolean_node(node) { @@ -100,12 +95,10 @@ impl LocalComplexityScorer { signals: &mut BTreeMap, ) -> f64 { compensated_sum(node.children.iter().map(|child| { - if transparent_single_line_suite_statement(node, child) { - if bare_early_exit_wrapper(child) { - 0.0 - } else { - self.score_children(child, nesting, signals) - } + if return_fallback_boolean_wrapper(node, child) { + 0.0 + } else if transparent_single_line_suite_statement(node, child) { + self.score_children(child, nesting, signals) } else { self.score_node(child, nesting, signals) } @@ -162,6 +155,14 @@ fn branch(node: &RawNode) -> bool { fn hidden_if(node: 
&RawNode) -> bool { if node.kind == "expression_statement" && node.text.trim_start().starts_with("if ") { + if node.named_children().iter().any(|child| { + matches!( + child.kind.as_str(), + "if" | "unless" | "if_statement" | "if_expression" + ) + }) { + return false; + } return true; } matches!( @@ -201,7 +202,12 @@ fn loop_node(node: &RawNode) -> bool { ) || hidden_loop(node) || (node.kind == "expression_statement" && starts_with_any(node.text.trim_start(), &["for", "while", "loop"])) - || (node.kind == "labeled_statement" && node.text.trim_start().starts_with("for ")) + || (node.kind == "labeled_statement" + && node.text.trim_start().starts_with("for ") + && !has_named_control_child( + node, + &["for_statement", "for_expression", "while_statement"], + )) } fn hidden_loop(node: &RawNode) -> bool { @@ -225,7 +231,24 @@ fn case_node(node: &RawNode) -> bool { matches!( node.kind.as_str(), "case" | "switch_statement" | "switch_expression" | "match_statement" | "match_expression" - ) || (node.kind == "expression_statement" && node.text.trim_start().starts_with("match ")) + ) || (node.kind == "expression_statement" + && node.text.trim_start().starts_with("match ") + && !has_named_control_child( + node, + &[ + "case", + "switch_statement", + "switch_expression", + "match_statement", + "match_expression", + ], + )) +} + +fn has_named_control_child(node: &RawNode, kinds: &[&str]) -> bool { + node.named_children() + .iter() + .any(|child| kinds.contains(&child.kind.as_str())) } fn rescue_node(node: &RawNode) -> bool { @@ -236,18 +259,17 @@ fn rescue_node(node: &RawNode) -> bool { } fn early_exit(node: &RawNode) -> bool { - (node.named || node.kind == "return") - && matches!( - node.kind.as_str(), - "return" - | "break" - | "next" - | "redo" - | "retry" - | "return_statement" - | "break_statement" - | "continue_statement" - ) + matches!( + node.kind.as_str(), + "return" + | "break" + | "next" + | "redo" + | "retry" + | "return_statement" + | "break_statement" + | 
"continue_statement" + ) } fn transparent_single_line_suite_statement(parent: &RawNode, child: &RawNode) -> bool { @@ -260,13 +282,13 @@ fn transparent_single_line_suite_statement(parent: &RawNode, child: &RawNode) -> ) } -fn bare_early_exit_wrapper(node: &RawNode) -> bool { - matches!( - node.kind.as_str(), - "return_statement" | "break_statement" | "continue_statement" - ) && node.children.len() == 1 - && !node.children[0].named - && node.children[0].text == node.text +fn return_fallback_boolean_wrapper(parent: &RawNode, child: &RawNode) -> bool { + parent.kind == "return_statement" + && child.kind == "expression_list" + && child + .named_children() + .iter() + .any(|grandchild| boolean_node(grandchild)) } fn boolean_node(node: &RawNode) -> bool { diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 0f97c00ab..2c205c541 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -4,7 +4,8 @@ use super::{ language_profile, LanguageProfile, }, BranchDecision, CallSite, ComparisonUse, DecisionSite, DispatchSite, Document, FunctionDef, - Language, OwnerDef, PredicateAlias, SemanticEffectSite, StateRead, StateWrite, + Language, OwnerDef, PredicateAlias, SemanticEffectSite, StateDeclaration, StateRead, + StateWrite, }; use crate::decomplex::ast::{line, node_text, normalize_text, normalize_tree, span, RawNode}; use crate::decomplex::syntax::complexity::local_complexity_scores; @@ -19,6 +20,7 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { let mut function_defs = Vec::new(); let mut owner_defs = Vec::new(); let mut call_sites = Vec::new(); + let mut state_declarations = Vec::new(); let mut state_reads = Vec::new(); let mut state_writes = Vec::new(); let mut decision_sites = Vec::new(); @@ -41,6 +43,7 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { 
&mut function_defs, &mut owner_defs, &mut call_sites, + &mut state_declarations, &mut state_reads, &mut state_writes, &mut decision_sites, @@ -52,6 +55,19 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { &mut seen_calls, &mut seen_decisions, ); + collect_implicit_state_accesses( + parsed.tree.root_node(), + &parsed.source, + &parsed.file, + language, + &context, + &function_defs, + &state_declarations, + &mut state_reads, + &mut state_writes, + &mut seen_reads, + &mut seen_writes, + ); language_profile(language).after_collect_facts(&mut function_defs, &call_sites); collect_dispatch_sites( parsed.tree.root_node(), @@ -63,7 +79,8 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { &mut dispatch_sites, ); collect_equality_dispatch_sites(&comparison_uses, &call_sites, &mut dispatch_sites); - let semantic_effect_sites = semantic_effect_sites_from_calls(language, &call_sites); + let mut semantic_effect_sites = semantic_effect_sites_from_calls(language, &call_sites); + semantic_effect_sites.extend(ruby_global_context_effects(language, &state_reads)); let local_complexity_scores = local_complexity_scores(&parsed.file.to_string_lossy(), &function_defs); @@ -77,6 +94,7 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { function_defs, owner_defs, call_sites, + state_declarations, state_reads, state_writes, decision_sites, @@ -170,6 +188,7 @@ fn collect_facts( function_defs: &mut Vec, owner_defs: &mut Vec, call_sites: &mut Vec, + state_declarations: &mut Vec, state_reads: &mut Vec, state_writes: &mut Vec, decision_sites: &mut Vec, @@ -203,6 +222,14 @@ fn collect_facts( call_sites, seen_calls, ); + record_state_declaration( + node, + source, + file, + language, + &next_context, + state_declarations, + ); record_state_read( node, source, @@ -238,7 +265,14 @@ fn collect_facts( &next_context, branch_decisions, ); - record_predicate_alias(node, source, file, language, predicate_aliases); + record_predicate_alias( + node, + source, 
+ file, + language, + &next_context, + predicate_aliases, + ); record_comparison_use(node, source, file, language, &next_context, comparison_uses); let mut cursor = node.walk(); @@ -252,6 +286,7 @@ fn collect_facts( function_defs, owner_defs, call_sites, + state_declarations, state_reads, state_writes, decision_sites, @@ -457,9 +492,6 @@ fn record_dispatch_site( &context.current_function(), span(arm), ); - if members.is_empty() { - continue; - } for pattern in case_arm_patterns(arm, source, profile) { for variant in dispatch_constant_patterns(&pattern) { arm_members @@ -540,9 +572,6 @@ fn collect_equality_dispatch_sites( &function, comparison.enclosing_span, ); - if members.is_empty() { - continue; - } arm_members.entry(variant).or_default().extend(members); } if arm_members.len() < 2 { @@ -644,7 +673,10 @@ fn record_owner_def( out: &mut Vec, ) { let profile = language_profile(language); - if profile.owner_name_from_declaration(node, source).is_none() { + if profile + .owner_def_name_from_declaration(node, source) + .is_none() + { return; } let owner = OwnerDef { @@ -672,6 +704,7 @@ fn record_predicate_alias( source: &str, file: &Path, language: Language, + context: &ContextState, out: &mut Vec, ) { let profile = language_profile(language); @@ -690,6 +723,7 @@ fn record_predicate_alias( body: text, file: file_name, defn: name, + owner: context.current_owner(), line: line(node), span: span(node), }); @@ -897,14 +931,18 @@ fn collect_branch_state_refs( out: &mut BTreeSet, ) { if let Some(target) = profile.state_read_target(node, source) { - let field = normalized_state_ref_field(&target.field); + let field = if profile.language() == Language::Ruby { + target.field.clone() + } else { + normalized_state_ref_field(&target.field) + }; let receiver = target.receiver.trim_start_matches('$'); if constant_like_state_ref(receiver, &field) { // Constants and type namespaces are not mutable object state. 
} else if branch_local_ref(node, source, receiver, &field, context) { // Function-local bindings are not object state, even when a // language permits bare predicate-style method calls. - } else if receiver.is_empty() || matches!(receiver, "self" | "this") { + } else if receiver.is_empty() || receiver == "self" { out.insert(field); } else { out.insert(format!("{receiver}.{field}")); @@ -917,6 +955,27 @@ fn collect_branch_state_refs( } } +fn ruby_global_context_effects( + language: Language, + state_reads: &[StateRead], +) -> Vec { + if language != Language::Ruby { + return Vec::new(); + } + state_reads + .iter() + .filter(|read| read.field.starts_with('$')) + .map(|read| SemanticEffectSite { + kind: "context_dependency".to_string(), + detail: read.field.clone(), + file: read.file.clone(), + function: read.function.clone(), + line: read.line, + span: read.span, + }) + .collect() +} + fn branch_local_ref( node: Node<'_>, source: &str, @@ -929,6 +988,149 @@ fn branch_local_ref( && normalize_text(node_text(node, source)) == field } +fn declared_state_index(declarations: &[StateDeclaration]) -> BTreeMap> { + let mut index: BTreeMap> = BTreeMap::new(); + for declaration in declarations { + index + .entry(declaration.owner.clone()) + .or_default() + .insert(declaration.field.clone()); + } + index +} + +fn function_param_index( + function_defs: &[FunctionDef], +) -> BTreeMap<(String, String), BTreeSet> { + let mut index: BTreeMap<(String, String), BTreeSet> = BTreeMap::new(); + for function in function_defs { + index + .entry((function.owner.clone(), function.name.clone())) + .or_default() + .extend(function.params.iter().cloned()); + } + index +} + +fn local_declaration_index( + root: Node<'_>, + source: &str, + language: Language, + context: &ContextState, +) -> BTreeMap<(String, String), BTreeSet> { + let mut index = BTreeMap::new(); + local_declaration_index_for_node(root, source, language, context, &mut index); + index +} + +fn local_declaration_index_for_node( + 
node: Node<'_>, + source: &str, + language: Language, + context: &ContextState, + out: &mut BTreeMap<(String, String), BTreeSet>, +) { + let next_context = push_control_context( + node, + push_function_context( + node, + push_owner_context(node, source, context, language), + source, + language, + ), + source, + language, + ); + let profile = language_profile(language); + if local_variable_declarator(profile, node) { + let owner = next_context.current_owner(); + let function = next_context.current_function(); + if function != "(top-level)" { + if let Some(name) = local_name_node(profile, node, source) { + out.entry((owner, function)) + .or_default() + .insert(node_text(name, source).to_string()); + } + } + } + + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + local_declaration_index_for_node(child, source, language, &next_context, out); + } +} + +fn local_variable_declarator(profile: &dyn LanguageProfile, node: Node<'_>) -> bool { + profile + .local_variable_declarator_node_kinds() + .contains(&node.kind()) + && !inside_kind(node, profile.field_declaration_node_kinds()) +} + +fn local_name_node<'tree>( + profile: &dyn LanguageProfile, + node: Node<'tree>, + source: &str, +) -> Option> { + node.child_by_field_name("name") + .or_else(|| profile.declarator_name_node(node, source)) + .or_else(|| { + named_children(node).into_iter().find(|child| { + profile.identifier_node_kinds().contains(&child.kind()) + || profile + .field_identifier_node_kinds() + .contains(&child.kind()) + }) + }) +} + +fn implicit_state_identifier(profile: &dyn LanguageProfile, node: Node<'_>) -> bool { + profile.identifier_node_kinds().contains(&node.kind()) + || profile.field_identifier_node_kinds().contains(&node.kind()) +} + +fn identifier_declaration_site(profile: &dyn LanguageProfile, node: Node<'_>) -> bool { + if node + .parent() + .map(|parent| { + profile + .declaration_site_parent_node_kinds() + .contains(&parent.kind()) + }) + .unwrap_or(false) + { + return 
true; + } + inside_kind(node, profile.field_declaration_node_kinds()) +} + +fn member_message_identifier(profile: &dyn LanguageProfile, node: Node<'_>) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if !profile.field_like_node_kinds().contains(&parent.kind()) { + return false; + } + let field = parent + .child_by_field_name("field") + .or_else(|| parent.child_by_field_name("property")) + .or_else(|| parent.child_by_field_name("name")) + .or_else(|| named_children(parent).into_iter().last()); + field.map(|field| same_node(field, node)).unwrap_or(false) +} + +fn implicit_assignment_lhs(profile: &dyn LanguageProfile, node: Node<'_>) -> bool { + if let Some(parent) = node.parent() { + if profile.assignment_node_kinds().contains(&parent.kind()) { + let lhs = parent + .child_by_field_name("left") + .or_else(|| first_named_child(parent)); + return lhs.map(|lhs| same_node(lhs, node)).unwrap_or(false); + } + } + profile.assignment_lhs_node(node) +} + fn normalized_state_ref_field(field: &str) -> String { field .trim_start_matches('@') @@ -1145,6 +1347,43 @@ fn record_call_site( }); } +fn record_state_declaration( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, +) { + let profile = language_profile(language); + let Some((field, r#type)) = profile.state_declaration(node, source) else { + return; + }; + let declaration = StateDeclaration { + field, + owner: context.current_owner(), + r#type, + file: file.to_string_lossy().to_string(), + line: line(node), + span: span(node), + }; + let key = ( + declaration.file.clone(), + declaration.owner.clone(), + declaration.field.clone(), + ); + if out.iter().any(|existing| { + ( + existing.file.clone(), + existing.owner.clone(), + existing.field.clone(), + ) == key + }) { + return; + } + out.push(declaration); +} + fn record_state_read( node: Node<'_>, source: &str, @@ -1197,6 +1436,194 @@ fn record_state_read( }); } +fn 
collect_implicit_state_accesses( + root: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + function_defs: &[FunctionDef], + state_declarations: &[StateDeclaration], + state_reads: &mut Vec, + state_writes: &mut Vec, + seen_reads: &mut HashSet, + seen_writes: &mut HashSet, +) { + let profile = language_profile(language); + if !profile.implicit_state_accesses() { + return; + } + let declared = declared_state_index(state_declarations); + if declared.is_empty() { + return; + } + let locals = local_declaration_index(root, source, language, context); + let params = function_param_index(function_defs); + collect_implicit_state_accesses_for_node( + root, + source, + file, + language, + context, + &declared, + &locals, + &params, + state_reads, + state_writes, + seen_reads, + seen_writes, + ); +} + +fn collect_implicit_state_accesses_for_node( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + declared: &BTreeMap>, + locals: &BTreeMap<(String, String), BTreeSet>, + params: &BTreeMap<(String, String), BTreeSet>, + state_reads: &mut Vec, + state_writes: &mut Vec, + seen_reads: &mut HashSet, + seen_writes: &mut HashSet, +) { + let next_context = push_control_context( + node, + push_function_context( + node, + push_owner_context(node, source, context, language), + source, + language, + ), + source, + language, + ); + record_implicit_state_access( + node, + source, + file, + language, + &next_context, + declared, + locals, + params, + state_reads, + state_writes, + seen_reads, + seen_writes, + ); + + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + collect_implicit_state_accesses_for_node( + child, + source, + file, + language, + &next_context, + declared, + locals, + params, + state_reads, + state_writes, + seen_reads, + seen_writes, + ); + } +} + +fn record_implicit_state_access( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: 
&ContextState, + declared: &BTreeMap>, + locals: &BTreeMap<(String, String), BTreeSet>, + params: &BTreeMap<(String, String), BTreeSet>, + state_reads: &mut Vec, + state_writes: &mut Vec, + seen_reads: &mut HashSet, + seen_writes: &mut HashSet, +) { + let profile = language_profile(language); + if !implicit_state_identifier(profile, node) { + return; + } + let owner = context.current_owner(); + let function = context.current_function(); + if function == "(top-level)" { + return; + } + let field = node_text(node, source).to_string(); + if !declared + .get(&owner) + .map(|fields| fields.contains(&field)) + .unwrap_or(false) + { + return; + } + let scope = (owner.clone(), function.clone()); + if params + .get(&scope) + .map(|fields| fields.contains(&field)) + .unwrap_or(false) + || locals + .get(&scope) + .map(|fields| fields.contains(&field)) + .unwrap_or(false) + || identifier_declaration_site(profile, node) + || member_message_identifier(profile, node) + { + return; + } + + let file_name = file.to_string_lossy().to_string(); + if implicit_assignment_lhs(profile, node) { + let key = format!( + "{}\0{}\0{}\0{}\0self\0{}", + file_name, + owner, + function, + line(node), + field + ); + if seen_writes.insert(key) { + state_writes.push(StateWrite { + field, + receiver: "self".to_string(), + file: file_name, + function, + line: line(node), + span: span(node), + owner, + }); + } + } else { + let key = format!( + "{}\0{}\0{}\0{:?}\0self\0{}", + file_name, + owner, + function, + span(node), + field + ); + if seen_reads.insert(key) { + state_reads.push(StateRead { + field, + receiver: "self".to_string(), + file: file_name, + function, + line: line(node), + span: span(node), + owner, + }); + } + } +} + fn record_state_write( node: Node<'_>, source: &str, @@ -1355,17 +1782,32 @@ pub(crate) fn named_children(node: Node<'_>) -> Vec> { node.named_children(&mut cursor).collect() } +fn inside_kind(node: Node<'_>, kinds: &[&str]) -> bool { + let mut parent = node.parent(); + let mut 
seen = HashSet::new(); + while let Some(current) = parent { + let key = format!("{:?}\0{}", span(current), current.kind()); + if !seen.insert(key) { + break; + } + if kinds.contains(&current.kind()) { + return true; + } + parent = current.parent(); + } + false +} + +fn same_node(left: Node<'_>, right: Node<'_>) -> bool { + left.kind() == right.kind() && span(left) == span(right) +} + pub(crate) fn first_child_kind(node: Node<'_>) -> Option<&str> { let mut cursor = node.walk(); let kind = node.children(&mut cursor).next().map(|child| child.kind()); kind } -pub(crate) fn previous_sibling_text(node: Node<'_>, source: &str) -> Option { - node.prev_sibling() - .map(|sibling| node_text(sibling, source).to_string()) -} - pub(crate) fn previous_sibling_raw_text(node: Node<'_>) -> Option { node.prev_sibling() .map(|sibling| sibling.kind().to_string()) @@ -1570,11 +2012,11 @@ fn union_span(left: [usize; 4], right: [usize; 4]) -> [usize; 4] { fn decision_predicate(profile: &dyn LanguageProfile, node: Node<'_>, source: &str) -> String { let target = profile.decision_subject(node); - normalize_text( + strip_enclosing_parentheses(&normalize_text( target .map(|child| node_text(child, source)) .unwrap_or_else(|| node_text(node, source)), - ) + )) } fn boolean_and(profile: &dyn LanguageProfile, node: Node<'_>, source: &str) -> bool { @@ -1867,14 +2309,6 @@ fn normalize_call_receiver(target: &mut CallTarget<'_>, context: &ContextState) fn canonical_self_receiver(receiver: &str) -> String { match receiver { "self" | "this" | "$this" => "self".to_string(), - _ if receiver.starts_with("this.") => format!( - "self.{}", - receiver.strip_prefix("this.").unwrap_or_default() - ), - _ if receiver.starts_with("$this.") => format!( - "self.{}", - receiver.strip_prefix("$this.").unwrap_or_default() - ), _ => receiver.to_string(), } } diff --git a/gems/decomplex/rust/src/decomplex/syntax_oracle.rs b/gems/decomplex/rust/src/decomplex/syntax_oracle.rs index 03a534522..749309267 100644 --- 
a/gems/decomplex/rust/src/decomplex/syntax_oracle.rs +++ b/gems/decomplex/rust/src/decomplex/syntax_oracle.rs @@ -1,4 +1,3 @@ -use crate::decomplex::syntax::adapters::language_profile; use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde_json::{json, Value}; @@ -15,8 +14,6 @@ pub fn project_files(files: &[PathBuf], language: Language) -> Result { } pub fn project_document(document: &Document) -> Value { - let clone_candidates = language_profile(document.language).clone_candidates(document); - json!({ "file": logical_file(&document.file), "language": document.language.as_str(), @@ -96,26 +93,11 @@ pub fn project_document(document: &Document) -> Value { })).collect()), "predicate_bodies": sorted(document.predicate_aliases.iter().map(|predicate| json!({ "name": predicate.name, - "owner": "", + "owner": predicate.owner, "body": predicate.body, "line": predicate.line, "span": predicate.span, })).collect()), - "local_complexity": document.local_complexity_scores.iter().map(|(id, score)| json!({ - "id": id, - "score": score.score, - "signals": score.signals, - })).collect::>(), - "clone_candidates": sorted(clone_candidates.iter().map(|candidate| json!({ - "method_name": candidate.method_name, - "node_name": candidate.node_name, - "line": candidate.line, - "span": candidate.span, - "mass": candidate.mass, - "fingerprint": candidate.fingerprint, - "child_fingerprints": candidate.child_fingerprints, - "child_masses": candidate.child_masses, - })).collect()), }) } diff --git a/gems/decomplex/test/syntax_oracle_test.rb b/gems/decomplex/test/syntax_oracle_test.rb index 56401d0f4..25b39a5db 100644 --- a/gems/decomplex/test/syntax_oracle_test.rb +++ b/gems/decomplex/test/syntax_oracle_test.rb @@ -7,7 +7,7 @@ class SyntaxOracleTest < Minitest::Test EXAMPLES_ROOT = File.expand_path("../examples/syntax-facts", __dir__) ORACLE_ROOT = File.join(EXAMPLES_ROOT, "oracles") - ENGINES = %w[ruby].freeze + ENGINES = %w[ruby rust].freeze FIXTURES = 
Dir[File.join(EXAMPLES_ROOT, "*", "*")] .select { |path| File.file?(path) && Decomplex::Syntax.supported_source?(path) } From ae61f7620e32cf613f1bae3f9b4e6f6155f3503a Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 16:59:26 +0000 Subject: [PATCH 49/52] WIP harden decomplex fact boundary tests --- .../agents/cross-system-fact-oracle-design.md | 143 ++ .../examples/facts/local-flow/consumers.json | 343 +++ .../oracles/ruby-block_receiver_calls.json | 161 ++ .../oracles/ruby-locals_not_state.json | 163 ++ .../oracles/ruby-semantic_effects.json | 131 + .../oracles/ruby-state_reads.json | 93 + .../source-facts/oracles/ruby-visibility.json | 86 + .../source-facts/ruby/block_receiver_calls.rb | 12 + .../source-facts/ruby/locals_not_state.rb | 15 + .../source-facts/ruby/semantic_effects.rb | 15 + .../examples/source-facts/ruby/state_reads.rb | 14 + .../examples/source-facts/ruby/visibility.rb | 19 + .../rust/src/decomplex/architecture_test.rs | 63 +- .../decomplex/detectors/false_simplicity.rs | 5 +- .../decomplex/detectors/flay_similarity.rs | 4 +- .../detectors/implicit_control_flow.rs | 73 +- .../src/decomplex/detectors/local_flow.rs | 2108 +--------------- .../src/decomplex/detectors/path_condition.rs | 741 +----- .../detectors/redundant_nil_guard.rs | 647 +---- gems/decomplex/rust/src/decomplex/syntax.rs | 44 + .../src/decomplex/syntax/adapters/base.rs | 121 +- .../src/decomplex/syntax/adapters/ruby.rs | 784 +++++- .../rust/src/decomplex/syntax/local_flow.rs | 2168 +++++++++++++++++ .../src/decomplex/syntax/path_condition.rs | 738 ++++++ .../decomplex/syntax/redundant_nil_guard.rs | 644 +++++ .../decomplex/syntax/tree_sitter_adapter.rs | 51 +- gems/decomplex/rust/tests/examples_oracle.rs | 150 ++ .../test/local_flow_fact_oracle_test.rb | 41 + .../test/source_facts_oracle_test.rb | 88 + 29 files changed, 5967 insertions(+), 3698 deletions(-) create mode 100644 gems/decomplex/docs/agents/cross-system-fact-oracle-design.md create mode 100644 
gems/decomplex/examples/facts/local-flow/consumers.json create mode 100644 gems/decomplex/examples/source-facts/oracles/ruby-block_receiver_calls.json create mode 100644 gems/decomplex/examples/source-facts/oracles/ruby-locals_not_state.json create mode 100644 gems/decomplex/examples/source-facts/oracles/ruby-semantic_effects.json create mode 100644 gems/decomplex/examples/source-facts/oracles/ruby-state_reads.json create mode 100644 gems/decomplex/examples/source-facts/oracles/ruby-visibility.json create mode 100644 gems/decomplex/examples/source-facts/ruby/block_receiver_calls.rb create mode 100644 gems/decomplex/examples/source-facts/ruby/locals_not_state.rb create mode 100644 gems/decomplex/examples/source-facts/ruby/semantic_effects.rb create mode 100644 gems/decomplex/examples/source-facts/ruby/state_reads.rb create mode 100644 gems/decomplex/examples/source-facts/ruby/visibility.rb create mode 100644 gems/decomplex/rust/src/decomplex/syntax/local_flow.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/path_condition.rs create mode 100644 gems/decomplex/rust/src/decomplex/syntax/redundant_nil_guard.rs create mode 100644 gems/decomplex/test/local_flow_fact_oracle_test.rb create mode 100644 gems/decomplex/test/source_facts_oracle_test.rb diff --git a/gems/decomplex/docs/agents/cross-system-fact-oracle-design.md b/gems/decomplex/docs/agents/cross-system-fact-oracle-design.md new file mode 100644 index 000000000..75960ac4c --- /dev/null +++ b/gems/decomplex/docs/agents/cross-system-fact-oracle-design.md @@ -0,0 +1,143 @@ +# Cross-System Fact Oracle Design + +Status: WIP design for the Ruby-vs-Rust Decomplex parity work. + +## Problem + +The current test stack lets Rust drift into detector-owned fact generation. That is an architectural failure. Detectors must consume already-normalized facts. 
If a detector needs to walk raw Tree-sitter nodes, normalized AST roots, language profiles, or language-specific syntax, the required fact is missing from the syntax layer and must be added there first. + +The test suite must prove fact generation before it proves detector scoring. A detector oracle that only checks the final finding is too late and too coarse; it can hide incorrect or missing facts, duplicated mining code, and detector-specific language hacks. + +## Required Oracle Layers + +Blocking rule: do not continue detector parity, report parity, SARIF parity, or real-repo end-to-end parity until source-level fact generation integration tests exist for every fact consumed by detectors and those tests run against both Ruby Decomplex and Rust Decomplex. + +1. Source fact oracle + - Input: source file in a real language. + - Engines: Ruby Decomplex and Rust Decomplex. + - Output: exact canonical fact projection. + - Purpose: prove that adapters and syntax modules generate the same facts from source. + +2. Normalized fact JSON oracle + - Input: language-neutral JSON fact set. + - Engines: Ruby detector/report pipeline and Rust detector/report pipeline. + - Output: exact detector/report/SARIF/root-cause/convergence projection. + - Purpose: prove detector consumers behave the same once facts are correct. + +3. End-to-end repository parity + - Input: real repos. + - Engines: Ruby full pipeline and Rust full pipeline. + - Output: byte-for-byte report/json/SARIF where supported. + - Purpose: acceptance only. This must not be the primary way bugs are discovered. 
+ +## Fact Generation Contract + +Every fact that any detector consumes must have source-level oracle coverage: + +- `function_defs` +- `owner_defs` +- `call_sites` +- `state_reads` +- `state_writes` +- `state_declarations` +- `state_param_origins` +- `decision_sites` +- `branch_decisions` +- `dispatch_sites` +- `comparison_uses` +- `semantic_effect_sites` +- `predicate_defs` +- `path_condition_sites` +- `local_methods` +- `local_complexity_scores` +- `clone_candidates` +- `protocol_method_effects` +- `protocol_call_paths` +- `redundant_nil_guard_findings` +- language-specific optional contract facts such as immutable reader/type alias facts + +If a detector needs a new input, the change order is: + +1. Add or extend the syntax fact type. +2. Add source fixtures for at least Ruby and any language being touched. +3. Add exact Ruby-vs-Rust source fact oracle assertions. +4. Update the detector to consume that fact. +5. Add or extend normalized fact JSON detector oracles. + +No detector may add fallback fact mining. + +## Ruby Source Fixtures Needed First + +These Ruby fixtures should live under `gems/decomplex/examples/source-facts/ruby/` and each should have an exact oracle under `examples/source-facts/oracles/`. + +- `state_reads.rb`: receivers, chained receivers, self reads, globals, constants that must not become state, safe navigation when represented. +- `state_writes.rb`: instance/global writes, indexed writes, field writes, operator assignment, local writes that must not become state. +- `visibility.rb`: public/protected/private declarations, standalone visibility, symbol-list visibility, default public. +- `semantic_effects.rb`: hidden IO, dynamic dispatch, callback inversion, metaprogramming, context reads, `[]=`, `<<`, method hooks. +- `block_receiver_calls.rb`: block parameter receiver calls, nested block calls, iterator control metadata, without unrelated mutation noise. 
+- `locals_not_state.rb`: params, locals, `ENV[key]`, indexed local assignment receiver reads, assertion commands, block-local values, outer locals. +- `local_flow.rb`: reads, writes, dependencies, co-uses, boundaries, destructuring, loops, nested scopes, indexed/member writes. +- `nil_guards.rb`: prior non-nil proof, redundant `nil?`, safe navigation, branch dominance, termination. +- `path_conditions.rb`: nested guards, `&&`, modifier conditionals, case/when, guarded actions. +- `clone_candidates.rb`: function bodies, owner bodies, DSL wrapper bodies, fingerprint/mass behavior. +- `protocols.rb`: receiverless Ruby calls, bare readers, internal call paths, method effects, mutating calls, declarative/DSL calls that must not become protocol events. + +The Ruby source-fact oracle should not collapse these to counts. It should assert the exact relevant rows and fields for each section under test. + +## Cross-Language Happy Path Matrix + +For each supported language, add at least one fixture per fact bucket that proves the language adapter emits the shared fact shape: + +- functions/owners/calls +- state reads/writes +- local methods +- branch/path facts +- semantic effects +- clone candidates +- protocol facts where the language has receiverless calls or implicit receiver calls +- nil/null guard facts where the language supports the detector + +Languages where function calls require `()` should not need Ruby-style bare-call protocol heuristics. Languages that allow omitted call delimiters or implicit receiver calls must solve that ambiguity in the adapter and prove it with source-fact fixtures. + +## Normalized Fact JSON Path + +The JSON fact fixtures under `gems/decomplex/examples/facts/` should cover detector consumers after normalization. These fixtures are language-neutral and should be shared by Ruby and Rust. 
+ +Required groups: + +- `facts/local-flow/`: derived-state, locality-drag, function-LCOM, operational discontinuity, inconsistent rename clone, decision pressure. +- `facts/detectors/`: detectors that consume simpler direct facts. +- `facts/root-cause/`: root-cause ranking from a full detector fact set. +- `facts/convergence/`: convergence output from the same full detector fact set. +- `facts/report/`: markdown and JSON report output from the same full detector fact set. +- `facts/sarif/`: SARIF output from the same full detector fact set. + +The normalized fact JSON must include the full fact set needed by the downstream stage, not a detector-specific stub that proves only that the current code repeats itself. + +## Architecture Invariants + +Rust must mirror Ruby's architectural guardrails: + +- production detector modules must not import `tree_sitter` +- production detector modules must not import `syntax::adapters` +- production detector modules must not call `language_profile` +- production detector modules must not inspect `document.language` +- production detector modules must not read `document.root` or `document.normalized_root` +- production detector modules must not use `RawNode` +- production detector modules must not branch on `Language::Ruby`, `Language::Python`, or any other concrete language + +If one of these invariants blocks a detector fix, the fix belongs in syntax/adapters or in a new fact type. + +## CI Gates + +The minimum CI gate before end-to-end repo parity work: + +- Ruby architecture invariants pass. +- Rust architecture invariants pass. +- Ruby source-fact oracle passes for Ruby and Rust engines. +- Rust integration source-fact oracle passes without shelling through Ruby test assertions. +- Normalized fact JSON detector oracles pass for Ruby and Rust. +- Report/root-cause/convergence/SARIF JSON-input oracles pass for Ruby and Rust. +- No skips for supported fact buckets. 
Unsupported language/fact combinations must be explicit `unsupported` entries in the matrix, not skipped tests. + +End-to-end repo parity should start only after these gates are green. diff --git a/gems/decomplex/examples/facts/local-flow/consumers.json b/gems/decomplex/examples/facts/local-flow/consumers.json new file mode 100644 index 000000000..2d8568754 --- /dev/null +++ b/gems/decomplex/examples/facts/local-flow/consumers.json @@ -0,0 +1,343 @@ +{ + "input": { + "documents": [ + { + "file": "facts/derived.rb", + "language": "ruby", + "local_methods": [ + { + "id": "Fixture#refresh", + "owner": "Fixture", + "name": "refresh", + "file": "facts/derived.rb", + "line": 1, + "span": [1, 0, 20, 3], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [1, 0, 1, 13], + "source": "source = load", + "reads": ["load"], + "writes": ["source"], + "dependencies": [["source", "load"]], + "co_uses": [] + }, + { + "index": 1, + "line": 2, + "end_line": 2, + "span": [2, 0, 2, 16], + "source": "derived = source", + "reads": ["source"], + "writes": ["derived"], + "dependencies": [["derived", "source"]], + "co_uses": [] + }, + { + "index": 2, + "line": 7, + "end_line": 7, + "span": [7, 0, 7, 15], + "source": "source = reload", + "reads": ["reload"], + "writes": ["source"], + "dependencies": [["source", "reload"]], + "co_uses": [] + }, + { + "index": 3, + "line": 9, + "end_line": 9, + "span": [9, 0, 9, 12], + "source": "use(derived)", + "reads": ["derived"], + "writes": [], + "dependencies": [], + "co_uses": [] + } + ], + "boundaries": [] + } + ] + }, + { + "file": "facts/locality.rb", + "language": "ruby", + "local_complexity_scores": { + "Fixture#assemble": { + "score": 18.0, + "signals": { + "branches": 2 + } + } + }, + "local_methods": [ + { + "id": "Fixture#assemble", + "owner": "Fixture", + "name": "assemble", + "file": "facts/locality.rb", + "line": 1, + "span": [1, 0, 20, 3], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": 
[1, 0, 1, 23], + "source": "payload = build_payload", + "reads": ["build_payload"], + "writes": ["payload"], + "dependencies": [["payload", "build_payload"]], + "co_uses": [] + }, + { + "index": 1, + "line": 3, + "end_line": 3, + "span": [3, 0, 3, 18], + "source": "alpha = load_alpha", + "reads": ["load_alpha"], + "writes": ["alpha"], + "dependencies": [["alpha", "load_alpha"]], + "co_uses": [] + }, + { + "index": 2, + "line": 5, + "end_line": 5, + "span": [5, 0, 5, 16], + "source": "beta = load_beta", + "reads": ["load_beta"], + "writes": ["beta"], + "dependencies": [["beta", "load_beta"]], + "co_uses": [] + }, + { + "index": 3, + "line": 7, + "end_line": 7, + "span": [7, 0, 7, 18], + "source": "gamma = load_gamma", + "reads": ["load_gamma"], + "writes": ["gamma"], + "dependencies": [["gamma", "load_gamma"]], + "co_uses": [] + }, + { + "index": 4, + "line": 9, + "end_line": 9, + "span": [9, 0, 9, 18], + "source": "delta = load_delta", + "reads": ["load_delta"], + "writes": ["delta"], + "dependencies": [["delta", "load_delta"]], + "co_uses": [] + }, + { + "index": 5, + "line": 13, + "end_line": 13, + "span": [13, 0, 13, 16], + "source": "consume(payload)", + "reads": ["payload"], + "writes": [], + "dependencies": [], + "co_uses": [] + } + ], + "boundaries": [ + { + "before_index": 0, + "after_index": 5, + "line": 11, + "kind": "comment", + "text": "# phase 2" + } + ] + } + ] + }, + { + "file": "facts/lcom.rb", + "language": "ruby", + "local_methods": [ + { + "id": "Fixture#mixed", + "owner": "Fixture", + "name": "mixed", + "file": "facts/lcom.rb", + "line": 1, + "span": [1, 0, 20, 3], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [1, 0, 1, 11], + "source": "a = input_a", + "reads": ["input_a"], + "writes": ["a"], + "dependencies": [["a", "input_a"]], + "co_uses": [] + }, + { + "index": 1, + "line": 2, + "end_line": 2, + "span": [2, 0, 2, 16], + "source": "b = normalize(a)", + "reads": ["a"], + "writes": ["b"], + "dependencies": [["b", 
"a"]], + "co_uses": [] + }, + { + "index": 2, + "line": 3, + "end_line": 3, + "span": [3, 0, 3, 11], + "source": "c = input_c", + "reads": ["input_c"], + "writes": ["c"], + "dependencies": [["c", "input_c"]], + "co_uses": [] + }, + { + "index": 3, + "line": 4, + "end_line": 4, + "span": [4, 0, 4, 16], + "source": "d = normalize(c)", + "reads": ["c"], + "writes": ["d"], + "dependencies": [["d", "c"]], + "co_uses": [] + }, + { + "index": 4, + "line": 5, + "end_line": 5, + "span": [5, 0, 5, 11], + "source": "e = input_e", + "reads": ["input_e"], + "writes": ["e"], + "dependencies": [["e", "input_e"]], + "co_uses": [] + }, + { + "index": 5, + "line": 6, + "end_line": 6, + "span": [6, 0, 6, 16], + "source": "f = normalize(e)", + "reads": ["e"], + "writes": ["f"], + "dependencies": [["f", "e"]], + "co_uses": [] + } + ], + "boundaries": [] + } + ] + } + ] + }, + "expected": { + "derived-state": [ + { + "at": "facts/derived.rb:refresh:2", + "defn": "refresh", + "derived": "derived", + "derived_at": 2, + "file": "facts/derived.rb", + "gap": 5, + "source": "source", + "source_reassigned_at": 7, + "spans": { + "facts/derived.rb:refresh:2": [2, 0, 2, 16] + } + } + ], + "locality-drag": [ + { + "at": "facts/locality.rb:assemble:1", + "boundaries": [ + { + "line": 11, + "marker": "# phase 2" + } + ], + "boundary_crossings": 1, + "defined_at": 1, + "definition_deps": ["build_payload"], + "defn": "assemble", + "examples": [ + { + "line": 3, + "source": "alpha = load_alpha" + }, + { + "line": 5, + "source": "beta = load_beta" + }, + { + "line": 7, + "source": "gamma = load_gamma" + } + ], + "file": "facts/locality.rb", + "gap_lines": 12, + "gap_statements": 4, + "line": 1, + "local_complexity": 18.0, + "method": "assemble", + "owner": "Fixture", + "reason": "`payload` is initialized 12 line(s) before first use; 4 unrelated intervening statement(s); 1 structural boundary crossing(s); method local complexity 18.0", + "related_statements": 0, + "score": 63, + "setup_statements": 0, + 
"spans": { + "facts/locality.rb:assemble:1": [1, 0, 20, 3] + }, + "unrelated_statements": 4, + "use_reads": ["payload"], + "used_at": 13, + "variable": "payload" + } + ], + "function-lcom": [ + { + "at": "facts/lcom.rb:mixed:1", + "component_lines": [ + [1, 2], + [3, 4], + [5, 6] + ], + "component_vars": [ + ["a", "b", "input_a"], + ["c", "d", "input_c"], + ["e", "f", "input_e"] + ], + "components": 3, + "defn": "mixed", + "file": "facts/lcom.rb", + "line": 1, + "locals": 9, + "method": "mixed", + "mode": "disjoint", + "owner": "Fixture", + "score": 45, + "spans": { + "facts/lcom.rb:mixed:1": [1, 0, 20, 3] + }, + "statements": 6, + "terminal_join": false + } + ], + "operational-discontinuity": [] + } +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-block_receiver_calls.json b/gems/decomplex/examples/source-facts/oracles/ruby-block_receiver_calls.json new file mode 100644 index 000000000..ced1a727c --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-block_receiver_calls.json @@ -0,0 +1,161 @@ +{ + "syntax": { + "functions": [ + { + "name": "collect", + "owner": "SourceFactBlockReceiverCalls", + "line": 4, + "visibility": "public", + "params": [ + "items" + ] + } + ], + "calls": [ + { + "receiver": "self", + "message": "names", + "function": "collect", + "line": 10, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "item", + "message": "name", + "function": "collect", + "line": 7, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "item", + "message": "children", + "function": "collect", + "line": 8, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "child", + "message": "name", + "function": "collect", + "line": 8, + "conditional": true, + "control": "iterates", + "safe_navigation": 
false, + "block": false, + "arguments": [] + }, + { + "receiver": "items", + "message": "flat_map", + "function": "collect", + "line": 6, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + }, + { + "receiver": "item.children", + "message": "each", + "function": "collect", + "line": 8, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": true, + "arguments": [] + } + ], + "state_reads": [ + { + "receiver": "item", + "field": "children", + "function": "collect", + "line": 8 + }, + { + "receiver": "item.children", + "field": "each", + "function": "collect", + "line": 8 + }, + { + "receiver": "items", + "field": "flat_map", + "function": "collect", + "line": 6 + }, + { + "receiver": "item", + "field": "name", + "function": "collect", + "line": 7 + }, + { + "receiver": "child", + "field": "name", + "function": "collect", + "line": 8 + } + ], + "semantic_effects": [ + { + "kind": "hidden_mutation", + "detail": "<<", + "function": "collect", + "line": 7 + } + ] + }, + "local_flow": [ + { + "method": "collect", + "statements": [ + { + "reads": [], + "writes": [ + "names" + ], + "dependencies": [], + "co_uses": [] + }, + { + "reads": [ + "items", + "names" + ], + "writes": [], + "dependencies": [], + "co_uses": [ + [ + "items", + "names" + ] + ] + }, + { + "reads": [ + "names" + ], + "writes": [], + "dependencies": [], + "co_uses": [] + } + ], + "boundaries": [] + } + ] +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-locals_not_state.json b/gems/decomplex/examples/source-facts/oracles/ruby-locals_not_state.json new file mode 100644 index 000000000..0abef7a20 --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-locals_not_state.json @@ -0,0 +1,163 @@ +{ + "syntax": { + "functions": [ + { + "name": "build", + "owner": "SourceFactLocalsNotState", + "line": 4, + "visibility": "public", + "params": [ + "values", + "config" + ] + } + ], + "calls": [ + 
{ + "receiver": "self", + "message": "assert_empty", + "function": "build", + "line": 12, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "values" + ] + }, + { + "receiver": "values", + "message": "each", + "function": "build", + "line": 8, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + } + ], + "state_reads": [ + { + "receiver": "values", + "field": "each", + "function": "build", + "line": 8 + } + ], + "state_writes": [], + "semantic_effects": [ + { + "kind": "context_dependency", + "detail": "ENV", + "function": "build", + "line": 6 + }, + { + "kind": "hidden_mutation", + "detail": "[]=", + "function": "build", + "line": 11 + } + ] + }, + "local_flow": [ + { + "method": "build", + "statements": [ + { + "reads": [], + "writes": [ + "key" + ], + "dependencies": [], + "co_uses": [] + }, + { + "reads": [ + "key" + ], + "writes": [ + "path" + ], + "dependencies": [ + [ + "path", + "key" + ] + ], + "co_uses": [] + }, + { + "reads": [], + "writes": [ + "total" + ], + "dependencies": [], + "co_uses": [] + }, + { + "reads": [ + "total", + "values" + ], + "writes": [ + "total" + ], + "dependencies": [], + "co_uses": [ + [ + "total", + "values" + ] + ] + }, + { + "reads": [ + "config", + "path" + ], + "writes": [], + "dependencies": [], + "co_uses": [ + [ + "config", + "path" + ] + ] + }, + { + "reads": [], + "writes": [], + "dependencies": [], + "co_uses": [] + }, + { + "reads": [ + "config", + "path", + "total" + ], + "writes": [], + "dependencies": [], + "co_uses": [ + [ + "config", + "path" + ], + [ + "config", + "total" + ], + [ + "path", + "total" + ] + ] + } + ], + "boundaries": [] + } + ] +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-semantic_effects.json b/gems/decomplex/examples/source-facts/oracles/ruby-semantic_effects.json new file mode 100644 index 000000000..5be6fd173 --- /dev/null +++ 
b/gems/decomplex/examples/source-facts/oracles/ruby-semantic_effects.json @@ -0,0 +1,131 @@ +{ + "syntax": { + "functions": [ + { + "name": "mutate", + "owner": "SourceFactSemanticEffects", + "line": 11, + "visibility": "public", + "params": [ + "target", + "value" + ] + }, + { + "name": "perform", + "owner": "SourceFactSemanticEffects", + "line": 4, + "visibility": "public", + "params": [ + "callback", + "name" + ] + } + ], + "calls": [ + { + "receiver": "self", + "message": "send", + "function": "perform", + "line": 7, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + ":audit", + "name" + ] + }, + { + "receiver": "self", + "message": "puts", + "function": "perform", + "line": 5, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "name" + ] + }, + { + "receiver": "callback", + "message": "call", + "function": "perform", + "line": 6, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "name" + ] + }, + { + "receiver": "target", + "message": "items", + "function": "mutate", + "line": 13, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + } + ], + "state_reads": [ + { + "receiver": "self", + "field": "$source_fact_seen", + "function": "perform", + "line": 8 + }, + { + "receiver": "target", + "field": "items", + "function": "mutate", + "line": 13 + } + ], + "semantic_effects": [ + { + "kind": "context_dependency", + "detail": "$source_fact_seen", + "function": "perform", + "line": 8 + }, + { + "kind": "hidden_mutation", + "detail": "<<", + "function": "mutate", + "line": 13 + }, + { + "kind": "hidden_mutation", + "detail": "[]=", + "function": "mutate", + "line": 12 + }, + { + "kind": "dynamic_dispatch", + "detail": "callback.call", + "function": "perform", + "line": 6 + }, + { + "kind": "hidden_io", + "detail": "puts", + 
"function": "perform", + "line": 5 + }, + { + "kind": "dynamic_dispatch", + "detail": "send", + "function": "perform", + "line": 7 + } + ] + } +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-state_reads.json b/gems/decomplex/examples/source-facts/oracles/ruby-state_reads.json new file mode 100644 index 000000000..f6c517f7e --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-state_reads.json @@ -0,0 +1,93 @@ +{ + "syntax": { + "functions": [ + { + "name": "initialize", + "owner": "SourceFactStateReads", + "line": 4, + "visibility": "public", + "params": [ + "user" + ] + }, + { + "name": "inspect_profile", + "owner": "SourceFactStateReads", + "line": 9, + "visibility": "public", + "params": [ + "account" + ] + } + ], + "calls": [ + { + "receiver": "@user.profile", + "message": "name", + "function": "inspect_profile", + "line": 10, + "conditional": false, + "control": "always", + "safe_navigation": true, + "block": false, + "arguments": [] + }, + { + "receiver": "@user", + "message": "profile", + "function": "inspect_profile", + "line": 10, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "account", + "message": "active?", + "function": "inspect_profile", + "line": 12, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + } + ], + "state_reads": [ + { + "receiver": "self", + "field": "@status", + "function": "inspect_profile", + "line": 11 + }, + { + "receiver": "self", + "field": "@user", + "function": "inspect_profile", + "line": 10 + }, + { + "receiver": "account", + "field": "active?", + "function": "inspect_profile", + "line": 12 + } + ], + "state_writes": [ + { + "receiver": "self", + "field": "@status", + "function": "initialize", + "line": 6 + }, + { + "receiver": "self", + "field": "@user", + "function": "initialize", + "line": 5 + } + ] + } +} diff --git 
a/gems/decomplex/examples/source-facts/oracles/ruby-visibility.json b/gems/decomplex/examples/source-facts/oracles/ruby-visibility.json new file mode 100644 index 000000000..b879d956b --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-visibility.json @@ -0,0 +1,86 @@ +{ + "syntax": { + "functions": [ + { + "name": "prepare", + "owner": "SourceFactVisibility", + "line": 10, + "visibility": "private", + "params": [] + }, + { + "name": "inline_guard", + "owner": "SourceFactVisibility", + "line": 14, + "visibility": "private", + "params": [] + }, + { + "name": "public_step", + "owner": "SourceFactVisibility", + "line": 4, + "visibility": "public", + "params": [] + } + ], + "calls": [ + { + "receiver": "self", + "message": "private", + "function": "(top-level)", + "line": 18, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + ":inline_guard" + ] + }, + { + "receiver": "self", + "message": "protected", + "function": "(top-level)", + "line": 14, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "inline_guard", + "true" + ] + }, + { + "receiver": "self", + "message": "private", + "function": "(top-level)", + "line": 8, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "prepare", + "function": "public_step", + "line": 5, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + } + ], + "state_writes": [ + { + "receiver": "self", + "field": "@ready", + "function": "prepare", + "line": 11 + } + ] + } +} diff --git a/gems/decomplex/examples/source-facts/ruby/block_receiver_calls.rb b/gems/decomplex/examples/source-facts/ruby/block_receiver_calls.rb new file mode 100644 index 000000000..d120c2039 --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/block_receiver_calls.rb 
@@ -0,0 +1,12 @@ +# frozen_string_literal: true + +class SourceFactBlockReceiverCalls + def collect(items) + names = [] + items.flat_map do |item| + names << item.name + item.children.each { |child| child.name } + end + names + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/locals_not_state.rb b/gems/decomplex/examples/source-facts/ruby/locals_not_state.rb new file mode 100644 index 000000000..a4cf11cc9 --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/locals_not_state.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class SourceFactLocalsNotState + def build(values, config) + key = "HOME" + path = ENV[key] + total = 0 + values.each do |value| + total = total + value + end + config[:path] = path + assert_empty values + [path, total, config] + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/semantic_effects.rb b/gems/decomplex/examples/source-facts/ruby/semantic_effects.rb new file mode 100644 index 000000000..6d55b05f1 --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/semantic_effects.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class SourceFactSemanticEffects + def perform(callback, name) + puts name + callback.call(name) + send(:audit, name) + $source_fact_seen + end + + def mutate(target, value) + target[:name] = value + target.items << value + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/state_reads.rb b/gems/decomplex/examples/source-facts/ruby/state_reads.rb new file mode 100644 index 000000000..7cd6cd6c7 --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/state_reads.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +class SourceFactStateReads + def initialize(user) + @user = user + @status = :idle + end + + def inspect_profile(account) + name = @user.profile&.name + status = @status + account.active? 
&& status == :idle && name + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/visibility.rb b/gems/decomplex/examples/source-facts/ruby/visibility.rb new file mode 100644 index 000000000..96e21a95c --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/visibility.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +class SourceFactVisibility + def public_step + prepare + end + + private + + def prepare + @ready = true + end + + protected def inline_guard + true + end + + private :inline_guard +end diff --git a/gems/decomplex/rust/src/decomplex/architecture_test.rs b/gems/decomplex/rust/src/decomplex/architecture_test.rs index fc14b60d7..7ea1f7702 100644 --- a/gems/decomplex/rust/src/decomplex/architecture_test.rs +++ b/gems/decomplex/rust/src/decomplex/architecture_test.rs @@ -148,7 +148,7 @@ fn detectors_do_not_import_tree_sitter_directly() { if path.extension().and_then(|ext| ext.to_str()) != Some("rs") { continue; } - let source = fs::read_to_string(&path).expect("read detector source"); + let source = production_source(&fs::read_to_string(&path).expect("read detector source")); assert!( !source.contains("tree_sitter"), "{} imports tree_sitter directly; detectors should consume normalized syntax/AST facts", @@ -157,6 +157,57 @@ fn detectors_do_not_import_tree_sitter_directly() { } } +#[test] +fn detectors_do_not_cross_the_syntax_boundary() { + let detectors = crate_src().join("detectors"); + let forbidden = [ + ("syntax adapter access", "syntax::adapters"), + ("language profile access", "language_profile("), + ("raw syntax node type", "RawNode"), + ("raw document root access", "document.root"), + ( + "normalized document root access", + "document.normalized_root", + ), + ("document language inspection", "document.language"), + ("Ruby language branch", "Language::Ruby"), + ("Python language branch", "Language::Python"), + ("JavaScript language branch", "Language::JavaScript"), + ("Java language branch", "Language::Java"), + ("TypeScript 
language branch", "Language::TypeScript"), + ("Swift language branch", "Language::Swift"), + ("Kotlin language branch", "Language::Kotlin"), + ("Go language branch", "Language::Go"), + ("Rust language branch", "Language::Rust"), + ("Zig language branch", "Language::Zig"), + ("Lua language branch", "Language::Lua"), + ("C language branch", "Language::C"), + ("Cpp language branch", "Language::Cpp"), + ("CSharp language branch", "Language::CSharp"), + ("Php language branch", "Language::Php"), + ]; + let mut offenders = Vec::new(); + + for entry in fs::read_dir(&detectors).expect("read detectors dir") { + let path = entry.expect("detector entry").path(); + if path.extension().and_then(|ext| ext.to_str()) != Some("rs") { + continue; + } + let source = production_source(&fs::read_to_string(&path).expect("read detector source")); + for (reason, pattern) in forbidden { + if source.contains(pattern) { + offenders.push(format!("{}: {}: {}", path.display(), reason, pattern)); + } + } + } + + assert!( + offenders.is_empty(), + "Detectors must consume syntax facts, not language/parser internals:\n{}", + offenders.join("\n") + ); +} + #[test] fn report_facts_uses_document_detector_apis() { let path = crate_src().join("report_facts.rs"); @@ -214,7 +265,7 @@ fn state_branch_density_detector_does_not_own_ruby_source_mining() { #[test] fn flay_similarity_detector_does_not_own_clone_fingerprint_grammar() { let path = crate_src().join("detectors/flay_similarity.rs"); - let source = fs::read_to_string(&path).expect("read flay_similarity.rs"); + let source = production_source(&fs::read_to_string(&path).expect("read flay_similarity.rs")); for pattern in [ "RawNode", "CLONE_CANDIDATE_KINDS", @@ -232,6 +283,14 @@ fn flay_similarity_detector_does_not_own_clone_fingerprint_grammar() { } } +fn production_source(source: &str) -> String { + source + .lines() + .take_while(|line| line.trim() != "#[cfg(test)]") + .collect::<Vec<_>>() + .join("\n") +} + #[test] fn
ast_normalizer_does_not_branch_on_language_after_parser_setup() { let path = crate_src().join("ast.rs"); diff --git a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs index 4d8a12c20..7a90fa12d 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs @@ -1,5 +1,4 @@ use crate::decomplex::ast::Span; -use crate::decomplex::syntax::adapters::false_simplicity_lexicon::false_simplicity_lexicon; use crate::decomplex::syntax::{self, Document, Language}; use anyhow::Result; use serde::Serialize; @@ -60,7 +59,7 @@ fn class_records_for_document(document: &Document) -> (Vec, Vec) .map(|function| function.owner.clone()) .filter(|owner| !owner.is_empty()) .collect::>(); - let lexicon = false_simplicity_lexicon(document.language); + let core_owner_names = syntax::core_owner_names(document); let mut recs = Vec::new(); let mut hits = Vec::new(); @@ -77,7 +76,7 @@ fn class_records_for_document(document: &Document) -> (Vec, Vec) .last() .unwrap_or(canonical.as_str()) .to_string(); - let core = !canonical.contains("::") && lexicon.core_consts.contains(&simple.as_str()); + let core = !canonical.contains("::") && core_owner_names.contains(&simple.as_str()); recs.push(ClassRec { name: canonical.clone(), file: owner.file.clone(), diff --git a/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs b/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs index c56bc2c6b..443d477c0 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs @@ -1,5 +1,4 @@ use crate::decomplex::ast::Span; -use crate::decomplex::syntax::adapters::language_profile; use crate::decomplex::syntax::{self, CloneCandidate, Document, Language, SimilarityFinding}; use anyhow::Result; use std::collections::{BTreeMap, HashMap, HashSet}; @@ -59,7 +58,7 
@@ impl Scanner { fn candidates_for_document(&mut self, document: &Document) -> Vec { let mut out = Vec::new(); let mut seen = HashSet::new(); - for candidate in language_profile(document.language).clone_candidates(document) { + for candidate in syntax::clone_candidates(document) { self.add_candidate(&mut out, &mut seen, candidate); } out @@ -400,6 +399,7 @@ fn combinations(size: usize, count: usize) -> Vec> { #[cfg(test)] mod tests { use super::*; + use crate::decomplex::syntax::adapters::language_profile; use std::io::Write; use tempfile::NamedTempFile; diff --git a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs index d8c5e6be3..85f4a683c 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs @@ -76,23 +76,16 @@ pub fn scan_documents(documents: &[Document]) -> ImplicitControlFlowReport { fn sequences_for_document(document: &Document, effect_index: &EffectIndex) -> Vec { document - .function_defs + .protocol_call_paths .iter() - .filter_map(|function_def| { - let defn = protocol_method_name(&function_def.name); - let calls = document - .call_sites + .filter_map(|path| { + let calls = path + .calls .iter() - .filter(|call| { - call.owner == function_def.owner - && call.function == function_def.name - && call.receiver == "self" - }) .map(|call| { - let mid = protocol_method_name(&call.message); - let effect = effect_index.effect_for(&function_def.owner, &mid); + let effect = effect_index.effect_for(&path.owner, &call.mid); Call { - mid, + mid: call.mid.clone(), line: call.line, span: call.span, reads: effect.map(|e| e.reads.clone()).unwrap_or_default(), @@ -111,30 +104,16 @@ fn sequences_for_document(document: &Document, effect_index: &EffectIndex) -> Ve } Some(MethodSequence { - file: function_def.file.clone(), - owner: function_def.owner.clone(), - defn, - line: 
function_def.line, + file: path.file.clone(), + owner: path.owner.clone(), + defn: path.name.clone(), + line: path.line, calls, }) }) .collect() } -fn protocol_method_name(name: &str) -> String { - name.split(['.', ':']) - .filter(|part| !part.is_empty()) - .last() - .unwrap_or(name) - .to_string() -} - -fn normalize_protocol_state(name: &str) -> String { - name.trim_start_matches('@') - .trim_end_matches('=') - .to_string() -} - struct EffectIndex { by_owner_name: BTreeMap<(String, String), MethodEffect>, by_name: BTreeMap>, @@ -144,34 +123,12 @@ impl EffectIndex { fn build_documents(documents: &[Document]) -> Self { let mut effects = Vec::new(); for document in documents { - for function_def in &document.function_defs { - let mut reads = document - .state_reads - .iter() - .filter(|read| { - read.owner == function_def.owner && read.function == function_def.name - }) - .map(|read| normalize_protocol_state(&read.field)) - .collect::>(); - reads.sort(); - reads.dedup(); - - let mut writes = document - .state_writes - .iter() - .filter(|write| { - write.owner == function_def.owner && write.function == function_def.name - }) - .map(|write| normalize_protocol_state(&write.field)) - .collect::>(); - writes.sort(); - writes.dedup(); - + for effect in &document.protocol_method_effects { effects.push(MethodEffect { - owner: function_def.owner.clone(), - name: protocol_method_name(&function_def.name), - reads, - writes, + owner: effect.owner.clone(), + name: effect.name.clone(), + reads: effect.reads.clone(), + writes: effect.writes.clone(), }); } } diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index fee39a45e..a37b671fe 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -1,2105 +1,3 @@ -use crate::decomplex::ast::{self, Child, Node, RawNode, Span}; -use 
crate::decomplex::syntax::adapters::{language_profile, LanguageProfile}; -use crate::decomplex::syntax::{self, Document, FunctionDef, Language}; -use anyhow::Result; -use serde::{Deserialize, Serialize}; -use std::collections::{BTreeMap, BTreeSet}; -use std::path::{Path, PathBuf}; - -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] -pub struct LocalFlowRow { - pub summaries: Vec, -} - -#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] -pub struct MethodSummary { - pub id: String, - pub owner: String, - pub name: String, - pub file: String, - pub line: usize, - pub span: Span, - #[serde(default = "empty_node", skip_serializing)] - pub node: Node, - #[serde(default, skip_serializing)] - pub raw_node: Option, - pub statements: Vec, - pub boundaries: Vec, -} - -#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] -pub struct Statement { - pub index: usize, - pub line: usize, - pub end_line: usize, - pub span: Span, - pub source: String, - pub reads: BTreeSet, - pub writes: BTreeSet, - pub dependencies: Vec<(String, String)>, - pub co_uses: Vec<(String, String)>, -} - -#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] -pub struct Boundary { - pub before_index: usize, - pub after_index: usize, - pub line: usize, - pub kind: String, - pub text: String, -} - -const OWNER_TYPES: &[&str] = &["CLASS", "MODULE"]; -const METHOD_TYPES: &[&str] = &["DEFN", "DEFS"]; -const SKIP_NESTED_TYPES: &[&str] = &["CLASS", "MODULE", "DEFN", "DEFS", "LAMBDA"]; -const LOCAL_READ_TYPES: &[&str] = &["LVAR", "DVAR"]; -const LOCAL_WRITE_TYPES: &[&str] = &["LASGN", "DASGN"]; -const STATEMENT_CONTAINER_TYPES: &[&str] = &[ - "BLOCK", - "COMPOUND_STATEMENT", - "DECLARATION_LIST", - "FUNCTION_BODY", - "HASH", - "STATEMENTS", -]; - -fn empty_node() -> Node { - Node { - r#type: "ROOT".to_string(), - children: Vec::new(), - first_lineno: 1, - first_column: 0, - last_lineno: 1, - last_column: 0, - text: String::new(), - } -} - -pub fn scan_files(files: &[PathBuf], 
language: Language) -> Result> { - let documents = syntax::parse_files(files, language)?; - Ok(scan_documents(&documents)) -} - -pub fn scan_documents(documents: &[Document]) -> Vec { - let mut out = Vec::new(); - for document in documents { - let normalized = normalized_local_methods(document); - if document.language != Language::Ruby { - let raw = raw_local_methods(document); - let raw_keys: BTreeSet<_> = raw.iter().map(method_summary_key).collect(); - out.extend(raw); - out.extend( - normalized - .into_iter() - .filter(|summary| !raw_keys.contains(&method_summary_key(summary))), - ); - continue; - } - - out.extend(normalized); - } - out -} - -fn normalized_local_methods(document: &Document) -> Vec { - let mut detector = LocalFlow::new( - document.file.clone(), - document.lines.clone(), - method_metadata(document), - ); - detector.scan(&document.normalized_root) -} - -fn method_summary_key(summary: &MethodSummary) -> (String, String, usize) { - (summary.file.clone(), summary.id.clone(), summary.line) -} - -#[derive(Clone, Debug, Eq, PartialEq)] -struct MethodMetadata { - owner: String, - name: String, - params: BTreeSet, -} - -fn raw_local_methods(document: &Document) -> Vec { - let profile = language_profile(document.language); - document - .function_defs - .iter() - .map(|function| raw_method_summary(document, profile, function)) - .collect() -} - -fn raw_method_summary( - document: &Document, - profile: &dyn LanguageProfile, - function: &FunctionDef, -) -> MethodSummary { - let statement_nodes = raw_function_body_statements(&function.body, profile); - let local_names = raw_local_names(function, &statement_nodes, profile); - let statements: Vec<_> = statement_nodes - .iter() - .enumerate() - .map(|(index, statement)| raw_statement_summary(statement, index, &local_names, profile)) - .collect(); - let owner = local_flow_owner(&document.file, &function.owner); - - MethodSummary { - id: format!("{}#{}", owner, function.name), - owner, - name: function.name.clone(), 
- file: function.file.clone(), - line: function.line, - span: function.span, - node: normalized_node_for_span(&document.normalized_root, function.span) - .cloned() - .unwrap_or_else(|| fallback_node_from_raw(&function.body)), - raw_node: Some(function.body.clone()), - boundaries: raw_structural_boundaries(document, &statements), - statements, - } -} - -fn raw_function_body_statements<'a>( - node: &'a RawNode, - profile: &dyn LanguageProfile, -) -> Vec<&'a RawNode> { - let body = raw_function_body_node(node, profile); - let Some(body) = body else { - return Vec::new(); - }; - - let mut named = raw_named_children(body) - .into_iter() - .filter(|child| !raw_comment_node(child)) - .collect::>(); - if named.len() == 1 - && profile - .nested_statement_wrapper_node_kinds() - .contains(&named[0].kind.as_str()) - { - if raw_branch_node(named[0], profile) { - return vec![named[0]]; - } - named = raw_named_children(named[0]) - .into_iter() - .filter(|child| !raw_comment_node(child)) - .collect(); - } - if named.is_empty() && body.text.trim().is_empty() { - return Vec::new(); - } - if raw_branch_node(body, profile) || raw_assignment_statement(body, profile) || named.is_empty() - { - return vec![body]; - } - named -} - -fn raw_function_body_node<'a>( - node: &'a RawNode, - profile: &dyn LanguageProfile, -) -> Option<&'a RawNode> { - raw_named_children(node).into_iter().rev().find(|child| { - profile - .function_body_node_kinds() - .contains(&child.kind.as_str()) - }) -} - -fn raw_local_names( - function: &FunctionDef, - statements: &[&RawNode], - profile: &dyn LanguageProfile, -) -> BTreeSet { - let mut names: BTreeSet = function.params.iter().cloned().collect(); - if let Some(receiver) = raw_function_receiver_name(&function.body, profile) { - names.insert(receiver); - } - for statement in statements { - names.extend(raw_local_writes(statement, profile)); - } - names -} - -fn raw_function_receiver_name(node: &RawNode, profile: &dyn LanguageProfile) -> Option { - if !profile - 
.method_receiver_node_kinds() - .contains(&node.kind.as_str()) - { - return None; - } - let receiver_params = raw_named_children(node).into_iter().find(|child| { - profile - .parameter_list_node_kinds() - .contains(&child.kind.as_str()) - })?; - let receiver = raw_named_children(receiver_params) - .into_iter() - .find(|child| { - profile - .receiver_parameter_node_kinds() - .contains(&child.kind.as_str()) - })?; - let name = raw_named_children(receiver).into_iter().find(|child| { - profile - .first_argument_receiver_name_node_kinds() - .contains(&child.kind.as_str()) - })?; - raw_local_identifier_text(name, profile) -} - -fn raw_statement_summary( - node: &RawNode, - index: usize, - local_names: &BTreeSet, - profile: &dyn LanguageProfile, -) -> Statement { - let writes = raw_local_writes(node, profile); - let reads = raw_local_reads(node, local_names, profile); - Statement { - index, - line: node.span[0], - end_line: node.span[2], - span: node.span, - source: profile.normalize_source_text(&node.text), - dependencies: raw_assignment_dependencies(node, local_names, profile), - co_uses: co_use_pairs(&reads), - reads, - writes, - } -} - -fn raw_local_reads( - node: &RawNode, - local_names: &BTreeSet, - profile: &dyn LanguageProfile, -) -> BTreeSet { - if raw_nested_local_scope(node, profile) { - return BTreeSet::new(); - } - - let mut reads = Vec::new(); - raw_walk_local(node, None, node, profile, &mut |child, parent| { - let Some(name) = raw_local_identifier_text(child, profile) else { - return; - }; - if local_names.contains(&name) - && !raw_local_write_node(child, parent, profile) - && !raw_assignment_lhs_read_in_tree(node, child, profile) - && !raw_python_import_name(parent, profile) - && !raw_python_with_alias_read(child, parent, profile) - && !raw_declaration_name_in_tree(node, child, profile) - && !raw_declaration_name(child, parent, profile) - && !raw_member_name(child, parent, profile) - && !raw_keyed_element_key(child, parent, profile) - { - reads.push(name); 
- } - }); - reads.into_iter().collect() -} - -fn raw_local_writes(node: &RawNode, profile: &dyn LanguageProfile) -> BTreeSet { - if raw_nested_local_scope(node, profile) { - return BTreeSet::new(); - } - - let source = profile.normalize_source_text(&node.text); - let textual_writes_allowed = raw_assignment_statement(node, profile) - || profile - .local_declaration_node_kinds() - .contains(&node.kind.as_str()); - let mut writes = if !textual_writes_allowed { - Vec::new() - } else if profile.language() == Language::Python { - python_textual_local_writes(&source) - } else { - textual_local_writes(&source) - }; - if profile.language() == Language::Python { - writes.extend(raw_python_with_alias_names(node, profile)); - } - raw_walk_local(node, None, node, profile, &mut |child, parent| { - if raw_local_write_node(child, parent, profile) - || raw_declaration_name_in_tree(node, child, profile) - || raw_assignment_lhs_write_in_tree(node, child, profile) - { - if let Some(name) = raw_local_identifier_text(child, profile) { - writes.push(name); - } - } - }); - writes - .into_iter() - .filter_map(|name| { - let normalized = profile.normalize_local_identifier_text(&name); - (!normalized.is_empty()).then_some(normalized) - }) - .collect() -} - -fn raw_assignment_dependencies( - node: &RawNode, - local_names: &BTreeSet, - profile: &dyn LanguageProfile, -) -> Vec<(String, String)> { - let lhs_names = raw_local_writes(node, profile); - if lhs_names.is_empty() { - return Vec::new(); - } - - let reads = raw_local_reads(node, local_names, profile); - let mut deps = Vec::new(); - for lhs in &lhs_names { - for read in &reads { - if lhs != read && !lhs_names.contains(read) { - deps.push((lhs.clone(), read.clone())); - } - } - } - deps.sort(); - deps.dedup(); - deps -} - -fn co_use_pairs(reads: &BTreeSet) -> Vec<(String, String)> { - let reads = reads.iter().cloned().collect::>(); - let mut out = Vec::new(); - for i in 0..reads.len() { - for j in i + 1..reads.len() { - 
out.push((reads[i].clone(), reads[j].clone())); - } - } - out -} - -fn raw_structural_boundaries(document: &Document, statements: &[Statement]) -> Vec { - let mut out = Vec::new(); - for i in 0..statements.len().saturating_sub(1) { - let left = &statements[i]; - let right = &statements[i + 1]; - if let Some(boundary) = raw_source_boundary(document, left.end_line + 1, right.line - 1) { - out.push(Boundary { - before_index: left.index, - after_index: right.index, - line: boundary.line, - kind: boundary.kind, - text: boundary.text, - }); - } - } - out -} - -fn raw_source_boundary( - document: &Document, - first_line: usize, - last_line: usize, -) -> Option { - if first_line > last_line { - return None; - } - - let mut blank = None; - for line_number in first_line..=last_line { - let stripped = document - .lines - .get(line_number - 1) - .map(|line| line.trim()) - .unwrap_or(""); - if stripped.starts_with('#') || stripped.starts_with("//") || stripped.starts_with("--") { - return Some(RawBoundary { - line: line_number, - kind: "comment".to_string(), - text: stripped.to_string(), - }); - } - if stripped.is_empty() && blank.is_none() { - blank = Some(RawBoundary { - line: line_number, - kind: "blank".to_string(), - text: stripped.to_string(), - }); - } - } - blank -} - -fn raw_walk_local<'a>( - node: &'a RawNode, - parent: Option<&'a RawNode>, - root: &'a RawNode, - profile: &dyn LanguageProfile, - block: &mut dyn FnMut(&'a RawNode, Option<&'a RawNode>), -) { - if !std::ptr::eq(node, root) && raw_nested_local_scope(node, profile) { - return; - } - block(node, parent); - for child in &node.children { - raw_walk_local(child, Some(node), root, profile, block); - } -} - -fn raw_nested_local_scope(node: &RawNode, profile: &dyn LanguageProfile) -> bool { - profile.function_node_kinds().contains(&node.kind.as_str()) || raw_owner_node(node, profile) -} - -fn raw_owner_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { - profile - .class_owner_node_kinds() - 
.contains(&node.kind.as_str()) - || profile - .module_owner_node_kinds() - .contains(&node.kind.as_str()) - || profile - .generic_owner_node_kinds() - .contains(&node.kind.as_str()) - || profile - .impl_owner_node_kinds() - .contains(&node.kind.as_str()) - || profile - .struct_owner_node_kinds() - .contains(&node.kind.as_str()) -} - -fn raw_local_identifier_text(node: &RawNode, profile: &dyn LanguageProfile) -> Option { - if profile - .identifier_node_kinds() - .contains(&node.kind.as_str()) - { - let text = profile.normalize_local_identifier_text(&node.text); - return (!text.is_empty()).then_some(text); - } - if profile - .local_identifier_wrapper_node_kinds() - .contains(&node.kind.as_str()) - && node.named - && raw_named_children(node).is_empty() - && simple_identifier(&node.text) - { - let text = profile.normalize_local_identifier_text(&node.text); - return (!text.is_empty()).then_some(text); - } - None -} - -fn raw_local_write_node( - node: &RawNode, - parent: Option<&RawNode>, - profile: &dyn LanguageProfile, -) -> bool { - if raw_local_identifier_text(node, profile).is_none() || raw_member_name(node, parent, profile) - { - return false; - } - if raw_call_name(node, parent, profile) { - return false; - } - if raw_declaration_name(node, parent, profile) { - return true; - } - let Some(parent) = parent else { - return false; - }; - if profile - .update_statement_node_kinds() - .contains(&parent.kind.as_str()) - && raw_named_children(parent) - .first() - .map(|target| std::ptr::eq(*target, node)) - .unwrap_or(false) - { - return true; - } - if profile - .assignment_node_kinds() - .contains(&parent.kind.as_str()) - { - if let Some(lhs) = raw_named_children(parent).first() { - if raw_contains_node(lhs, node) { - return true; - } - } - } - if profile.language() == Language::Python { - if parent.kind == "keyword_argument" { - return false; - } - if raw_python_loop_target(node, parent) - || raw_python_named_expression_lhs(node, parent) - || 
raw_python_typed_assignment_lhs(node, parent) - || raw_python_annotation_lhs(node, parent) - { - return true; - } - } - raw_assignment_lhs(node, parent, profile) -} - -fn raw_python_loop_target(node: &RawNode, parent: &RawNode) -> bool { - if raw_previous_sibling(node, parent) - .map(|sibling| sibling.text.as_str() == "for") - .unwrap_or(false) - && raw_next_sibling(node, parent) - .map(|sibling| sibling.text.as_str() != ":") - .unwrap_or(false) - { - return true; - } - - let mut seen_for = false; - let mut current = raw_previous_sibling(node, parent); - while let Some(sibling) = current { - match sibling.text.as_str() { - "in" | ":" => return false, - "for" => { - seen_for = true; - break; - } - _ => current = raw_previous_sibling(sibling, parent), - } - } - if !seen_for { - return false; - } - - current = raw_next_sibling(node, parent); - while let Some(sibling) = current { - match sibling.text.as_str() { - "in" => return true, - ":" => return false, - _ => current = raw_next_sibling(sibling, parent), - } - } - false -} - -fn raw_python_typed_assignment_lhs(node: &RawNode, parent: &RawNode) -> bool { - let Some(colon) = raw_next_sibling(node, parent) else { - return false; - }; - if colon.text != ":" { - return false; - } - let Some(type_node) = raw_next_sibling(colon, parent) else { - return false; - }; - if type_node.kind != "type" { - return false; - } - raw_next_sibling(type_node, parent) - .map(|sibling| sibling.text.as_str() == "=") - .unwrap_or(false) -} - -fn raw_python_named_expression_lhs(node: &RawNode, parent: &RawNode) -> bool { - parent.kind == "named_expression" - && raw_named_children(parent) - .first() - .map(|lhs| std::ptr::eq(*lhs, node)) - .unwrap_or(false) - && raw_next_sibling(node, parent) - .map(|sibling| sibling.text.as_str() == ":=") - .unwrap_or(false) -} - -fn raw_python_annotation_lhs(node: &RawNode, parent: &RawNode) -> bool { - let Some(colon) = raw_next_sibling(node, parent) else { - return false; - }; - if colon.text != ":" { - 
return false; - } - let Some(type_node) = raw_next_sibling(colon, parent) else { - return false; - }; - if type_node.kind != "type" { - return false; - } - !raw_next_sibling(type_node, parent) - .map(|sibling| sibling.text.as_str() == "=") - .unwrap_or(false) -} - -fn raw_python_with_alias_names(node: &RawNode, profile: &dyn LanguageProfile) -> Vec { - let mut names = Vec::new(); - raw_walk_local(node, None, node, profile, &mut |child, _parent| { - if child.kind == "as_pattern_target" && simple_identifier(&child.text) { - names.push(child.text.clone()); - } - }); - names -} - -fn raw_python_import_name(parent: Option<&RawNode>, profile: &dyn LanguageProfile) -> bool { - profile.language() == Language::Python - && parent - .map(|parent| parent.kind.as_str() == "dotted_name") - .unwrap_or(false) -} - -fn raw_python_with_alias_read( - node: &RawNode, - parent: Option<&RawNode>, - profile: &dyn LanguageProfile, -) -> bool { - profile.language() == Language::Python - && (node.kind == "as_pattern_target" - || parent - .map(|parent| parent.kind.as_str() == "as_pattern_target") - .unwrap_or(false)) -} - -fn python_textual_local_writes(source: &str) -> Vec { - match split_assignment(source) { - Some((_lhs, ":=")) => Vec::new(), - _ => textual_local_writes(source), - } -} - -fn raw_declaration_name( - node: &RawNode, - parent: Option<&RawNode>, - profile: &dyn LanguageProfile, -) -> bool { - parent - .map(|parent| { - raw_local_declaration_name_nodes(parent, profile) - .into_iter() - .any(|name| std::ptr::eq(name, node) || raw_contains_node(name, node)) - }) - .unwrap_or(false) -} - -fn raw_declaration_name_in_tree( - root: &RawNode, - target: &RawNode, - profile: &dyn LanguageProfile, -) -> bool { - raw_local_declaration_name_nodes(root, profile) - .into_iter() - .any(|name| std::ptr::eq(name, target) || raw_contains_node(name, target)) - || root - .children - .iter() - .any(|child| raw_declaration_name_in_tree(child, target, profile)) -} - -fn 
raw_local_declaration_name_nodes<'a>( - node: &'a RawNode, - profile: &dyn LanguageProfile, -) -> Vec<&'a RawNode> { - if !profile - .local_declaration_node_kinds() - .contains(&node.kind.as_str()) - { - return Vec::new(); - } - - if profile - .short_variable_declaration_node_kinds() - .contains(&node.kind.as_str()) - { - if let Some(left) = raw_named_children(node).into_iter().find(|child| { - profile - .variable_declaration_node_kinds() - .contains(&child.kind.as_str()) - }) { - let identifiers = raw_named_children(left) - .into_iter() - .filter(|child| raw_local_identifier_text(child, profile).is_some()) - .collect::>(); - if !identifiers.is_empty() { - return identifiers; - } - if simple_identifier(&left.text) { - return vec![left]; - } - } - return Vec::new(); - } - - let variables = raw_variable_declaration_nodes(node, profile); - if !variables.is_empty() { - let names = variables - .into_iter() - .flat_map(|variable| raw_variable_declaration_name_nodes(variable, profile)) - .collect::>(); - if !names.is_empty() { - return names; - } - } - - if let Some(declaration_assignment) = raw_named_children(node).into_iter().find(|child| { - profile - .declaration_assignment_node_kinds() - .contains(&child.kind.as_str()) - }) { - if let Some(lhs) = raw_named_children(declaration_assignment).first().copied() { - return raw_first_identifier(lhs, profile) - .or(Some(lhs)) - .into_iter() - .collect(); - } - } - - raw_named_children(node) - .into_iter() - .find(|child| { - profile - .local_identifier_wrapper_node_kinds() - .contains(&child.kind.as_str()) - }) - .or_else(|| raw_first_identifier(node, profile)) - .into_iter() - .collect() -} - -fn raw_variable_declaration_nodes<'a>( - node: &'a RawNode, - profile: &dyn LanguageProfile, -) -> Vec<&'a RawNode> { - let mut out = Vec::new(); - raw_collect_variable_declaration_nodes(node, profile, &mut out); - out -} - -fn raw_collect_variable_declaration_nodes<'a>( - node: &'a RawNode, - profile: &dyn LanguageProfile, - out: &mut 
Vec<&'a RawNode>, -) { - if profile - .variable_declaration_node_kinds() - .contains(&node.kind.as_str()) - { - out.push(node); - return; - } - for child in raw_named_children(node) { - raw_collect_variable_declaration_nodes(child, profile, out); - } -} - -fn raw_variable_declaration_name_nodes<'a>( - variable: &'a RawNode, - profile: &dyn LanguageProfile, -) -> Vec<&'a RawNode> { - if simple_identifier(&variable.text) { - return vec![variable]; - } - - if profile - .multi_name_variable_declaration_node_kinds() - .contains(&variable.kind.as_str()) - { - let names = raw_named_children(variable) - .into_iter() - .take_while(|child| raw_local_identifier_text(child, profile).is_some()) - .collect::>(); - if !names.is_empty() { - return names; - } - } - - raw_first_identifier(variable, profile) - .into_iter() - .collect() -} - -fn raw_first_identifier<'a>( - node: &'a RawNode, - profile: &dyn LanguageProfile, -) -> Option<&'a RawNode> { - if raw_local_identifier_text(node, profile).is_some() { - return Some(node); - } - node.children - .iter() - .find_map(|child| raw_first_identifier(child, profile)) -} - -fn raw_assignment_lhs(node: &RawNode, parent: &RawNode, profile: &dyn LanguageProfile) -> bool { - if raw_previous_sibling(node, parent) - .map(|sibling| sibling.text.as_str() == ":") - .unwrap_or(false) - { - return false; - } - raw_next_sibling(node, parent) - .map(|sibling| { - !sibling.named - && profile - .assignment_operator_tokens() - .contains(&sibling.text.as_str()) - }) - .unwrap_or(false) -} - -fn raw_assignment_lhs_read_in_tree( - root: &RawNode, - target: &RawNode, - profile: &dyn LanguageProfile, -) -> bool { - if profile - .deferred_statement_node_kinds() - .contains(&root.kind.as_str()) - { - return false; - } - if profile - .assignment_node_kinds() - .contains(&root.kind.as_str()) - { - if let Some(lhs) = raw_named_children(root).first() { - if raw_assignment_lhs_read_target(lhs, target, profile) { - return true; - } - } - } - root.children - .iter() 
- .any(|child| raw_assignment_lhs_read_in_tree(child, target, profile)) -} - -fn raw_assignment_lhs_write_in_tree( - root: &RawNode, - target: &RawNode, - profile: &dyn LanguageProfile, -) -> bool { - if profile - .deferred_statement_node_kinds() - .contains(&root.kind.as_str()) - { - return false; - } - if profile - .assignment_node_kinds() - .contains(&root.kind.as_str()) - { - if let Some(lhs) = raw_named_children(root).first() { - if raw_assignment_lhs_write_target(lhs, target, profile) { - return true; - } - } - } - root.children - .iter() - .any(|child| raw_assignment_lhs_write_in_tree(child, target, profile)) -} - -fn raw_assignment_lhs_read_target( - lhs: &RawNode, - target: &RawNode, - profile: &dyn LanguageProfile, -) -> bool { - if raw_indexed_lhs_node(lhs, profile) { - return raw_contains_node(lhs, target); - } - if raw_field_like_node(lhs, profile) { - return profile.suppress_field_receiver_lhs_reads() - && raw_member_receiver_target(lhs, target, profile); - } - if let Some(lhs_name) = raw_local_identifier_text(lhs, profile) { - return std::ptr::eq(lhs, target) - || (raw_contains_node(lhs, target) - && raw_local_identifier_text(target, profile) - .map(|target_name| target_name == lhs_name) - .unwrap_or(false)); - } - if profile - .expression_list_node_kinds() - .contains(&lhs.kind.as_str()) - { - if raw_named_children(lhs).is_empty() && raw_local_identifier_text(lhs, profile).is_some() { - return std::ptr::eq(lhs, target); - } - return raw_named_children(lhs) - .into_iter() - .any(|child| raw_assignment_lhs_read_target(child, target, profile)); - } - raw_contains_node(lhs, target) -} - -fn raw_assignment_lhs_write_target( - lhs: &RawNode, - target: &RawNode, - profile: &dyn LanguageProfile, -) -> bool { - if raw_indexed_lhs_node(lhs, profile) { - return raw_named_children(lhs) - .first() - .map(|object| raw_assignment_lhs_write_target(object, target, profile)) - .unwrap_or(false); - } - if raw_field_like_node(lhs, profile) { - return 
raw_member_receiver_target(lhs, target, profile); - } - if let Some(lhs_name) = raw_local_identifier_text(lhs, profile) { - return std::ptr::eq(lhs, target) - || (raw_contains_node(lhs, target) - && raw_local_identifier_text(target, profile) - .map(|target_name| target_name == lhs_name) - .unwrap_or(false)); - } - if profile - .expression_list_node_kinds() - .contains(&lhs.kind.as_str()) - { - if raw_named_children(lhs).is_empty() && raw_local_identifier_text(lhs, profile).is_some() { - return std::ptr::eq(lhs, target); - } - return raw_named_children(lhs) - .into_iter() - .any(|child| raw_assignment_lhs_write_target(child, target, profile)); - } - raw_contains_node(lhs, target) -} - -fn raw_indexed_lhs_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { - profile - .indexed_lhs_node_kinds() - .contains(&node.kind.as_str()) - || (profile - .indexed_lhs_bracket_wrapper_node_kinds() - .contains(&node.kind.as_str()) - && node - .children - .iter() - .any(|child| !child.named && child.text == "[")) -} - -fn raw_field_like_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { - profile - .field_like_node_kinds() - .contains(&node.kind.as_str()) - || (profile - .field_like_dot_wrapper_node_kinds() - .contains(&node.kind.as_str()) - && node - .children - .iter() - .any(|child| !child.named && child.text == ".")) -} - -fn raw_member_receiver_target( - node: &RawNode, - target: &RawNode, - profile: &dyn LanguageProfile, -) -> bool { - let Some(receiver) = raw_named_children(node).first().copied() else { - return false; - }; - if raw_local_identifier_text(receiver, profile).is_some() { - return std::ptr::eq(receiver, target); - } - if raw_indexed_lhs_node(receiver, profile) { - return raw_named_children(receiver) - .first() - .map(|object| raw_member_receiver_target(object, target, profile)) - .unwrap_or(false); - } - if raw_field_like_node(receiver, profile) { - return raw_member_receiver_target(receiver, target, profile); - } - if 
raw_named_children(receiver) - .into_iter() - .any(|child| raw_member_receiver_target(child, target, profile)) - { - return true; - } - false -} - -fn raw_member_name( - node: &RawNode, - parent: Option<&RawNode>, - profile: &dyn LanguageProfile, -) -> bool { - let Some(parent) = parent else { - return false; - }; - if !raw_field_like_node(parent, profile) { - return false; - } - raw_named_children(parent) - .last() - .map(|field| std::ptr::eq(*field, node)) - .unwrap_or(false) -} - -fn raw_call_name(node: &RawNode, parent: Option<&RawNode>, profile: &dyn LanguageProfile) -> bool { - let Some(parent) = parent else { - return false; - }; - if raw_field_like_node(parent, profile) { - return false; - } - profile.call_node_kinds().contains(&parent.kind.as_str()) - && raw_named_children(parent) - .first() - .map(|callee| std::ptr::eq(*callee, node)) - .unwrap_or(false) -} - -fn raw_keyed_element_key( - node: &RawNode, - parent: Option<&RawNode>, - profile: &dyn LanguageProfile, -) -> bool { - let Some(parent) = parent else { - return false; - }; - if !profile - .keyed_element_node_kinds() - .contains(&parent.kind.as_str()) - { - return false; - } - raw_named_children(parent) - .first() - .map(|key| std::ptr::eq(*key, node)) - .unwrap_or(false) - || raw_next_sibling(node, parent) - .map(|sibling| !sibling.named && sibling.text == ":") - .unwrap_or(false) -} - -fn raw_assignment_statement(node: &RawNode, profile: &dyn LanguageProfile) -> bool { - profile - .assignment_node_kinds() - .contains(&node.kind.as_str()) - || node.children.iter().any(|child| { - !child.named - && profile - .assignment_operator_tokens() - .contains(&child.text.as_str()) - }) -} - -fn raw_branch_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { - profile.branch_node_kinds().contains(&node.kind.as_str()) -} - -fn raw_comment_node(node: &RawNode) -> bool { - node.kind.to_ascii_lowercase().contains("comment") -} - -fn raw_named_children(node: &RawNode) -> Vec<&RawNode> { - 
node.children.iter().filter(|child| child.named).collect() -} - -fn raw_next_sibling<'a>(node: &RawNode, parent: &'a RawNode) -> Option<&'a RawNode> { - let index = parent - .children - .iter() - .position(|child| std::ptr::eq(child, node))?; - parent.children.get(index + 1) -} - -fn raw_previous_sibling<'a>(node: &RawNode, parent: &'a RawNode) -> Option<&'a RawNode> { - let index = parent - .children - .iter() - .position(|child| std::ptr::eq(child, node))?; - index - .checked_sub(1) - .and_then(|previous| parent.children.get(previous)) -} - -fn raw_contains_node(root: &RawNode, target: &RawNode) -> bool { - std::ptr::eq(root, target) - || root - .children - .iter() - .any(|child| raw_contains_node(child, target)) -} - -fn normalized_node_for_span(root: &Node, span: Span) -> Option<&Node> { - if [ - root.first_lineno, - root.first_column, - root.last_lineno, - root.last_column, - ] == span - { - return Some(root); - } - root.children - .iter() - .filter_map(ast::node) - .find_map(|child| normalized_node_for_span(child, span)) -} - -fn fallback_node_from_raw(raw: &RawNode) -> Node { - Node { - r#type: "DEFN".to_string(), - children: raw - .children - .iter() - .filter(|child| child.named) - .map(|child| Child::Node(Box::new(fallback_node_from_raw(child)))) - .collect(), - first_lineno: raw.span[0], - first_column: raw.span[1], - last_lineno: raw.span[2], - last_column: raw.span[3], - text: raw.text.clone(), - } -} - -struct LocalFlow { - file: String, - lines: Vec, - methods_by_span: BTreeMap, -} - -impl LocalFlow { - fn new( - file: String, - lines: Vec, - methods_by_span: BTreeMap, - ) -> Self { - Self { - file, - lines, - methods_by_span, - } - } - - fn scan(&mut self, root: &Node) -> Vec { - let mut out = Vec::new(); - self.collect_methods(root, &Vec::new(), &mut out); - out - } - - fn collect_methods(&self, node: &Node, owners: &[String], out: &mut Vec) { - if OWNER_TYPES.contains(&node.r#type.as_str()) { - let owner = self.full_owner_name(owners, node); - for 
method in self.owner_methods(node) { - out.push(self.method_summary(method, Some(&owner))); - } - let mut next_owners = owners.to_vec(); - next_owners.push(self.owner_segment(node)); - self.collect_nested_owners(node, &next_owners, out); - } else if METHOD_TYPES.contains(&node.r#type.as_str()) && owners.is_empty() { - out.push(self.method_summary(node, None)); - } else { - for child in node.children.iter().filter_map(ast::node) { - self.collect_methods(child, owners, out); - } - } - } - - fn collect_nested_owners(&self, node: &Node, owners: &[String], out: &mut Vec) { - if METHOD_TYPES.contains(&node.r#type.as_str()) { - return; - } - - for child in node.children.iter().filter_map(ast::node) { - if OWNER_TYPES.contains(&child.r#type.as_str()) { - self.collect_methods(child, owners, out); - } else { - self.collect_nested_owners(child, owners, out); - } - } - } - - fn method_summary(&self, node: &Node, owner_hint: Option<&str>) -> MethodSummary { - let node_span = [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ]; - let metadata = self.methods_by_span.get(&node_span); - let owner = metadata - .map(|item| item.owner.as_str()) - .or(owner_hint) - .unwrap_or("(top-level)"); - let name = metadata - .map(|item| item.name.clone()) - .unwrap_or_else(|| self.method_name(node)); - let statement_nodes = ast::body_stmts(node) - .into_iter() - .filter(|statement| !comment_statement(statement)) - .collect::>(); - let local_names = self.local_names(&statement_nodes, metadata); - let statements: Vec<_> = statement_nodes - .iter() - .enumerate() - .map(|(index, stmt)| self.statement_summary(stmt, index, &local_names)) - .collect(); - MethodSummary { - id: format!("{}#{}", owner, name), - owner: owner.to_string(), - name, - file: self.file.clone(), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - node: node.clone(), - raw_node: None, - boundaries: 
self.structural_boundaries(&statements), - statements, - } - } - - fn statement_summary( - &self, - node: &Node, - index: usize, - local_names: &BTreeSet, - ) -> Statement { - let source = ast::slice(node, &self.lines); - let writes = self.local_writes(node); - let reads = self.local_reads(node, local_names, &writes); - Statement { - index, - line: node.first_lineno, - end_line: node.last_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - source, - dependencies: self.assignment_dependencies(node, local_names), - co_uses: self.co_use_edges(node, local_names), - reads, - writes, - } - } - - fn local_names( - &self, - statements: &[&Node], - metadata: Option<&MethodMetadata>, - ) -> BTreeSet { - let mut names = metadata.map(|item| item.params.clone()).unwrap_or_default(); - for statement in statements { - names.extend(self.local_writes(statement)); - } - names - } - - fn structural_boundaries(&self, statements: &[Statement]) -> Vec { - let mut out = Vec::new(); - for i in 0..statements.len().saturating_sub(1) { - let left = &statements[i]; - let right = &statements[i + 1]; - if let Some(boundary) = self.source_boundary(left.end_line + 1, right.line - 1) { - out.push(Boundary { - before_index: left.index, - after_index: right.index, - line: boundary.line, - kind: boundary.kind, - text: boundary.text, - }); - } - } - out - } - - fn source_boundary(&self, first_line: usize, last_line: usize) -> Option { - if first_line > last_line { - return None; - } - - let mut blank = None; - for line_number in first_line..=last_line { - let text = self - .lines - .get(line_number - 1) - .map(|s| s.as_str()) - .unwrap_or(""); - let stripped = text.trim(); - if stripped.starts_with('#') || stripped.starts_with("//") || stripped.starts_with("--") - { - return Some(RawBoundary { - line: line_number, - kind: "comment".to_string(), - text: stripped.to_string(), - }); - } - if stripped.is_empty() && blank.is_none() { - blank = 
Some(RawBoundary { - line: line_number, - kind: "blank".to_string(), - text: stripped.to_string(), - }); - } - } - blank - } - - fn owner_methods<'a>(&self, owner_node: &'a Node) -> Vec<&'a Node> { - let Some(body) = self.owner_body(owner_node) else { - return Vec::new(); - }; - - let stmts = if statement_container(body) { - body.children - .iter() - .filter_map(ast::node) - .collect::>() - } else { - vec![body] - }; - - stmts - .into_iter() - .flat_map(|stmt| { - if METHOD_TYPES.contains(&stmt.r#type.as_str()) { - vec![stmt] - } else if self.visibility_call(stmt) { - self.inline_methods(stmt) - } else { - vec![] - } - }) - .collect() - } - - fn inline_methods<'a>(&self, stmt: &'a Node) -> Vec<&'a Node> { - let Some(args) = stmt.children.get(1).and_then(ast::node) else { - return Vec::new(); - }; - args.children - .iter() - .filter_map(ast::node) - .filter(|arg| METHOD_TYPES.contains(&arg.r#type.as_str())) - .collect() - } - - fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { - let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; - let scope = owner_node.children.get(scope_index).and_then(ast::node)?; - if scope.r#type != "SCOPE" { - return None; - } - scope.children.get(2).and_then(ast::node) - } - - fn visibility_call(&self, node: &Node) -> bool { - if node.r#type == "FCALL" { - if let Some(Child::Symbol(name)) = node.children.first() { - return matches!(name.as_str(), "public" | "protected" | "private"); - } - } - false - } - - fn method_name(&self, node: &Node) -> String { - if node.r#type == "DEFS" { - let receiver = node.children.get(0).and_then(ast::node); - let prefix = if let Some(r) = receiver { - if r.r#type == "SELF" { - "self".to_string() - } else { - ast::slice(r, &self.lines) - } - } else { - "?".to_string() - }; - format!( - "{}.{}", - prefix, - node.children - .get(1) - .and_then(|c| match c { - Child::Symbol(s) => Some(s), - _ => None, - }) - .unwrap_or(&"?".to_string()) - ) - } else { - node.children - .first() - 
.and_then(|c| match c { - Child::Symbol(s) => Some(s.clone()), - _ => None, - }) - .unwrap_or_else(|| "?".to_string()) - } - } - - fn full_owner_name(&self, owners: &[String], node: &Node) -> String { - let mut next = owners.to_vec(); - next.push(self.owner_segment(node)); - next.join("::") - } - - fn owner_segment(&self, node: &Node) -> String { - let text = ast::slice( - node.children.first().and_then(ast::node).unwrap_or(node), - &self.lines, - ); - if text.is_empty() { - "(anonymous)".to_string() - } else { - text - } - } - - fn local_reads( - &self, - node: &Node, - local_names: &BTreeSet, - writes: &BTreeSet, - ) -> BTreeSet { - let mut reads = Vec::new(); - self.walk_local(node, &mut |child| { - if LOCAL_READ_TYPES.contains(&child.r#type.as_str()) { - if let Some(name) = local_read_name(child) { - if local_names.contains(&name) { - reads.push(name); - } - } - } - }); - reads.extend(textual_local_reads( - &ast::slice(node, &self.lines), - local_names, - writes, - )); - reads.into_iter().collect() - } - - fn local_writes(&self, node: &Node) -> BTreeSet { - let mut writes = Vec::new(); - self.walk_local(node, &mut |child| { - if LOCAL_WRITE_TYPES.contains(&child.r#type.as_str()) { - if let Some(Child::String(name)) = child.children.first() { - writes.push(name.clone()); - } - } - }); - writes.extend(textual_local_writes(&ast::slice(node, &self.lines))); - writes.into_iter().collect() - } - - fn assignment_dependencies( - &self, - node: &Node, - local_names: &BTreeSet, - ) -> Vec<(String, String)> { - let mut deps = Vec::new(); - self.walk_local(node, &mut |child| { - if LOCAL_WRITE_TYPES.contains(&child.r#type.as_str()) { - if let Some(Child::String(lhs)) = child.children.first() { - if let Some(rhs) = child.children.get(1).and_then(ast::node) { - let rhs_writes = self.local_writes(rhs); - for read in self.local_reads(rhs, local_names, &rhs_writes) { - if lhs != &read { - deps.push((lhs.clone(), read)); - } - } - } - } - } - }); - let lhs_names = 
self.local_writes(node); - if !lhs_names.is_empty() { - let reads = self.local_reads(node, local_names, &lhs_names); - for lhs in lhs_names { - for read in &reads { - if &lhs != read { - deps.push((lhs.clone(), read.clone())); - } - } - } - } - deps.sort(); - deps.dedup(); - deps - } - - fn co_use_edges(&self, node: &Node, local_names: &BTreeSet) -> Vec<(String, String)> { - let writes = self.local_writes(node); - let reads: Vec<_> = self - .local_reads(node, local_names, &writes) - .into_iter() - .collect(); - let mut out = Vec::new(); - for i in 0..reads.len() { - for j in i + 1..reads.len() { - out.push((reads[i].clone(), reads[j].clone())); - } - } - out - } - - fn walk_local(&self, node: &Node, blk: &mut dyn FnMut(&Node)) { - if SKIP_NESTED_TYPES.contains(&node.r#type.as_str()) { - return; - } - blk(node); - for child in node.children.iter().filter_map(ast::node) { - self.walk_local(child, blk); - } - } -} - -fn local_read_name(node: &Node) -> Option { - match node.children.first() { - Some(Child::String(name)) | Some(Child::Symbol(name)) => Some(name.clone()), - Some(Child::Nil) => Some(String::new()), - _ => None, - } -} - -fn textual_local_writes(source: &str) -> Vec { - let Some((lhs, operator)) = split_assignment(source) else { - return Vec::new(); - }; - if lhs.contains('.') - || lhs.contains("->") - || lhs.contains('[') - || lhs.contains('(') - || lhs.contains(')') - { - return Vec::new(); - } - - let identifiers = identifiers_with_positions(lhs) - .into_iter() - .map(|identifier| identifier.name) - .filter(|name| !local_keyword(name)) - .collect::>(); - if identifiers.is_empty() { - return Vec::new(); - } - - if operator == ":=" || declaration_like_lhs(lhs) || identifiers.len() == 1 { - return identifiers - .into_iter() - .filter(|name| simple_identifier(name)) - .collect(); - } - - Vec::new() -} - -fn textual_local_reads( - source: &str, - local_names: &BTreeSet, - writes: &BTreeSet, -) -> Vec { - if plain_string_literal_source(source) { - return 
Vec::new(); - } - - identifiers_with_positions(source) - .into_iter() - .filter(|identifier| local_names.contains(&identifier.name)) - .filter(|identifier| !writes.contains(&identifier.name)) - .filter(|identifier| !member_name(source, identifier.start)) - .filter(|identifier| !call_name(source, identifier.end)) - .map(|identifier| identifier.name) - .collect() -} - -fn plain_string_literal_source(source: &str) -> bool { - let source = source.trim(); - if source.starts_with('f') || source.starts_with('F') { - return false; - } - (source.starts_with("\"\"\"") && source.ends_with("\"\"\"")) - || (source.starts_with("'''") && source.ends_with("'''")) - || (source.starts_with('"') && source.ends_with('"')) - || (source.starts_with('\'') && source.ends_with('\'')) -} - -#[derive(Clone, Debug, Eq, PartialEq)] -struct IdentifierSpan { - name: String, - start: usize, - end: usize, -} - -fn identifiers_with_positions(source: &str) -> Vec { - let bytes = source.as_bytes(); - let mut out = Vec::new(); - let mut index = 0; - while index < bytes.len() { - let start = if bytes[index] == b'$' { - let next = index + 1; - if next < bytes.len() && identifier_start(bytes[next]) { - next - } else { - index += 1; - continue; - } - } else if identifier_start(bytes[index]) { - index - } else { - index += 1; - continue; - }; - let mut end = start + 1; - while end < bytes.len() && identifier_part(bytes[end]) { - end += 1; - } - out.push(IdentifierSpan { - name: source[start..end].to_string(), - start, - end, - }); - index = end; - } - out -} - -fn identifier_start(byte: u8) -> bool { - byte == b'_' || byte.is_ascii_alphabetic() -} - -fn identifier_part(byte: u8) -> bool { - byte == b'_' || byte.is_ascii_alphanumeric() -} - -fn split_assignment(source: &str) -> Option<(&str, &str)> { - let bytes = source.as_bytes(); - let mut index = 0; - while index < bytes.len() { - if index + 1 < bytes.len() && bytes[index] == b':' && bytes[index + 1] == b'=' { - return Some((source[..index].trim(), 
":=")); - } - if bytes[index] == b'=' { - let previous = index.checked_sub(1).and_then(|i| bytes.get(i)).copied(); - let next = bytes.get(index + 1).copied(); - if !matches!( - previous, - Some( - b'=' | b'!' - | b'<' - | b'>' - | b':' - | b'+' - | b'-' - | b'*' - | b'/' - | b'%' - | b'&' - | b'|' - ) - ) && !matches!(next, Some(b'=' | b'>')) - { - return Some((source[..index].trim(), "=")); - } - } - index += 1; - } - None -} - -fn declaration_like_lhs(lhs: &str) -> bool { - identifiers_with_positions(lhs) - .first() - .map(|identifier| { - matches!( - identifier.name.as_str(), - "let" - | "const" - | "var" - | "val" - | "auto" - | "int" - | "long" - | "float" - | "double" - | "bool" - | "boolean" - | "char" - | "String" - | "string" - ) - }) - .unwrap_or(false) -} - -fn local_keyword(name: &str) -> bool { - matches!( - name, - "as" | "break" - | "auto" - | "boolean" - | "bool" - | "case" - | "char" - | "class" - | "const" - | "continue" - | "default" - | "double" - | "else" - | "false" - | "float" - | "for" - | "func" - | "fun" - | "function" - | "if" - | "in" - | "int" - | "long" - | "let" - | "mut" - | "nil" - | "None" - | "null" - | "private" - | "protected" - | "public" - | "return" - | "self" - | "short" - | "static" - | "String" - | "string" - | "this" - | "true" - | "val" - | "var" - | "void" - | "while" - ) -} - -fn simple_identifier(name: &str) -> bool { - let mut chars = name.chars(); - matches!(chars.next(), Some(first) if first == '_' || first.is_ascii_alphabetic()) - && chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) -} - -fn member_name(source: &str, start: usize) -> bool { - let prefix = source[..start].trim_end(); - prefix.ends_with('.') || prefix.ends_with("->") || prefix.ends_with("::") -} - -fn call_name(source: &str, end: usize) -> bool { - let suffix = source[end..].trim_start(); - suffix.starts_with('(') -} - -fn method_metadata(document: &Document) -> BTreeMap { - document - .function_defs - .iter() - .map(|function| (function.span, 
metadata_for_function(document, function))) - .collect() -} - -fn metadata_for_function(document: &Document, function: &FunctionDef) -> MethodMetadata { - let owner = local_flow_owner(&document.file, &function.owner); - MethodMetadata { - owner, - name: function.name.clone(), - params: function.params.iter().cloned().collect(), - } -} - -fn local_flow_owner(file: &str, owner: &str) -> String { - let file_owner = file_owner(file); - if owner == file_owner { - return "(top-level)".to_string(); - } - owner - .strip_prefix(&format!("{file_owner}::")) - .unwrap_or(owner) - .to_string() -} - -fn file_owner(file: &str) -> String { - Path::new(file) - .file_stem() - .and_then(|stem| stem.to_str()) - .filter(|stem| !stem.is_empty()) - .unwrap_or("(file)") - .to_string() -} - -fn statement_container(node: &Node) -> bool { - STATEMENT_CONTAINER_TYPES.contains(&node.r#type.as_str()) -} - -fn comment_statement(node: &Node) -> bool { - node.r#type.to_ascii_lowercase().contains("comment") - || node.text.trim_start().starts_with("//") - || node.text.trim_start().starts_with('#') - || node.text.trim_start().starts_with("--") -} - -struct RawBoundary { - line: usize, - kind: String, - text: String, -} - -#[cfg(test)] -mod tests { - use super::*; - use std::io::Write; - use tempfile::NamedTempFile; - - fn summaries(source: &str, language: Language) -> Vec { - let mut file = NamedTempFile::new().expect("tempfile"); - file.write_all(source.as_bytes()).expect("write"); - scan_files(&[file.path().to_path_buf()], language).expect("scan") - } - - #[test] - fn extracts_python_function_local_flow() { - let summaries = summaries( - "def mixed(price, tax):\n subtotal = price + tax\n total = subtotal\n return total\n", - Language::Python, - ); - let summary = summaries - .iter() - .find(|summary| summary.name == "mixed") - .expect("mixed summary"); - - assert_eq!(summary.owner, "(top-level)"); - assert_eq!(summary.statements.len(), 3); - assert_eq!( - summary.statements[0].reads, - 
["price".to_string(), "tax".to_string()] - .into_iter() - .collect() - ); - assert_eq!( - summary.statements[1].dependencies, - vec![("total".to_string(), "subtotal".to_string())] - ); - assert_eq!( - summary.statements[2].reads, - ["total".to_string()].into_iter().collect() - ); - } - - #[test] - fn handles_non_ascii_source_without_byte_boundary_panics() { - let summaries = summaries( - "def mixed(price):\n marker = \"✓\"\n total = price\n return total\n", - Language::Python, - ); - let summary = summaries - .iter() - .find(|summary| summary.name == "mixed") - .expect("mixed summary"); - - assert_eq!(summary.statements.len(), 3); - assert_eq!( - summary.statements[1].dependencies, - vec![("total".to_string(), "price".to_string())] - ); - } - - #[test] - fn preserves_self_parameter_reads_for_python_attribute_access() { - let summaries = summaries( - "class TextSuite:\n def setup(self):\n self.console = Console(file=StringIO(), color_system=\"truecolor\")\n self.text = Text.from_markup(markup)\n", - Language::Python, - ); - let summary = summaries - .iter() - .find(|summary| summary.id == "TextSuite#setup") - .expect("setup summary"); - - assert_eq!( - summary.statements[0].reads, - ["self".to_string()].into_iter().collect() - ); - assert!(!summary.statements[0].writes.contains("file")); - assert_eq!( - summary.statements[1].reads, - ["self".to_string()].into_iter().collect() - ); - } - - #[test] - fn excludes_keyword_argument_writes_from_outer_assignment_dependencies() { - let summaries = summaries( - "def render():\n pretty = Pretty(snippets.PYTHON_DICT, indent_guides=True)\n return pretty\n", - Language::Python, - ); - let summary = summaries - .iter() - .find(|summary| summary.name == "render") - .expect("render summary"); - - assert_eq!( - summary.statements[0].writes, - ["pretty".to_string()].into_iter().collect() - ); - assert!(summary.statements[0].dependencies.is_empty()); - } - - #[test] - fn mines_python_loop_and_with_locals_without_keyword_writes() { - 
let summaries = summaries( - "def download(urls, dest_dir):\n with ThreadPoolExecutor(max_workers=4) as pool:\n for url in urls:\n filename = url.split(\"/\")[-1]\n dest_path = os.path.join(dest_dir, filename)\n task_id = progress.add_task(\"download\", filename=filename, start=False)\n pool.submit(copy_url, task_id, url, dest_path)\n", - Language::Python, - ); - let summary = summaries - .iter() - .find(|summary| summary.name == "download") - .expect("download summary"); - let statement = &summary.statements[0]; - - assert!(statement.reads.contains("urls")); - assert!(statement.reads.contains("url")); - assert!(statement.reads.contains("pool")); - assert!(statement.writes.contains("url")); - assert!(statement.writes.contains("pool")); - assert!(!statement.writes.contains("urls")); - assert!(!statement.writes.contains("max_workers")); - assert!(!statement.writes.contains("start")); - } - - #[test] - fn does_not_read_python_with_alias_at_declaration_site() { - let summaries = summaries( - "def capture(console):\n with console.capture() as output:\n console.line()\n return output\n", - Language::Python, - ); - let summary = summaries - .iter() - .find(|summary| summary.name == "capture") - .expect("capture summary"); - - assert!(summary.statements[0].writes.contains("output")); - assert!(!summary.statements[0].reads.contains("output")); - assert!(summary.statements[1].reads.contains("output")); - } - - #[test] - fn mines_python_named_expression_writes() { - let summaries = summaries( - "def scan(text, index):\n if (character := text[index]):\n return character\n", - Language::Python, - ); - let summary = summaries - .iter() - .find(|summary| summary.name == "scan") - .expect("scan summary"); - let statement = &summary.statements[0]; - - assert!(statement.writes.contains("character")); - assert!(statement.reads.contains("text")); - assert!(statement.reads.contains("index")); - assert!(statement - .dependencies - .contains(&("character".to_string(), 
"text".to_string()))); - assert!(statement - .dependencies - .contains(&("character".to_string(), "index".to_string()))); - } - - #[test] - fn ignores_python_import_path_segments_that_match_locals() { - let summaries = summaries( - "def status(status):\n from .status import Status\n return status\n", - Language::Python, - ); - let summary = summaries - .iter() - .find(|summary| summary.name == "status") - .expect("status summary"); - - assert!(summary.statements[0].reads.is_empty()); - assert_eq!( - summary.statements[1].reads, - ["status".to_string()].into_iter().collect() - ); - } - - #[test] - fn reads_python_callable_locals_without_marking_call_callee_as_write() { - let summaries = summaries( - "def invoke(callback, value):\n runner = callback\n return runner(value)\n", - Language::Python, - ); - let summary = summaries - .iter() - .find(|summary| summary.name == "invoke") - .expect("invoke summary"); - - assert_eq!( - summary.statements[1].reads, - ["runner".to_string(), "value".to_string()] - .into_iter() - .collect() - ); - assert!(summary.statements[1].writes.is_empty()); - } - - #[test] - fn does_not_read_locals_from_plain_docstring_text() { - let summaries = summaries( - "def get_content(user):\n \"\"\"Extract text from user dict.\"\"\"\n return user\n", - Language::Python, - ); - let summary = summaries - .iter() - .find(|summary| summary.name == "get_content") - .expect("get_content summary"); - - assert!(summary.statements[0].reads.is_empty()); - assert_eq!( - summary.statements[1].reads, - ["user".to_string()].into_iter().collect() - ); - } - - #[test] - fn extracts_java_kotlin_and_swift_local_flow() { - let cases = [ - ( - Language::Java, - "class Billing {\n int mixed(int price, int tax) {\n int subtotal = price + tax;\n int total = subtotal;\n return total;\n }\n}\n", - ), - ( - Language::Kotlin, - "class Billing {\n fun mixed(price: Int, tax: Int): Int {\n val subtotal = price + tax\n val total = subtotal\n return total\n }\n}\n", - ), - ( - 
Language::Swift, - "class Billing {\n func mixed(price: Int, tax: Int) -> Int {\n let subtotal = price + tax\n let total = subtotal\n return total\n }\n}\n", - ), - ]; - - for (language, source) in cases { - let summaries = summaries(source, language); - let summary = summaries - .iter() - .find(|summary| summary.name == "mixed") - .expect("mixed summary"); - - assert_eq!(summary.owner, "Billing"); - assert_eq!(summary.statements.len(), 3); - assert_eq!( - summary.statements[0].reads, - ["price".to_string(), "tax".to_string()] - .into_iter() - .collect() - ); - assert_eq!( - summary.statements[1].dependencies, - vec![("total".to_string(), "subtotal".to_string())] - ); - assert_eq!( - summary.statements[2].reads, - ["total".to_string()].into_iter().collect() - ); - } - } -} +pub use crate::decomplex::syntax::local_flow::{ + scan_documents, scan_files, Boundary, LocalFlowRow, MethodSummary, Statement, +}; diff --git a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs index 50ccb7455..7ca69d04a 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs @@ -1,738 +1,3 @@ -use crate::decomplex::ast::{self, normalize_text, Child, Node, RawNode, Span}; -use crate::decomplex::syntax::adapters::{language_profile, LanguageProfile}; -use crate::decomplex::syntax::{self, Document, Language}; -use anyhow::Result; -use serde::Serialize; -use std::collections::{BTreeMap, BTreeSet}; -use std::path::PathBuf; - -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] -pub struct PathConditionReport { - pub neglected: Vec, - pub scattered: Vec, -} - -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] -pub struct NeglectedPathCondition { - pub pattern: Vec, - pub support: usize, - pub missing: String, - pub at: String, - pub spans: BTreeMap, - pub action: String, -} - -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] -pub struct 
ScatteredPathCondition { - pub guards: Vec, - pub support: usize, - pub scatter: usize, - pub rank: usize, - pub sites: Vec, - pub spans: BTreeMap, -} - -#[derive(Clone, Debug)] -struct Site { - guards: Vec, - action: String, - file: String, - defn: String, - line: usize, - span: Span, -} - -pub fn scan_files(files: &[PathBuf], language: Language) -> Result { - let documents = syntax::parse_files(files, language)?; - Ok(scan_documents(&documents)) -} - -pub fn scan_documents(documents: &[Document]) -> PathConditionReport { - let mut sites = documents - .iter() - .flat_map(sites_from_document_facts) - .collect::>(); - sites.extend( - documents - .iter() - .flat_map(sites_from_raw_facts) - .collect::>(), - ); - if !sites.is_empty() { - return Report::new(dedupe_sites(sites)).findings(); - } - - let mut sites = Vec::new(); - for document in documents { - let mut pc = PathCondition::new(document.file.clone(), document.lines.clone()); - pc.walk(&document.normalized_root, &Vec::new(), &Vec::new()); - sites.extend(pc.sites); - } - Report::new(sites).findings() -} - -fn dedupe_sites(sites: Vec) -> Vec { - let mut seen = BTreeSet::new(); - sites - .into_iter() - .filter(|site| { - seen.insert(( - site.guards.clone(), - site.action.clone(), - site.file.clone(), - site.defn.clone(), - site.line, - )) - }) - .collect() -} - -fn sites_from_document_facts(document: &Document) -> Vec { - document - .path_condition_sites - .iter() - .map(|site| Site { - guards: site.guards.clone(), - action: site.action.clone(), - file: site.file.clone(), - defn: site.function.clone(), - line: site.line, - span: site.span, - }) - .collect() -} - -fn sites_from_raw_facts(document: &Document) -> Vec { - let profile = language_profile(document.language); - let mut sites = Vec::new(); - for function in &document.function_defs { - for statement in raw_function_body_statements(profile, &function.body) { - raw_path_walk( - document, - profile, - statement, - &function.name, - &[], - &mut sites, - ); - } 
- } - sites -} - -fn raw_function_body_node<'a>( - profile: &dyn LanguageProfile, - node: &'a RawNode, -) -> Option<&'a RawNode> { - if let Some(body) = raw_child_by_field(node, "body") { - return Some(body); - } - raw_named_children(node).into_iter().rev().find(|child| { - profile - .function_body_node_kinds() - .contains(&child.kind.as_str()) - }) -} - -fn raw_function_body_statements<'a>( - profile: &dyn LanguageProfile, - node: &'a RawNode, -) -> Vec<&'a RawNode> { - let Some(body) = raw_function_body_node(profile, node) else { - return Vec::new(); - }; - - let mut named = raw_named_children(body) - .into_iter() - .filter(|child| !raw_comment_node(child)) - .collect::>(); - if named.len() == 1 - && profile - .nested_statement_wrapper_node_kinds() - .contains(&named[0].kind.as_str()) - { - if raw_branch_node(profile, named[0]) { - return vec![named[0]]; - } - named = raw_named_children(named[0]) - .into_iter() - .filter(|child| !raw_comment_node(child)) - .collect(); - } - if named.is_empty() && body.text.trim().is_empty() { - return Vec::new(); - } - if raw_branch_node(profile, body) || raw_assignment_statement(profile, body) || named.is_empty() - { - return vec![body]; - } - named -} - -fn raw_path_walk( - document: &Document, - profile: &dyn LanguageProfile, - node: &RawNode, - function: &str, - guards: &[String], - out: &mut Vec, -) { - if raw_nested_local_scope(profile, node) { - return; - } - - if raw_branch_node(profile, node) { - let condition = raw_branch_condition(node); - let atoms = raw_path_condition_atoms(profile, condition); - for child in raw_branch_body_nodes(profile, node) { - let mut next_guards = guards.to_vec(); - next_guards.extend(atoms.clone()); - raw_path_walk(document, profile, child, function, &next_guards, out); - } - return; - } - - if guards.len() >= 2 && raw_path_action_node(profile, node) { - let mut unique = guards.to_vec(); - unique.sort(); - unique.dedup(); - out.push(Site { - guards: unique, - action: 
profile.normalize_source_text(&node.text), - file: document.file.clone(), - defn: function.to_string(), - line: node.span[0], - span: node.span, - }); - return; - } - - for child in raw_named_children(node) { - raw_path_walk(document, profile, child, function, guards, out); - } -} - -fn raw_path_condition_atoms( - profile: &dyn LanguageProfile, - condition: Option<&RawNode>, -) -> Vec { - let Some(condition) = condition else { - return Vec::new(); - }; - if raw_boolean_container(profile, condition) && raw_boolean_and(profile, condition) { - let mut atoms = raw_flatten_boolean_and(profile, condition) - .into_iter() - .map(|child| raw_decision_member_text(profile, &child.text)) - .collect::>(); - atoms.sort(); - atoms.dedup(); - atoms - } else { - vec![raw_decision_member_text(profile, &condition.text)] - } -} - -fn raw_branch_condition(node: &RawNode) -> Option<&RawNode> { - raw_child_by_field(node, "condition") - .or_else(|| raw_child_by_field(node, "value")) - .or_else(|| raw_child_by_field(node, "subject")) - .or_else(|| raw_named_children(node).into_iter().next()) -} - -fn raw_branch_body_nodes<'a>(profile: &dyn LanguageProfile, node: &'a RawNode) -> Vec<&'a RawNode> { - let mut bodies = ["consequence", "body", "alternative"] - .into_iter() - .filter_map(|field| raw_child_by_field(node, field)) - .collect::>(); - if bodies.is_empty() { - bodies = raw_named_children(node).into_iter().skip(1).collect(); - } - bodies - .into_iter() - .flat_map(|body| { - if raw_simple_action_wrapper(profile, body) { - return vec![body]; - } - let body_children = raw_named_children(body); - let children = if profile - .path_transparent_branch_body_node_kinds() - .contains(&body.kind.as_str()) - { - body_children.into_iter().skip(1).collect::>() - } else { - body_children - }; - let children = children - .into_iter() - .flat_map(|child| { - if profile - .path_transparent_branch_body_node_kinds() - .contains(&child.kind.as_str()) - { - raw_named_children(child) - .into_iter() - 
.skip(1) - .collect::>() - } else { - vec![child] - } - }) - .filter(|child| !raw_comment_node(child)) - .collect::>(); - if children.is_empty() { - vec![body] - } else { - children - } - }) - .collect() -} - -fn raw_path_action_node(profile: &dyn LanguageProfile, node: &RawNode) -> bool { - if raw_branch_node(profile, node) { - return false; - } - raw_simple_action_wrapper(profile, node) - || raw_assignment_statement(profile, node) - || profile - .path_action_node_kinds() - .contains(&node.kind.as_str()) -} - -fn raw_simple_action_wrapper(profile: &dyn LanguageProfile, node: &RawNode) -> bool { - if !profile - .simple_action_wrapper_node_kinds() - .contains(&node.kind.as_str()) - { - return false; - } - let text = normalize_text(&node.text); - if text.contains('{') || text.contains('}') { - return false; - } - let text = text.strip_suffix(';').unwrap_or(&text).trim(); - let Some(open) = text.find('(') else { - return false; - }; - text.ends_with(')') - && text[..open] - .chars() - .all(|ch| ch == '_' || ch == '.' 
|| ch.is_ascii_alphanumeric()) -} - -fn raw_assignment_statement(profile: &dyn LanguageProfile, node: &RawNode) -> bool { - profile - .assignment_node_kinds() - .contains(&node.kind.as_str()) - || node.children.iter().any(|child| { - !child.named - && profile - .assignment_operator_tokens() - .contains(&child.text.as_str()) - }) -} - -fn raw_branch_node(profile: &dyn LanguageProfile, node: &RawNode) -> bool { - profile.branch_node_kinds().contains(&node.kind.as_str()) -} - -fn raw_nested_local_scope(profile: &dyn LanguageProfile, node: &RawNode) -> bool { - profile.function_node_kinds().contains(&node.kind.as_str()) - || profile - .class_owner_node_kinds() - .contains(&node.kind.as_str()) - || profile - .module_owner_node_kinds() - .contains(&node.kind.as_str()) - || profile - .generic_owner_node_kinds() - .contains(&node.kind.as_str()) - || profile - .struct_owner_node_kinds() - .contains(&node.kind.as_str()) -} - -fn raw_boolean_container(profile: &dyn LanguageProfile, node: &RawNode) -> bool { - if profile - .boolean_container_node_kinds() - .contains(&node.kind.as_str()) - { - return true; - } - if raw_parenthesized_wrapper(profile, node) { - return raw_named_children(node) - .into_iter() - .next() - .map(|child| raw_boolean_container(profile, child)) - .unwrap_or(false); - } - false -} - -fn raw_boolean_and(profile: &dyn LanguageProfile, node: &RawNode) -> bool { - if raw_parenthesized_wrapper(profile, node) { - return raw_named_children(node) - .into_iter() - .next() - .map(|child| raw_boolean_and(profile, child)) - .unwrap_or(false); - } - raw_direct_operator(node) - .map(|operator| profile.boolean_and_operators().contains(&operator.as_str())) - .unwrap_or(false) -} - -fn raw_flatten_boolean_and<'a>( - profile: &dyn LanguageProfile, - node: &'a RawNode, -) -> Vec<&'a RawNode> { - if !(raw_boolean_container(profile, node) && raw_boolean_and(profile, node)) { - return vec![node]; - } - if raw_parenthesized_wrapper(profile, node) { - return 
raw_named_children(node) - .into_iter() - .next() - .map(|child| raw_flatten_boolean_and(profile, child)) - .unwrap_or_else(|| vec![node]); - } - raw_named_children(node) - .into_iter() - .flat_map(|child| raw_flatten_boolean_and(profile, child)) - .collect() -} - -fn raw_parenthesized_wrapper(profile: &dyn LanguageProfile, node: &RawNode) -> bool { - profile - .parenthesized_wrapper_node_kinds() - .contains(&node.kind.as_str()) - && raw_named_children(node).len() == 1 -} - -fn raw_decision_member_text(profile: &dyn LanguageProfile, text: &str) -> String { - profile.normalize_source_text(&strip_enclosing_parentheses(text)) -} - -fn strip_enclosing_parentheses(text: &str) -> String { - let mut value = text.trim().to_string(); - loop { - if !(value.starts_with('(') && value.ends_with(')')) { - break value; - } - if !enclosing_parentheses_wrap_all(&value) { - break value; - } - value = value[1..value.len() - 1].trim().to_string(); - } -} - -fn enclosing_parentheses_wrap_all(text: &str) -> bool { - let mut depth = 0isize; - for (index, ch) in text.chars().enumerate() { - if ch == '(' { - depth += 1; - } else if ch == ')' { - depth -= 1; - } - if depth == 0 && index < text.len() - 1 { - return false; - } - if depth < 0 { - return false; - } - } - depth == 0 -} - -fn raw_direct_operator(node: &RawNode) -> Option { - node.children - .iter() - .find(|child| { - let text = child.text.trim(); - !child.named && !matches!(text, "(" | ")") - }) - .map(|child| normalize_text(&child.text)) -} - -fn raw_named_children(node: &RawNode) -> Vec<&RawNode> { - node.children.iter().filter(|child| child.named).collect() -} - -fn raw_child_by_field<'a>(node: &'a RawNode, field: &str) -> Option<&'a RawNode> { - node.children - .iter() - .find(|child| child.field_name.as_deref() == Some(field)) -} - -fn raw_comment_node(node: &RawNode) -> bool { - node.kind.contains("comment") -} - -struct PathCondition { - file: String, - lines: Vec, - sites: Vec, -} - -impl PathCondition { - fn new(file: 
String, lines: Vec) -> Self { - Self { - file, - lines, - sites: Vec::new(), - } - } - - fn walk(&mut self, node: &Node, defstack: &[String], guards: &[Vec]) { - let mut next_defstack = defstack.to_vec(); - if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; - if let Some(Child::Symbol(name)) = node.children.get(name_index) { - next_defstack.push(name.clone()); - } - } - - match node.r#type.as_str() { - "IF" | "UNLESS" => { - let cond = node.children.get(0).and_then(ast::node); - let a = node.children.get(1).and_then(ast::node); - let b = node.children.get(2).and_then(ast::node); - - let atoms = self.cond_atoms(cond); - let then_g = if node.r#type == "IF" { - atoms.clone() - } else { - self.negate(&atoms) - }; - let else_g = if node.r#type == "IF" { - self.negate(&atoms) - } else { - atoms.clone() - }; - - if let Some(a_node) = a { - let mut next_guards = guards.to_vec(); - next_guards.extend(then_g); - self.walk(a_node, &next_defstack, &next_guards); - } - if let Some(b_node) = b { - let mut next_guards = guards.to_vec(); - next_guards.extend(else_g); - self.walk(b_node, &next_defstack, &next_guards); - } - - return; - } - "CALL" | "FCALL" | "VCALL" | "ATTRASGN" | "LASGN" | "IASGN" | "OPCALL" => { - if guards.len() >= 2 { - self.record(node, &next_defstack, guards); - } - } - _ => {} - } - - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, &next_defstack, guards); - } - } - - fn cond_atoms(&self, cond: Option<&Node>) -> Vec> { - let Some(cond) = cond else { return Vec::new() }; - ast::flatten_and(cond) - .into_iter() - .map(|a| { - let t = ast::slice(a, &self.lines); - let (text, neg) = ast::canon_polarity(&t); - vec![ - text, - if neg { - "true".to_string() - } else { - "false".to_string() - }, - ] - }) - .collect() - } - - fn negate(&self, atoms: &[Vec]) -> Vec> { - atoms - .iter() - .map(|a| { - let t = &a[0]; - let n = a[1] == "true"; - vec![ - t.clone(), - if !n { - 
"true".to_string() - } else { - "false".to_string() - }, - ] - }) - .collect() - } - - fn record(&mut self, node: &Node, defstack: &[String], guards: &[Vec]) { - let mut members_set = BTreeSet::new(); - for g in guards { - let prefix = if g[1] == "true" { "!" } else { "" }; - members_set.insert(format!("{}{}", prefix, g[0])); - } - let members: Vec<_> = members_set.into_iter().collect(); - - if members.len() < 2 { - return; - } - - let slice = ast::slice(node, &self.lines); - let action = if slice.len() > 80 { - slice[..80].to_string() - } else { - slice - }; - - self.sites.push(Site { - guards: members, - action, - file: self.file.clone(), - defn: defstack - .last() - .cloned() - .unwrap_or_else(|| "(top-level)".to_string()), - line: node.first_lineno, - span: [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ], - }); - } -} - -struct Report { - sites: Vec, - groups: Vec<(Vec, Vec)>, -} - -impl Report { - fn new(sites: Vec) -> Self { - let mut keys = Vec::new(); - let mut groups: BTreeMap, Vec> = BTreeMap::new(); - for s in &sites { - if !groups.contains_key(&s.guards) { - keys.push(s.guards.clone()); - } - groups.entry(s.guards.clone()).or_default().push(s.clone()); - } - - let ordered_groups = keys - .into_iter() - .map(|k| { - let v = groups.remove(&k).unwrap(); - (k, v) - }) - .collect(); - - Self { - sites, - groups: ordered_groups, - } - } - - fn findings(&self) -> PathConditionReport { - PathConditionReport { - neglected: self.neglected(3), - scattered: self.scattered(2), - } - } - - fn scattered(&self, min_scatter: usize) -> Vec { - let mut out = Vec::new(); - for (guards, sites) in &self.groups { - let scatter = sites - .iter() - .map(|site| (site.file.clone(), site.defn.clone())) - .collect::>() - .len(); - if scatter < min_scatter { - continue; - } - - let locations = sites - .iter() - .map(|site| format!("{}:{}:{}", site.file, site.defn, site.line)) - .collect::>(); - let spans = sites - .iter() - .map(|site| { - ( - 
format!("{}:{}:{}", site.file, site.defn, site.line), - site.span, - ) - }) - .collect::>(); - out.push(ScatteredPathCondition { - guards: guards.clone(), - support: sites.len(), - scatter, - rank: sites.len() * scatter, - sites: locations, - spans, - }); - } - out.sort_by(|a, b| b.rank.cmp(&a.rank).then_with(|| a.guards.cmp(&b.guards))); - out - } - - fn neglected(&self, min_support: usize) -> Vec { - let popular: Vec<_> = self - .groups - .iter() - .filter(|(_, s)| s.len() >= min_support) - .map(|(g, s)| (g.clone(), s.len())) - .collect(); - - let mut out = Vec::new(); - let mut seen = BTreeSet::new(); - - for s in &self.sites { - for (gs, sup) in &popular { - let gs_set: BTreeSet<_> = gs.iter().cloned().collect(); - let s_guards_set: BTreeSet<_> = s.guards.iter().cloned().collect(); - - let diff_gs_s: BTreeSet<_> = gs_set.difference(&s_guards_set).cloned().collect(); - let diff_s_gs: BTreeSet<_> = s_guards_set.difference(&gs_set).cloned().collect(); - - if diff_gs_s.len() == 1 && diff_s_gs.is_empty() { - if s.guards == *gs { - continue; - } - - let at = format!("{}:{}:{}", s.file, s.defn, s.line); - let missing = diff_gs_s.into_iter().next().unwrap(); - - // dedupe manually - let key = (gs.clone(), sup.clone(), missing.clone(), at.clone()); - if seen.insert(key) { - let mut spans = BTreeMap::new(); - spans.insert(at.clone(), s.span); - - out.push(NeglectedPathCondition { - pattern: gs.clone(), - support: *sup, - missing, - at, - spans, - action: s.action.clone(), - }); - } - } - } - } - - out.sort_by(|a, b| b.support.cmp(&a.support).then_with(|| a.at.cmp(&b.at))); - out - } -} +pub use crate::decomplex::syntax::path_condition::{ + scan_documents, scan_files, NeglectedPathCondition, PathConditionReport, ScatteredPathCondition, +}; diff --git a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs index b25faf725..e0d4a77f2 100644 --- 
a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs @@ -1,644 +1,3 @@ -use crate::decomplex::ast::{self, Child, Node, Span}; -use crate::decomplex::syntax::{self, Document, Language}; -use anyhow::Result; -use serde::Serialize; -use std::collections::{BTreeMap, BTreeSet}; -use std::path::PathBuf; - -#[derive(Clone, Debug, Eq, PartialEq, Serialize)] -pub struct RedundantNilGuardRow { - pub at: String, - pub file: String, - pub defn: String, - pub line: usize, - pub span: Span, - pub local: String, - pub guard: String, - pub proof: String, - pub spans: BTreeMap, -} - -#[derive(Clone, Debug)] -struct Flow { - known: BTreeSet, - terminated: bool, -} - -#[derive(Clone, Debug)] -struct NilFact { - local: String, - non_nil_when_true: bool, -} - -struct CallParts<'a> { - receiver: Option<&'a Node>, - message: String, - no_args: bool, -} - -struct Finding { - file: String, - defn: String, - line: usize, - span: Span, - local: String, - guard: String, - proof: String, -} - -impl Finding { - fn to_h(&self) -> RedundantNilGuardRow { - let loc = format!("{}:{}:{}", self.file, self.defn, self.line); - let mut spans = BTreeMap::new(); - spans.insert(loc.clone(), self.span); - RedundantNilGuardRow { - at: loc, - file: self.file.clone(), - defn: self.defn.clone(), - line: self.line, - span: self.span, - local: self.local.clone(), - guard: self.guard.clone(), - proof: self.proof.clone(), - spans, - } - } -} - -const TERMINATING_CALLS: &[&str] = &["raise", "fail", "abort", "exit", "exit!"]; -const NIL_PREDICATE_MIDS: &[&str] = &["nil?", "isNull", "is_null", "nil", "is_none"]; -const NON_NIL_PREDICATE_MIDS: &[&str] = &["isSome", "is_some", "present", "present?"]; - -pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { - let documents = syntax::parse_files(files, language)?; - Ok(scan_documents(&documents)) -} - -pub fn scan_documents(documents: &[Document]) -> Vec { - let mut 
findings = Vec::new(); - for document in documents { - let mut scanner = RedundantNilGuard::new(document.file.clone(), document.lines.clone()); - scanner.walk(&document.normalized_root, &Vec::new()); - findings.extend(scanner.findings); - } - let mut out: Vec<_> = findings.into_iter().map(|f| f.to_h()).collect(); - out.sort_by(|a, b| { - a.file - .cmp(&b.file) - .then_with(|| a.line.cmp(&b.line)) - .then_with(|| a.local.cmp(&b.local)) - .then_with(|| a.guard.cmp(&b.guard)) - }); - out -} - -struct RedundantNilGuard { - file: String, - lines: Vec, - findings: Vec, -} - -impl RedundantNilGuard { - fn new(file: String, lines: Vec) -> Self { - Self { - file, - lines, - findings: Vec::new(), - } - } - - fn walk(&mut self, node: &Node, defstack: &[String]) { - if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; - if let Some(Child::Symbol(name)) = node.children.get(name_index) { - let mut next_defstack = defstack.to_vec(); - next_defstack.push(name.clone()); - self.process_block(&ast::body_stmts(node), &next_defstack, &BTreeSet::new()); - } - return; - } - - for child in node.children.iter().filter_map(ast::node) { - self.walk(child, defstack); - } - } - - fn process_block( - &mut self, - stmts: &[&Node], - defstack: &[String], - known: &BTreeSet, - ) -> Flow { - let mut current = known.clone(); - for stmt in stmts { - let flow = self.process_stmt(stmt, defstack, ¤t); - current = flow.known; - if flow.terminated { - return Flow { - known: current, - terminated: true, - }; - } - } - Flow { - known: current, - terminated: false, - } - } - - fn process_stmt(&mut self, node: &Node, defstack: &[String], known: &BTreeSet) -> Flow { - match node.r#type.as_str() { - "IF" | "UNLESS" => self.process_branch(node, defstack, known), - "LASGN" => { - if let Some(rhs) = node.children.get(1).and_then(ast::node) { - self.inspect_node(rhs, defstack, known); - } - let mut next_known = known.clone(); - if let 
Some(Child::String(name)) = node.children.first() { - next_known.remove(name); - } - Flow { - known: next_known, - terminated: false, - } - } - _ => { - self.inspect_node(node, defstack, known); - Flow { - known: known.clone(), - terminated: self.terminating(node), - } - } - } - } - - fn process_branch( - &mut self, - node: &Node, - defstack: &[String], - known: &BTreeSet, - ) -> Flow { - let cond = node.children.get(0).and_then(ast::node); - let then_body = node.children.get(1).and_then(ast::node); - let else_body = node.children.get(2).and_then(ast::node); - - if let Some(cond) = cond { - self.inspect_node(cond, defstack, known); - } - - let then_known = self.known_for_branch(node.r#type.as_str(), true, cond, known); - let else_known = self.known_for_branch(node.r#type.as_str(), false, cond, known); - - let then_flow = self.process_block(&self.stmts_for(then_body), defstack, &then_known); - let else_flow = self.process_block(&self.stmts_for(else_body), defstack, &else_known); - - if then_flow.terminated && else_flow.terminated { - Flow { - known: BTreeSet::new(), - terminated: true, - } - } else if then_flow.terminated { - Flow { - known: else_flow.known, - terminated: false, - } - } else if else_flow.terminated { - Flow { - known: then_flow.known, - terminated: false, - } - } else { - let intersection: BTreeSet<_> = then_flow - .known - .intersection(&else_flow.known) - .cloned() - .collect(); - Flow { - known: intersection, - terminated: false, - } - } - } - - fn known_for_branch( - &self, - node_type: &str, - body_branch: bool, - cond: Option<&Node>, - known: &BTreeSet, - ) -> BTreeSet { - let mut next_known = known.clone(); - let cond_true_branch = if node_type == "IF" { - body_branch - } else { - !body_branch - }; - if let Some(cond) = cond { - for fact in self.branch_nil_facts(cond, cond_true_branch) { - next_known.insert(fact.local); - } - } - next_known - } - - fn inspect_node(&mut self, node: &Node, defstack: &[String], known: &BTreeSet) { - let recorded 
= self.record_redundant(node, defstack, known); - if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { - return; - } - if recorded && (node.r#type == "OPCALL" || self.call_parts(node).is_some()) { - return; - } - for child in node.children.iter().filter_map(ast::node) { - self.inspect_node(child, defstack, known); - } - } - - fn record_redundant( - &mut self, - node: &Node, - defstack: &[String], - known: &BTreeSet, - ) -> bool { - let local = self.redundant_nil_subject(node, known); - let Some(local) = local else { return false }; - - let defn = defstack.last().map(|s| s.as_str()).unwrap_or("(top-level)"); - self.findings.push(Finding { - file: self.file.clone(), - defn: defn.to_string(), - line: node.first_lineno, - span: self.span(node), - local: local.clone(), - guard: ast::slice(node, &self.lines), - proof: format!("{} is already proven non-nil on this path", local), - }); - true - } - - fn redundant_nil_subject(&self, node: &Node, known: &BTreeSet) -> Option { - if node.r#type == "QCALL" { - return self.qcall_subject(node, known); - } - - let fact = self.nil_fact(node)?; - if known.contains(&fact.local) { - return Some(fact.local); - } - None - } - - fn nil_fact(&self, node: &Node) -> Option { - if self.parenthesized_wrapper(node) { - return self.nil_fact(self.first_node_child(node)?); - } - - if let Some(call) = self.call_parts(node) { - if call.no_args && NIL_PREDICATE_MIDS.contains(&call.message.as_str()) { - let subject = self.subject_key(call.receiver?)?; - return Some(NilFact { - local: subject, - non_nil_when_true: false, - }); - } - if call.no_args && NON_NIL_PREDICATE_MIDS.contains(&call.message.as_str()) { - let subject = self.subject_key(call.receiver?)?; - return Some(NilFact { - local: subject, - non_nil_when_true: true, - }); - } - } - - match node.r#type.as_str() { - "OPCALL" => { - let recv = node.children.get(0).and_then(ast::node)?; - let mid = match node.children.get(1)? 
{ - Child::Symbol(s) => s, - _ => return None, - }; - let args = node.children.get(2); - if mid == "!" { - return self.negated_nil_fact(recv); - } - if mid == "==" || mid == "!=" { - return self.comparison_nil_fact(recv, mid, args); - } - None - } - _ => None, - } - } - - fn branch_nil_facts(&self, node: &Node, cond_truth: bool) -> Vec { - if self.parenthesized_wrapper(node) { - if let Some(child) = self.first_node_child(node) { - return self.branch_nil_facts(child, cond_truth); - } - } - - if node.r#type == "AND" { - if !cond_truth { - return Vec::new(); - } - let mut facts = Vec::new(); - for child in ast::flatten_and(node) { - facts.extend(self.branch_nil_facts(child, true)); - } - return facts; - } - - if node.r#type == "OPCALL" { - if let Some(Child::Symbol(mid)) = node.children.get(1) { - if mid == "!" { - if let Some(child) = node.children.get(0).and_then(ast::node) { - return self.branch_nil_facts(child, !cond_truth); - } - } - } - } - - if let Some(safe_receiver) = self.safe_nav_receiver_fact(node) { - if cond_truth { - return vec![safe_receiver]; - } - } - - if let Some(fact) = self.nil_fact(node) { - if cond_truth == fact.non_nil_when_true { - return vec![fact]; - } - } - - if let Some(truthy) = self.truthy_subject_fact(node) { - if cond_truth { - return vec![truthy]; - } - } - - Vec::new() - } - - fn safe_nav_receiver_fact(&self, node: &Node) -> Option { - if node.r#type == "QCALL" { - let recv = node.children.get(0).and_then(ast::node)?; - let subject = self.subject_key(recv)?; - return Some(NilFact { - local: subject, - non_nil_when_true: true, - }); - } - None - } - - fn truthy_subject_fact(&self, node: &Node) -> Option { - let subject = self.subject_key(node)?; - Some(NilFact { - local: subject, - non_nil_when_true: true, - }) - } - - fn negated_nil_fact(&self, node: &Node) -> Option { - let mut fact = self.nil_fact(node)?; - fact.non_nil_when_true = !fact.non_nil_when_true; - Some(fact) - } - - fn comparison_nil_fact(&self, recv: &Node, mid: &str, 
args: Option<&Child>) -> Option { - let subject = self.subject_key(recv)?; - if !self.nil_arg(args) { - return None; - } - Some(NilFact { - local: subject, - non_nil_when_true: mid == "!=", - }) - } - - fn qcall_subject(&self, node: &Node, known: &BTreeSet) -> Option { - let recv = node.children.get(0).and_then(ast::node)?; - let subject = self.subject_key(recv)?; - if known.contains(&subject) { - return Some(subject); - } - None - } - - fn subject_key(&self, node: &Node) -> Option { - match node.r#type.as_str() { - "LVAR" | "DVAR" | "VCALL" => match node.children.first()? { - Child::String(s) | Child::Symbol(s) => Some(s.clone()), - _ => None, - }, - _ if self.call_parts(node).is_some() => { - let call = self.call_parts(node)?; - if !call.no_args || !self.stable_reader_name(&call.message) { - return None; - } - let recv = call.receiver?; - if recv.r#type == "SELF" { - return Some(format!("self.{}", call.message)); - } - let recv_key = self.subject_key(recv)?; - Some(format!("{}.{}", recv_key, call.message)) - } - _ => None, - } - } - - fn call_parts<'a>(&self, node: &'a Node) -> Option> { - match node.r#type.as_str() { - "CALL" => { - let receiver = node.children.get(0).and_then(ast::node); - let message = self.child_name(node.children.get(1)?)?; - Some(CallParts { - receiver, - message, - no_args: self.no_call_arguments(node.children.get(2)), - }) - } - "METHOD_INVOCATION" => { - let nodes = node - .children - .iter() - .filter_map(ast::node) - .collect::>(); - let receiver = nodes.first().copied(); - let message = nodes.get(1).and_then(|child| self.node_name(child))?; - Some(CallParts { - receiver, - message, - no_args: self.no_call_arguments(node.children.get(2)), - }) - } - "FUNCTION_CALL" | "METHOD_CALL" => { - let callee = node.children.iter().filter_map(ast::node).next()?; - let args = node - .children - .iter() - .skip(1) - .find(|child| matches!(child, Child::Node(n) if matches!(n.r#type.as_str(), "ARGUMENTS" | "ARGUMENT_LIST" | "LIST"))); - 
self.field_call_parts(callee, args) - } - "BLOCK" => { - let callee = node.children.iter().filter_map(ast::node).next()?; - let args = node - .children - .iter() - .skip(1) - .find(|child| matches!(child, Child::Node(n) if matches!(n.r#type.as_str(), "ARGUMENTS" | "ARGUMENT_LIST" | "LIST"))); - self.field_call_parts(callee, args) - } - "INVOCATION_EXPRESSION" => { - let callee = node.children.iter().filter_map(ast::node).next()?; - let mut parts = self.call_parts(callee)?; - let args = node - .children - .iter() - .skip(1) - .find(|child| matches!(child, Child::Node(n) if matches!(n.r#type.as_str(), "ARGUMENTS" | "ARGUMENT_LIST" | "LIST"))); - parts.no_args = self.no_call_arguments(args); - Some(parts) - } - _ => None, - } - } - - fn field_call_parts<'a>( - &self, - node: &'a Node, - args: Option<&'a Child>, - ) -> Option> { - if !matches!( - node.r#type.as_str(), - "DOT_INDEX_EXPRESSION" - | "FIELD_EXPRESSION" - | "FIELD_ACCESS" - | "MEMBER_EXPRESSION" - | "CALL" - ) { - return self.call_parts(node); - } - let nodes = node - .children - .iter() - .filter_map(ast::node) - .collect::>(); - let receiver = nodes.first().copied(); - let message = nodes.last().and_then(|child| self.node_name(child))?; - Some(CallParts { - receiver, - message, - no_args: self.no_call_arguments(args), - }) - } - - fn child_name(&self, child: &Child) -> Option { - match child { - Child::String(s) | Child::Symbol(s) => Some(s.clone()), - Child::Node(node) => self.node_name(node), - _ => None, - } - } - - fn node_name(&self, node: &Node) -> Option { - match node.children.first() { - Some(Child::String(s)) | Some(Child::Symbol(s)) => Some(s.clone()), - _ => { - let text = ast::slice(node, &self.lines).trim().to_string(); - (!text.is_empty()).then_some(text) - } - } - } - - fn no_call_arguments(&self, args: Option<&Child>) -> bool { - match args { - None | Some(Child::Nil) => true, - Some(Child::Node(node)) => { - !node.children.iter().any(|child| ast::node(child).is_some()) - } - Some(_) => 
false, - } - } - - fn parenthesized_wrapper(&self, node: &Node) -> bool { - matches!( - node.r#type.as_str(), - "CONDITION_CLAUSE" | "PARENTHESIZED_EXPRESSION" | "PARENTHESIZED_STATEMENTS" - ) && self.first_node_child(node).is_some() - } - - fn first_node_child<'a>(&self, node: &'a Node) -> Option<&'a Node> { - node.children.iter().find_map(ast::node) - } - - fn stable_reader_name(&self, mid: &str) -> bool { - !(mid.ends_with('=') || mid.ends_with('!') || mid == "[]") - } - - fn nil_arg(&self, args: Option<&Child>) -> bool { - let Some(Child::Node(node)) = args else { - return false; - }; - if node.r#type != "LIST" { - return false; - } - node.children.iter().any(|c| match c { - Child::Node(n) => n.r#type == "NIL", - Child::Nil => true, - _ => false, - }) - } - - fn stmts_for<'a>(&self, node: Option<&'a Node>) -> Vec<&'a Node> { - let Some(node) = node else { return Vec::new() }; - if self.call_parts(node).is_some() { - return vec![node]; - } - if node.r#type == "BLOCK" { - node.children.iter().filter_map(ast::node).collect() - } else { - vec![node] - } - } - - fn terminating(&self, node: &Node) -> bool { - if matches!(node.r#type.as_str(), "RETURN" | "NEXT" | "BREAK") { - return true; - } - if !matches!(node.r#type.as_str(), "FCALL" | "VCALL" | "CALL") - && self.call_parts(node).is_none() - { - return false; - } - - let mid = if let Some(call) = self.call_parts(node) { - Some(call.message) - } else if node.r#type == "CALL" { - node.children.get(1).and_then(|c| match c { - Child::String(s) | Child::Symbol(s) => Some(s.clone()), - _ => None, - }) - } else { - node.children.get(0).and_then(|c| match c { - Child::String(s) | Child::Symbol(s) => Some(s.clone()), - _ => None, - }) - }; - - if let Some(mid) = mid { - return TERMINATING_CALLS.contains(&mid.as_str()); - } - false - } - - fn span(&self, node: &Node) -> Span { - [ - node.first_lineno, - node.first_column, - node.last_lineno, - node.last_column, - ] - } -} +pub use 
crate::decomplex::syntax::redundant_nil_guard::{ + scan_documents, scan_files, RedundantNilGuardRow, +}; diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index 36672ba5c..244ae00a5 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -1,5 +1,8 @@ pub(crate) mod adapters; pub(crate) mod complexity; +pub mod local_flow; +pub mod path_condition; +pub mod redundant_nil_guard; pub mod tree_sitter_adapter; use crate::decomplex::ast::{Node as NormalizedNode, RawNode, Span}; @@ -142,6 +145,10 @@ pub struct Document { pub comparison_uses: Vec, #[serde(default)] pub path_condition_sites: Vec, + #[serde(default)] + pub protocol_method_effects: Vec, + #[serde(default)] + pub protocol_call_paths: Vec, } fn empty_raw_node() -> RawNode { @@ -318,6 +325,35 @@ pub struct PathConditionSite { pub span: Span, } +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct ProtocolMethodEffect { + pub file: String, + pub owner: String, + pub name: String, + pub line: usize, + pub reads: Vec, + pub writes: Vec, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct ProtocolCall { + pub mid: String, + pub file: String, + pub owner: String, + pub defn: String, + pub line: usize, + pub span: Span, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct ProtocolMethodPath { + pub file: String, + pub owner: String, + pub name: String, + pub line: usize, + pub calls: Vec, +} + #[derive(Clone, Debug)] pub(crate) struct CloneCandidate { pub(crate) file: String, @@ -351,6 +387,14 @@ pub fn parse_files(files: &[PathBuf], language: Language) -> Result Vec { + adapters::language_profile(document.language).clone_candidates(document) +} + +pub(crate) fn core_owner_names(document: &Document) -> &'static [&'static str] { + adapters::false_simplicity_lexicon::false_simplicity_lexicon(document.language).core_consts +} + 
#[cfg(test)] mod tests { use super::*; diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs index fe63908ee..389e9c7db 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs @@ -3,9 +3,13 @@ use super::super::tree_sitter_adapter::{ named_children, normalize_type_owner, strip_assignment_suffix, AssignmentTarget, CallTarget, Target, }; -use super::super::{CallSite, CloneCandidate, Document, FunctionDef, Language}; +use super::super::{ + CallSite, CloneCandidate, Document, FunctionDef, Language, ProtocolCall, ProtocolMethodEffect, + ProtocolMethodPath, SemanticEffectSite, StateRead, StateWrite, +}; use crate::decomplex::ast::{node_text, normalize_text, span, RawNode}; use std::collections::HashSet; +use std::path::Path; use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) const EMPTY_NODE_KINDS: &[&str] = &[]; @@ -370,6 +374,22 @@ pub(crate) trait LanguageProfile { false } + fn suppress_indexed_lhs_reads(&self) -> bool { + true + } + + fn indexed_lhs_descendants_are_writes(&self) -> bool { + true + } + + fn keyed_element_first_named_child_is_key(&self) -> bool { + true + } + + fn nested_assignment_dependencies_only(&self) -> bool { + false + } + fn implicit_state_accesses(&self) -> bool { false } @@ -428,6 +448,91 @@ pub(crate) trait LanguageProfile { fn after_collect_facts(&self, _functions: &mut Vec, _calls: &[CallSite]) {} + fn structural_semantic_effect_sites( + &self, + _root: Node<'_>, + _source: &str, + _file: &Path, + _functions: &[FunctionDef], + _state_reads: &[StateRead], + _state_writes: &[StateWrite], + ) -> Vec { + Vec::new() + } + + fn protocol_method_effects(&self, document: &Document) -> Vec { + document + .function_defs + .iter() + .map(|function_def| { + let mut reads = document + .state_reads + .iter() + .filter(|read| { + read.owner == function_def.owner && read.function == 
function_def.name + }) + .map(|read| normalize_protocol_state(&read.field)) + .collect::>(); + reads.sort(); + reads.dedup(); + + let mut writes = document + .state_writes + .iter() + .filter(|write| { + write.owner == function_def.owner && write.function == function_def.name + }) + .map(|write| normalize_protocol_state(&write.field)) + .collect::>(); + writes.sort(); + writes.dedup(); + + ProtocolMethodEffect { + file: function_def.file.clone(), + owner: function_def.owner.clone(), + name: protocol_method_name(&function_def.name), + line: function_def.line, + reads, + writes, + } + }) + .collect() + } + + fn protocol_call_paths(&self, document: &Document) -> Vec { + document + .function_defs + .iter() + .map(|function_def| { + let calls = document + .call_sites + .iter() + .filter(|call| { + call.owner == function_def.owner + && call.function == function_def.name + && call.receiver == "self" + }) + .map(|call| ProtocolCall { + mid: protocol_method_name(&call.message), + file: function_def.file.clone(), + owner: function_def.owner.clone(), + defn: protocol_method_name(&function_def.name), + line: call.line, + span: call.span, + }) + .collect(); + + ProtocolMethodPath { + file: function_def.file.clone(), + owner: function_def.owner.clone(), + name: protocol_method_name(&function_def.name), + line: function_def.line, + calls, + } + }) + .collect() + } + fn default_function_name(&self, node: Node<'_>, source: &str) -> Option { if !self.function_node_kinds().contains(&node.kind()) { return None; @@ -1673,6 +1778,20 @@ fn generic_branch_context(node: Node<'_>, source: &str) -> bool { .starts_with("if ") } +pub(crate) fn protocol_method_name(name: &str) -> String { + name.split(['.', ':']) + .filter(|part| !part.is_empty()) + .last() + .unwrap_or(name) + .to_string() +} + +pub(crate) fn normalize_protocol_state(name: &str) -> String { + name.trim_start_matches('@') + .trim_end_matches('=') + .to_string() +} + fn clone_node_key(node: &RawNode) -> String { format!( 
"{}\0{}\0{}\0{}\0{}\0{}", diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs index 3165da531..858c1638e 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs @@ -1,15 +1,88 @@ use super::super::tree_sitter_adapter::{ - first_child_kind, first_named_text, named_children, next_sibling_raw_text, + direct_operator, first_child_kind, first_named_text, named_children, next_sibling_raw_text, previous_sibling_raw_text, AssignmentTarget, CallTarget, Target, }; -use super::super::{CallSite, FunctionDef, Language}; -use super::base::LanguageProfile; -use crate::decomplex::ast::{node_text, normalize_text, span}; +use super::super::{ + CallSite, Document, FunctionDef, Language, ProtocolMethodEffect, SemanticEffectSite, StateRead, + StateWrite, +}; +use super::base::{normalize_protocol_state, protocol_method_name, LanguageProfile}; +use crate::decomplex::ast::{node_text, normalize_text, span, RawNode}; use regex::Regex; +use std::collections::BTreeSet; +use std::path::Path; use tree_sitter::{Language as TreeSitterLanguage, Node}; pub(crate) struct RubyProfile; +const RUBY_PROTOCOL_IGNORED_MIDS: &[&str] = &[ + "abstract!", + "alias_method", + "any", + "attr_accessor", + "attr_reader", + "attr_writer", + "bind", + "cast", + "checked", + "enum", + "extend", + "final", + "include", + "interface!", + "let", + "must", + "must_because", + "nilable", + "override", + "overridable", + "params", + "prepend", + "private", + "private_class_method", + "protected", + "public", + "require", + "require_relative", + "requires_ancestor", + "sealed!", + "sig", + "type_member", + "type_template", + "untyped", + "unsafe", + "void", + "a_kind_of", + "after", + "around", + "before", + "be", + "be_a", + "be_an", + "be_empty", + "be_falsey", + "be_nil", + "be_truthy", + "change", + "contain_exactly", + "context", + "describe", + "eq", + "eql", + 
"equal", + "expect", + "have_attributes", + "have_key", + "have_received", + "it", + "match", + "not_to", + "raise_error", + "receive", + "subject", + "to", +]; + impl LanguageProfile for RubyProfile { fn language(&self) -> Language { Language::Ruby @@ -148,12 +221,10 @@ impl LanguageProfile for RubyProfile { ruby_proc_call_target(node, source).or_else(|| ruby_call_target(node, source)) } "body_statement" => ruby_bare_body_call_target(node, source), - "identifier" => ruby_bare_call_target(node, source), + "identifier" => ruby_visibility_identifier_call_target(node, source) + .or_else(|| ruby_bare_call_target(node, source)), _ => None, }?; - if ruby_brace_block_parameter_receiver(node, &target.receiver, source) { - return None; - } if target.arguments.is_empty() && !ruby_call_has_block(node) { if let Some(span) = ruby_narrow_no_arg_call_span(node, source, &target.receiver, &target.message) @@ -225,6 +296,65 @@ impl LanguageProfile for RubyProfile { apply_ruby_visibility(functions, calls); } + fn structural_semantic_effect_sites( + &self, + root: Node<'_>, + source: &str, + file: &Path, + functions: &[FunctionDef], + state_reads: &[StateRead], + state_writes: &[StateWrite], + ) -> Vec { + ruby_structural_semantic_effect_sites( + root, + source, + file, + functions, + state_reads, + state_writes, + ) + } + + fn protocol_method_effects(&self, document: &Document) -> Vec { + document + .function_defs + .iter() + .map(|function_def| { + let mut reads = document + .state_reads + .iter() + .filter(|read| { + read.owner == function_def.owner && read.function == function_def.name + }) + .map(|read| normalize_protocol_state(&read.field)) + .collect::>(); + reads.extend(ruby_protocol_bare_reads(function_def)); + reads.sort(); + reads.dedup(); + + let mut writes = document + .state_writes + .iter() + .filter(|write| { + write.owner == function_def.owner && write.function == function_def.name + }) + .map(|write| normalize_protocol_state(&write.field)) + .collect::>(); + 
writes.sort(); + writes.dedup(); + + ProtocolMethodEffect { + file: function_def.file.clone(), + owner: function_def.owner.clone(), + name: protocol_method_name(&function_def.name), + line: function_def.line, + reads, + writes, + } + }) + .collect() + } + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { if node.kind() == "body_statement" && matches!(first_child_kind(node), Some("class" | "module")) @@ -269,11 +399,7 @@ impl LanguageProfile for RubyProfile { return None; } let target = ruby_state_variable_target(node, source) - .or_else(|| ruby_bare_state_reader_target(node, source)) .or_else(|| self.default_state_read_target(node, source))?; - if ruby_state_block_parameter_receiver(node, &target.receiver, source) { - return None; - } Some(target) } @@ -300,6 +426,22 @@ impl LanguageProfile for RubyProfile { fn skip_state_write_target(&self, target: &Target) -> bool { target.field == "[]" || target.field.starts_with('$') } + + fn suppress_indexed_lhs_reads(&self) -> bool { + false + } + + fn indexed_lhs_descendants_are_writes(&self) -> bool { + false + } + + fn keyed_element_first_named_child_is_key(&self) -> bool { + false + } + + fn nested_assignment_dependencies_only(&self) -> bool { + true + } } fn hidden_ruby_method_name(node: Node<'_>, source: &str) -> Option { @@ -438,6 +580,26 @@ fn ruby_visibility_call(call: &CallSite) -> bool { && matches!(call.message.as_str(), "public" | "protected" | "private") } +fn ruby_visibility_identifier_call_target<'tree>( + node: Node<'tree>, + source: &str, +) -> Option> { + let message = node_text(node, source); + if !matches!(message, "private" | "protected" | "public") { + return None; + } + let parent = node.parent()?; + if matches!( + parent.kind(), + "call" | "argument_list" | "method_parameters" | "block_parameters" | "assignment" + ) { + return None; + } + let mut target = CallTarget::new("self".to_string(), message.to_string(), Vec::new()); + target.source_node = Some(node); + 
Some(target) +} + fn ruby_visibility_arg_name(argument: &str) -> String { argument .trim() @@ -577,7 +739,7 @@ fn ruby_argument_texts(node: Node<'_>, source: &str) -> Vec { } let values = named_children(args) .into_iter() - .map(|child| normalize_text(node_text(child, source))) + .map(|child| ruby_argument_text(child, args, source)) .collect::>(); if !values.is_empty() { return values; @@ -593,6 +755,26 @@ fn ruby_argument_texts(node: Node<'_>, source: &str) -> Vec { .collect() } +fn ruby_argument_text(node: Node<'_>, args: Node<'_>, source: &str) -> String { + if node.kind() == "string" && !node_text(args, source).trim_start().starts_with('(') { + if let Some(content) = named_children(node) + .into_iter() + .find(|child| child.kind() == "string_content") + { + return normalize_text(node_text(content, source)); + } + let text = normalize_text(node_text(node, source)); + if text.len() >= 2 + && ((text.starts_with('"') && text.ends_with('"')) + || (text.starts_with('\'') && text.ends_with('\''))) + { + return text[1..text.len() - 1].to_string(); + } + return text; + } + normalize_text(node_text(node, source)) +} + fn ruby_inline_def_argument_texts(args: Node<'_>, source: &str) -> Option> { let children = named_children(args); if children.len() != 1 || first_child_kind(children[0]) != Some("def") { @@ -625,6 +807,311 @@ fn ruby_inline_def_argument_texts(args: Node<'_>, source: &str) -> Option, + source: &str, + file: &Path, + functions: &[FunctionDef], + state_reads: &[StateRead], + state_writes: &[StateWrite], +) -> Vec { + let file_name = file.to_string_lossy().to_string(); + let mut out = Vec::new(); + out.extend(ruby_global_context_effects(source, state_reads)); + out.extend(ruby_state_mutation_effects(state_writes)); + out.extend(ruby_method_hook_effects(functions)); + ruby_collect_structural_effect_nodes(root, source, &file_name, functions, &mut out); + out +} + +fn ruby_global_context_effects(source: &str, state_reads: &[StateRead]) -> Vec { + state_reads + 
.iter() + .filter(|read| read.field.starts_with('$')) + .filter(|read| !ruby_global_assignment_read(source, read)) + .map(|read| SemanticEffectSite { + kind: "context_dependency".to_string(), + detail: read.field.clone(), + file: read.file.clone(), + function: read.function.clone(), + line: read.line, + span: read.span, + }) + .collect() +} + +fn ruby_global_assignment_read(source: &str, read: &StateRead) -> bool { + let line_text = source + .lines() + .nth(read.line.saturating_sub(1)) + .unwrap_or(""); + line_text + .chars() + .skip(read.span[3]) + .collect::() + .trim_start() + .starts_with('=') +} + +fn ruby_state_mutation_effects(state_writes: &[StateWrite]) -> Vec { + state_writes + .iter() + .filter(|write| write.receiver != "self") + .filter(|write| !write.field.starts_with('@') && !write.field.starts_with('$')) + .map(|write| SemanticEffectSite { + kind: "hidden_mutation".to_string(), + detail: format!("{}=", write.field), + file: write.file.clone(), + function: write.function.clone(), + line: write.line, + span: write.span, + }) + .collect() +} + +fn ruby_method_hook_effects(functions: &[FunctionDef]) -> Vec { + functions + .iter() + .filter_map(|function| { + let name = function + .name + .split('.') + .last() + .unwrap_or(function.name.as_str()); + matches!(name, "method_missing" | "respond_to_missing?").then(|| SemanticEffectSite { + kind: "metaprogramming".to_string(), + detail: format!("def {name}"), + file: function.file.clone(), + function: function.name.clone(), + line: function.line, + span: function.span, + }) + }) + .collect() +} + +fn ruby_collect_structural_effect_nodes( + node: Node<'_>, + source: &str, + file: &str, + functions: &[FunctionDef], + out: &mut Vec, +) { + out.extend(ruby_structural_effect_for_node( + node, source, file, functions, + )); + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + ruby_collect_structural_effect_nodes(child, source, file, functions, out); + } +} + +fn 
ruby_structural_effect_for_node( + node: Node<'_>, + source: &str, + file: &str, + functions: &[FunctionDef], +) -> Vec { + match node.kind() { + "yield" => vec![ruby_semantic_effect_site( + node, + source, + file, + functions, + "dynamic_dispatch", + "yield", + )], + "subshell" => vec![ruby_semantic_effect_site( + node, + source, + file, + functions, + "hidden_io", + "backtick", + )], + "singleton_class" => ruby_singleton_class_effect(node, source, file, functions), + "element_reference" => ruby_element_reference_effect(node, source, file, functions), + "assignment" => ruby_assignment_effects(node, source, file, functions), + "operator_assignment" => ruby_operator_assignment_effect(node, source, file, functions), + "binary" => ruby_binary_effect(node, source, file, functions), + _ => Vec::new(), + } +} + +fn ruby_singleton_class_effect( + node: Node<'_>, + source: &str, + file: &str, + functions: &[FunctionDef], +) -> Vec { + let Some(receiver) = named_children(node).into_iter().next() else { + return Vec::new(); + }; + if node_text(receiver, source) == "self" { + return Vec::new(); + } + vec![ruby_semantic_effect_site( + node, + source, + file, + functions, + "metaprogramming", + &format!("class << {}", normalize_text(node_text(receiver, source))), + )] +} + +fn ruby_element_reference_effect( + node: Node<'_>, + source: &str, + file: &str, + functions: &[FunctionDef], +) -> Vec { + let Some(receiver) = named_children(node).into_iter().next() else { + return Vec::new(); + }; + if node_text(receiver, source) != "ENV" { + return Vec::new(); + } + vec![ruby_semantic_effect_site( + node, + source, + file, + functions, + "context_dependency", + "ENV", + )] +} + +fn ruby_assignment_effects( + node: Node<'_>, + source: &str, + file: &str, + functions: &[FunctionDef], +) -> Vec { + let lhs = node + .child_by_field_name("left") + .or_else(|| named_children(node).into_iter().next()); + let Some(lhs) = lhs else { + return Vec::new(); + }; + let mut out = Vec::new(); + if 
lhs.kind() == "global_variable" { + out.push(ruby_semantic_effect_site( + node, + source, + file, + functions, + "context_dependency", + node_text(lhs, source), + )); + } + if lhs.kind() == "element_reference" { + let receiver = named_children(lhs).into_iter().next(); + if receiver + .map(|receiver| node_text(receiver, source) != "ENV") + .unwrap_or(true) + { + out.push(ruby_semantic_effect_site( + node, + source, + file, + functions, + "hidden_mutation", + "[]=", + )); + } + } + out +} + +fn ruby_operator_assignment_effect( + node: Node<'_>, + source: &str, + file: &str, + functions: &[FunctionDef], +) -> Vec { + let lhs = node + .child_by_field_name("left") + .or_else(|| named_children(node).into_iter().next()); + if ruby_local_operator_assignment_lhs(lhs) { + return Vec::new(); + } + vec![ruby_semantic_effect_site( + node, + source, + file, + functions, + "hidden_mutation", + "op-assign", + )] +} + +fn ruby_local_operator_assignment_lhs(lhs: Option>) -> bool { + let Some(lhs) = lhs else { + return true; + }; + matches!( + lhs.kind(), + "identifier" | "instance_variable" | "global_variable" + ) +} + +fn ruby_binary_effect( + node: Node<'_>, + source: &str, + file: &str, + functions: &[FunctionDef], +) -> Vec { + if direct_operator(node) != "<<" { + return Vec::new(); + } + vec![ruby_semantic_effect_site( + node, + source, + file, + functions, + "hidden_mutation", + "<<", + )] +} + +fn ruby_semantic_effect_site( + node: Node<'_>, + _source: &str, + file: &str, + functions: &[FunctionDef], + kind: &str, + detail: &str, +) -> SemanticEffectSite { + let site_span = span(node); + SemanticEffectSite { + kind: kind.to_string(), + detail: detail.to_string(), + file: file.to_string(), + function: ruby_effect_function(functions, site_span), + line: site_span[0], + span: site_span, + } +} + +fn ruby_effect_function(functions: &[FunctionDef], site_span: [usize; 4]) -> String { + functions + .iter() + .filter(|function| span_contains(function.span, site_span)) + 
.min_by_key(|function| span_width(function.span)) + .map(|function| function.name.clone()) + .unwrap_or_else(|| "(top-level)".to_string()) +} + +fn span_contains(outer: [usize; 4], inner: [usize; 4]) -> bool { + (outer[0] < inner[0] || (outer[0] == inner[0] && outer[1] <= inner[1])) + && (outer[2] > inner[2] || (outer[2] == inner[2] && outer[3] >= inner[3])) +} + +fn span_width(span: [usize; 4]) -> usize { + span[2].saturating_sub(span[0]) * 10_000 + span[3].saturating_sub(span[1]) +} + fn ruby_safe_navigation_call(node: Node<'_>, source: &str) -> bool { let mut cursor = node.walk(); let found = node @@ -686,6 +1173,167 @@ fn ruby_bare_call_identifier(node: Node<'_>, source: &str) -> bool { .unwrap_or(false) } +fn ruby_protocol_bare_reads(function_def: &FunctionDef) -> Vec { + let mut local_names = BTreeSet::new(); + local_names.extend(function_def.params.iter().cloned()); + ruby_protocol_collect_local_names(&function_def.body, &mut local_names, true); + + let mut reads = BTreeSet::new(); + ruby_protocol_collect_bare_reads(&function_def.body, None, &local_names, &mut reads, true); + reads.into_iter().collect() +} + +fn ruby_protocol_collect_local_names( + node: &RawNode, + local_names: &mut BTreeSet, + root: bool, +) { + if !root && ruby_protocol_nested_boundary(node) { + return; + } + if matches!(node.kind.as_str(), "assignment" | "operator_assignment") { + if let Some(lhs) = raw_named_children(node).first() { + if lhs.kind == "identifier" && ruby_simple_call_text(&lhs.text) { + local_names.insert(lhs.text.clone()); + } + } + } + if matches!(node.kind.as_str(), "block_parameters" | "method_parameters") { + for child in raw_named_children(node) { + if child.kind == "identifier" && ruby_simple_call_text(&child.text) { + local_names.insert(child.text.clone()); + } + } + } + for child in &node.children { + ruby_protocol_collect_local_names(child, local_names, false); + } +} + +fn ruby_protocol_collect_bare_reads( + node: &RawNode, + parent: Option<&RawNode>, + 
local_names: &BTreeSet, + reads: &mut BTreeSet, + root: bool, +) { + if !root && ruby_protocol_nested_boundary(node) { + return; + } + if node.kind == "identifier" && ruby_protocol_bare_reader(node, parent, local_names) { + reads.insert(normalize_protocol_state(&node.text)); + } + for child in &node.children { + ruby_protocol_collect_bare_reads(child, Some(node), local_names, reads, false); + } +} + +fn ruby_protocol_bare_reader( + node: &RawNode, + parent: Option<&RawNode>, + local_names: &BTreeSet, +) -> bool { + let name = node.text.as_str(); + if !ruby_simple_call_text(name) + || local_names.contains(name) + || RUBY_PROTOCOL_IGNORED_MIDS.contains(&name) + { + return false; + } + let Some(parent) = parent else { + return false; + }; + if ruby_protocol_declaration_name(node, parent) { + return false; + } + if matches!( + parent.kind.as_str(), + "call" + | "method_parameters" + | "block_parameters" + | "argument_list" + | "assignment" + | "operator_assignment" + | "pair" + | "hash_key_symbol" + ) { + return false; + } + if matches!( + raw_next_sibling_text(node, parent).as_deref(), + Some("=" | "." | ":") + ) || matches!( + raw_previous_sibling_text(node, parent).as_deref(), + Some("=" | "." 
| ":") + ) { + return false; + } + true +} + +fn ruby_protocol_declaration_name(node: &RawNode, parent: &RawNode) -> bool { + if matches!( + parent.kind.as_str(), + "method" | "singleton_method" | "class" | "module" + ) { + return true; + } + if parent.kind == "body_statement" { + let stripped = parent.text.trim_start(); + if stripped.starts_with("def ") + || stripped.starts_with("class ") + || stripped.starts_with("module ") + { + return true; + } + } + node.kind == "identifier" && parent.kind == "method_parameters" +} + +fn ruby_protocol_nested_boundary(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "class" | "module" | "method" | "singleton_method" | "lambda" + ) || (node.kind == "body_statement" + && matches!( + raw_first_child_kind(node).as_deref(), + Some("def" | "class" | "module") + )) +} + +fn raw_named_children(node: &RawNode) -> Vec<&RawNode> { + node.children.iter().filter(|child| child.named).collect() +} + +fn raw_first_child_kind(node: &RawNode) -> Option { + node.children.first().map(|child| child.kind.clone()) +} + +fn raw_next_sibling_text(node: &RawNode, parent: &RawNode) -> Option { + let index = raw_child_index(node, parent)?; + parent + .children + .get(index + 1) + .map(|sibling| sibling.text.clone()) +} + +fn raw_previous_sibling_text(node: &RawNode, parent: &RawNode) -> Option { + let index = raw_child_index(node, parent)?; + index + .checked_sub(1) + .and_then(|previous| parent.children.get(previous)) + .map(|sibling| sibling.text.clone()) +} + +fn raw_child_index(node: &RawNode, parent: &RawNode) -> Option { + parent.children.iter().position(|child| { + child.kind == node.kind + && child.text == node.text + && child.span == node.span + && child.named == node.named + }) +} + fn ruby_declaration_name(node: Node<'_>, parent: Node<'_>, source: &str) -> bool { if matches!( parent.kind(), @@ -735,76 +1383,6 @@ fn ruby_embedded_text_node(node: Node<'_>) -> bool { false } -fn ruby_brace_block_parameter_receiver(node: Node<'_>, 
receiver: &str, source: &str) -> bool { - if receiver.contains('.') || receiver.contains('[') || receiver == "self" { - return false; - } - let mut current = node.parent(); - while let Some(parent) = current { - if parent.kind() == "block" { - return ruby_block_parameters(parent, source) - .into_iter() - .any(|param| param == receiver); - } - if matches!( - parent.kind(), - "method" | "singleton_method" | "body_statement" - ) { - return false; - } - current = parent.parent(); - } - false -} - -fn ruby_state_block_parameter_receiver(node: Node<'_>, receiver: &str, source: &str) -> bool { - if ruby_brace_block_parameter_receiver(node, receiver, source) { - return true; - } - if receiver.contains('.') || receiver.contains('[') || receiver == "self" { - return false; - } - let mut current = node.parent(); - while let Some(parent) = current { - if parent.kind() == "do_block" { - return ruby_block_parameters(parent, source) - .into_iter() - .any(|param| param == receiver); - } - if parent.kind() == "body_statement" - && parent - .parent() - .map(|grandparent| grandparent.kind() == "do_block") - .unwrap_or(false) - { - current = parent.parent(); - continue; - } - if matches!( - parent.kind(), - "method" | "singleton_method" | "body_statement" - ) { - return false; - } - current = parent.parent(); - } - false -} - -fn ruby_block_parameters(block: Node<'_>, source: &str) -> Vec { - named_children(block) - .into_iter() - .find(|child| child.kind() == "block_parameters") - .map(|params| { - named_children(params) - .into_iter() - .filter(|child| child.kind() == "identifier") - .map(|child| node_text(child, source).to_string()) - .collect() - }) - .unwrap_or_default() -} - fn ruby_narrow_no_arg_call_span( node: Node<'_>, source: &str, @@ -882,46 +1460,6 @@ fn ruby_state_variable_target(node: Node<'_>, source: &str) -> Option { }) } -fn ruby_bare_state_reader_target(node: Node<'_>, source: &str) -> Option { - if node.kind() != "identifier" || 
!ruby_simple_call_text(node_text(node, source)) { - return None; - } - if matches!(node_text(node, source), "private" | "protected" | "public") { - return None; - } - let parent = node.parent()?; - if ruby_declaration_name(node, parent, source) { - return None; - } - if matches!( - parent.kind(), - "call" - | "method_parameters" - | "block_parameters" - | "argument_list" - | "assignment" - | "operator_assignment" - | "pair" - | "hash_key_symbol" - ) { - return None; - } - if next_sibling_raw_text(node).as_deref() == Some("=") - || previous_sibling_raw_text(node).as_deref() == Some("=") - || next_sibling_raw_text(node).as_deref() == Some(".") - || previous_sibling_raw_text(node).as_deref() == Some(".") - || next_sibling_raw_text(node).as_deref() == Some(":") - || previous_sibling_raw_text(node).as_deref() == Some(":") - { - return None; - } - - Some(Target { - receiver: "self".to_string(), - field: node_text(node, source).to_string(), - }) -} - fn ruby_sorbet_signature_payload_node(node: Node<'_>, source: &str) -> bool { let mut current = Some(node); while let Some(candidate) = current { diff --git a/gems/decomplex/rust/src/decomplex/syntax/local_flow.rs b/gems/decomplex/rust/src/decomplex/syntax/local_flow.rs new file mode 100644 index 000000000..11c654267 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/local_flow.rs @@ -0,0 +1,2168 @@ +use crate::decomplex::ast::{self, Child, Node, RawNode, Span}; +use crate::decomplex::syntax::adapters::{language_profile, LanguageProfile}; +use crate::decomplex::syntax::{Document, FunctionDef, Language}; +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::{Path, PathBuf}; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct LocalFlowRow { + pub summaries: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub struct MethodSummary { + pub id: String, + pub owner: String, + pub name: String, + pub file: String, + pub 
line: usize, + pub span: Span, + #[serde(default = "empty_node", skip_serializing)] + pub node: Node, + #[serde(default, skip_serializing)] + pub raw_node: Option, + pub statements: Vec, + pub boundaries: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub struct Statement { + pub index: usize, + pub line: usize, + pub end_line: usize, + pub span: Span, + pub source: String, + pub reads: BTreeSet, + pub writes: BTreeSet, + pub dependencies: Vec<(String, String)>, + pub co_uses: Vec<(String, String)>, +} + +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub struct Boundary { + pub before_index: usize, + pub after_index: usize, + pub line: usize, + pub kind: String, + pub text: String, +} + +const OWNER_TYPES: &[&str] = &["CLASS", "MODULE"]; +const METHOD_TYPES: &[&str] = &["DEFN", "DEFS"]; +const SKIP_NESTED_TYPES: &[&str] = &["CLASS", "MODULE", "DEFN", "DEFS", "LAMBDA"]; +const LOCAL_READ_TYPES: &[&str] = &["LVAR", "DVAR"]; +const LOCAL_WRITE_TYPES: &[&str] = &["LASGN", "DASGN"]; +const STATEMENT_CONTAINER_TYPES: &[&str] = &[ + "BLOCK", + "COMPOUND_STATEMENT", + "DECLARATION_LIST", + "FUNCTION_BODY", + "HASH", + "STATEMENTS", +]; + +fn empty_node() -> Node { + Node { + r#type: "ROOT".to_string(), + children: Vec::new(), + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 0, + text: String::new(), + } +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let documents = super::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + let mut out = Vec::new(); + for document in documents { + let normalized = normalized_local_methods(document); + let raw = raw_local_methods(document); + let raw_keys: BTreeSet<_> = raw.iter().map(method_summary_key).collect(); + out.extend(raw); + out.extend( + normalized + .into_iter() + .filter(|summary| !raw_keys.contains(&method_summary_key(summary))), + ); + } + out +} + +fn 
normalized_local_methods(document: &Document) -> Vec { + let mut detector = LocalFlow::new( + document.file.clone(), + document.lines.clone(), + method_metadata(document), + ); + detector.scan(&document.normalized_root) +} + +fn method_summary_key(summary: &MethodSummary) -> (String, String, usize) { + (summary.file.clone(), summary.id.clone(), summary.line) +} + +#[derive(Clone, Debug, Eq, PartialEq)] +struct MethodMetadata { + owner: String, + name: String, + params: BTreeSet, +} + +fn raw_local_methods(document: &Document) -> Vec { + let profile = language_profile(document.language); + document + .function_defs + .iter() + .map(|function| raw_method_summary(document, profile, function)) + .collect() +} + +fn raw_method_summary( + document: &Document, + profile: &dyn LanguageProfile, + function: &FunctionDef, +) -> MethodSummary { + let statement_nodes = raw_function_body_statements(&function.body, profile); + let local_names = raw_local_names(function, &statement_nodes, profile); + let statements: Vec<_> = statement_nodes + .iter() + .enumerate() + .map(|(index, statement)| raw_statement_summary(statement, index, &local_names, profile)) + .collect(); + let owner = local_flow_owner(&document.file, &function.owner); + + MethodSummary { + id: format!("{}#{}", owner, function.name), + owner, + name: function.name.clone(), + file: function.file.clone(), + line: function.line, + span: function.span, + node: normalized_node_for_span(&document.normalized_root, function.span) + .cloned() + .unwrap_or_else(|| fallback_node_from_raw(&function.body)), + raw_node: Some(function.body.clone()), + boundaries: raw_structural_boundaries(document, &statements), + statements, + } +} + +fn raw_function_body_statements<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Vec<&'a RawNode> { + let body = raw_function_body_node(node, profile); + let Some(body) = body else { + return Vec::new(); + }; + + let mut named = raw_named_children(body) + .into_iter() + 
.filter(|child| !raw_comment_node(child)) + .collect::>(); + if named.len() == 1 + && profile + .nested_statement_wrapper_node_kinds() + .contains(&named[0].kind.as_str()) + { + if raw_branch_node(named[0], profile) { + return vec![named[0]]; + } + named = raw_named_children(named[0]) + .into_iter() + .filter(|child| !raw_comment_node(child)) + .collect(); + } + if named.is_empty() && body.text.trim().is_empty() { + return Vec::new(); + } + if raw_branch_node(body, profile) || raw_assignment_statement(body, profile) || named.is_empty() + { + return vec![body]; + } + named +} + +fn raw_function_body_node<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Option<&'a RawNode> { + raw_named_children(node).into_iter().rev().find(|child| { + profile + .function_body_node_kinds() + .contains(&child.kind.as_str()) + }) +} + +fn raw_local_names( + function: &FunctionDef, + statements: &[&RawNode], + profile: &dyn LanguageProfile, +) -> BTreeSet { + let mut names: BTreeSet = function.params.iter().cloned().collect(); + if let Some(receiver) = raw_function_receiver_name(&function.body, profile) { + names.insert(receiver); + } + for statement in statements { + names.extend(raw_local_writes(statement, profile)); + } + names +} + +fn raw_function_receiver_name(node: &RawNode, profile: &dyn LanguageProfile) -> Option { + if !profile + .method_receiver_node_kinds() + .contains(&node.kind.as_str()) + { + return None; + } + let receiver_params = raw_named_children(node).into_iter().find(|child| { + profile + .parameter_list_node_kinds() + .contains(&child.kind.as_str()) + })?; + let receiver = raw_named_children(receiver_params) + .into_iter() + .find(|child| { + profile + .receiver_parameter_node_kinds() + .contains(&child.kind.as_str()) + })?; + let name = raw_named_children(receiver).into_iter().find(|child| { + profile + .first_argument_receiver_name_node_kinds() + .contains(&child.kind.as_str()) + })?; + raw_local_identifier_text(name, profile) +} + +fn 
raw_statement_summary( + node: &RawNode, + index: usize, + local_names: &BTreeSet, + profile: &dyn LanguageProfile, +) -> Statement { + let writes = raw_local_writes(node, profile); + let reads = raw_local_reads(node, local_names, profile); + Statement { + index, + line: node.span[0], + end_line: node.span[2], + span: node.span, + source: profile.normalize_source_text(&node.text), + dependencies: raw_assignment_dependencies(node, local_names, profile), + co_uses: co_use_pairs(&reads), + reads, + writes, + } +} + +fn raw_local_reads( + node: &RawNode, + local_names: &BTreeSet, + profile: &dyn LanguageProfile, +) -> BTreeSet { + if raw_nested_local_scope(node, profile) { + return BTreeSet::new(); + } + + let mut reads = Vec::new(); + raw_walk_local(node, None, node, profile, &mut |child, parent| { + let Some(name) = raw_local_identifier_text(child, profile) else { + return; + }; + if local_names.contains(&name) + && !raw_local_write_node(child, parent, profile) + && !raw_assignment_lhs_read_in_tree(node, child, profile) + && !raw_ruby_unary_assertion_argument(node, child, parent, profile) + && !raw_python_import_name(parent, profile) + && !raw_python_with_alias_read(child, parent, profile) + && !raw_declaration_name_in_tree(node, child, profile) + && !raw_declaration_name(child, parent, profile) + && !raw_member_name(child, parent, profile) + && !raw_keyed_element_key(child, parent, profile) + { + reads.push(name); + } + }); + reads.into_iter().collect() +} + +fn raw_local_writes(node: &RawNode, profile: &dyn LanguageProfile) -> BTreeSet { + if raw_nested_local_scope(node, profile) { + return BTreeSet::new(); + } + + let source = profile.normalize_source_text(&node.text); + let textual_writes_allowed = raw_assignment_statement(node, profile) + || profile + .local_declaration_node_kinds() + .contains(&node.kind.as_str()); + let mut writes = if !textual_writes_allowed { + Vec::new() + } else if profile.language() == Language::Python { + 
python_textual_local_writes(&source) + } else { + textual_local_writes(&source) + }; + if profile.language() == Language::Python { + writes.extend(raw_python_with_alias_names(node, profile)); + } + raw_walk_local(node, None, node, profile, &mut |child, parent| { + if raw_local_write_node(child, parent, profile) + || raw_declaration_name_in_tree(node, child, profile) + || raw_assignment_lhs_write_in_tree(node, child, profile) + { + if let Some(name) = raw_local_identifier_text(child, profile) { + writes.push(name); + } + } + }); + writes + .into_iter() + .filter_map(|name| { + let normalized = profile.normalize_local_identifier_text(&name); + (!normalized.is_empty()).then_some(normalized) + }) + .collect() +} + +fn raw_assignment_dependencies( + node: &RawNode, + local_names: &BTreeSet, + profile: &dyn LanguageProfile, +) -> Vec<(String, String)> { + if profile.nested_assignment_dependencies_only() { + return raw_nested_assignment_dependencies(node, local_names, profile); + } + + let lhs_names = raw_local_writes(node, profile); + if lhs_names.is_empty() { + return Vec::new(); + } + + let reads = raw_local_reads(node, local_names, profile); + let mut deps = Vec::new(); + for lhs in &lhs_names { + for read in &reads { + if lhs != read && !lhs_names.contains(read) { + deps.push((lhs.clone(), read.clone())); + } + } + } + deps.sort(); + deps.dedup(); + deps +} + +fn raw_nested_assignment_dependencies( + node: &RawNode, + local_names: &BTreeSet, + profile: &dyn LanguageProfile, +) -> Vec<(String, String)> { + let mut deps = Vec::new(); + raw_walk_local(node, None, node, profile, &mut |child, _parent| { + if !profile + .assignment_node_kinds() + .contains(&child.kind.as_str()) + { + return; + } + let children = raw_named_children(child); + let Some(lhs) = children.first().copied() else { + return; + }; + let Some(rhs) = children.get(1).copied() else { + return; + }; + let Some(lhs_name) = raw_local_identifier_text(lhs, profile) else { + return; + }; + for read in 
raw_local_reads(rhs, local_names, profile) { + if lhs_name != read { + deps.push((lhs_name.clone(), read)); + } + } + }); + deps.sort(); + deps.dedup(); + deps +} + +fn co_use_pairs(reads: &BTreeSet) -> Vec<(String, String)> { + let reads = reads.iter().cloned().collect::>(); + let mut out = Vec::new(); + for i in 0..reads.len() { + for j in i + 1..reads.len() { + out.push((reads[i].clone(), reads[j].clone())); + } + } + out +} + +fn raw_structural_boundaries(document: &Document, statements: &[Statement]) -> Vec { + let mut out = Vec::new(); + for i in 0..statements.len().saturating_sub(1) { + let left = &statements[i]; + let right = &statements[i + 1]; + if let Some(boundary) = raw_source_boundary(document, left.end_line + 1, right.line - 1) { + out.push(Boundary { + before_index: left.index, + after_index: right.index, + line: boundary.line, + kind: boundary.kind, + text: boundary.text, + }); + } + } + out +} + +fn raw_source_boundary( + document: &Document, + first_line: usize, + last_line: usize, +) -> Option { + if first_line > last_line { + return None; + } + + let mut blank = None; + for line_number in first_line..=last_line { + let stripped = document + .lines + .get(line_number - 1) + .map(|line| line.trim()) + .unwrap_or(""); + if stripped.starts_with('#') || stripped.starts_with("//") || stripped.starts_with("--") { + return Some(RawBoundary { + line: line_number, + kind: "comment".to_string(), + text: stripped.to_string(), + }); + } + if stripped.is_empty() && blank.is_none() { + blank = Some(RawBoundary { + line: line_number, + kind: "blank".to_string(), + text: stripped.to_string(), + }); + } + } + blank +} + +fn raw_walk_local<'a>( + node: &'a RawNode, + parent: Option<&'a RawNode>, + root: &'a RawNode, + profile: &dyn LanguageProfile, + block: &mut dyn FnMut(&'a RawNode, Option<&'a RawNode>), +) { + if !std::ptr::eq(node, root) && raw_nested_local_scope(node, profile) { + return; + } + block(node, parent); + for child in &node.children { + 
raw_walk_local(child, Some(node), root, profile, block); + } +} + +fn raw_nested_local_scope(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile.function_node_kinds().contains(&node.kind.as_str()) || raw_owner_node(node, profile) +} + +fn raw_owner_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile + .class_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .module_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .generic_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .impl_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .struct_owner_node_kinds() + .contains(&node.kind.as_str()) +} + +fn raw_local_identifier_text(node: &RawNode, profile: &dyn LanguageProfile) -> Option { + if profile.language() == Language::Ruby && node.kind != "identifier" { + return None; + } + if profile + .identifier_node_kinds() + .contains(&node.kind.as_str()) + { + let text = profile.normalize_local_identifier_text(&node.text); + return (!text.is_empty()).then_some(text); + } + if profile + .local_identifier_wrapper_node_kinds() + .contains(&node.kind.as_str()) + && node.named + && raw_named_children(node).is_empty() + && simple_identifier(&node.text) + { + let text = profile.normalize_local_identifier_text(&node.text); + return (!text.is_empty()).then_some(text); + } + None +} + +fn raw_ruby_unary_assertion_argument( + root: &RawNode, + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + if profile.language() != Language::Ruby { + return false; + } + let _ = parent; + let source = root.text.as_str(); + ["assert_empty", "refute_empty", "assert_nil", "refute_nil"] + .iter() + .any(|name| source.contains(&format!("{name} {}", node.text))) +} + +fn raw_local_write_node( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + if raw_local_identifier_text(node, profile).is_none() || raw_member_name(node, parent, 
profile) + { + return false; + } + if raw_call_name(node, parent, profile) { + return false; + } + if raw_declaration_name(node, parent, profile) { + return true; + } + let Some(parent) = parent else { + return false; + }; + if profile + .update_statement_node_kinds() + .contains(&parent.kind.as_str()) + && raw_named_children(parent) + .first() + .map(|target| std::ptr::eq(*target, node)) + .unwrap_or(false) + { + return true; + } + if profile + .assignment_node_kinds() + .contains(&parent.kind.as_str()) + { + if let Some(lhs) = raw_named_children(parent).first() { + if raw_indexed_lhs_node(lhs, profile) + && !profile.indexed_lhs_descendants_are_writes() + && raw_contains_node(lhs, node) + { + return false; + } + if raw_contains_node(lhs, node) { + return true; + } + } + } + if profile.language() == Language::Python { + if parent.kind == "keyword_argument" { + return false; + } + if raw_python_loop_target(node, parent) + || raw_python_named_expression_lhs(node, parent) + || raw_python_typed_assignment_lhs(node, parent) + || raw_python_annotation_lhs(node, parent) + { + return true; + } + } + raw_assignment_lhs(node, parent, profile) +} + +fn raw_python_loop_target(node: &RawNode, parent: &RawNode) -> bool { + if raw_previous_sibling(node, parent) + .map(|sibling| sibling.text.as_str() == "for") + .unwrap_or(false) + && raw_next_sibling(node, parent) + .map(|sibling| sibling.text.as_str() != ":") + .unwrap_or(false) + { + return true; + } + + let mut seen_for = false; + let mut current = raw_previous_sibling(node, parent); + while let Some(sibling) = current { + match sibling.text.as_str() { + "in" | ":" => return false, + "for" => { + seen_for = true; + break; + } + _ => current = raw_previous_sibling(sibling, parent), + } + } + if !seen_for { + return false; + } + + current = raw_next_sibling(node, parent); + while let Some(sibling) = current { + match sibling.text.as_str() { + "in" => return true, + ":" => return false, + _ => current = raw_next_sibling(sibling, 
parent), + } + } + false +} + +fn raw_python_typed_assignment_lhs(node: &RawNode, parent: &RawNode) -> bool { + let Some(colon) = raw_next_sibling(node, parent) else { + return false; + }; + if colon.text != ":" { + return false; + } + let Some(type_node) = raw_next_sibling(colon, parent) else { + return false; + }; + if type_node.kind != "type" { + return false; + } + raw_next_sibling(type_node, parent) + .map(|sibling| sibling.text.as_str() == "=") + .unwrap_or(false) +} + +fn raw_python_named_expression_lhs(node: &RawNode, parent: &RawNode) -> bool { + parent.kind == "named_expression" + && raw_named_children(parent) + .first() + .map(|lhs| std::ptr::eq(*lhs, node)) + .unwrap_or(false) + && raw_next_sibling(node, parent) + .map(|sibling| sibling.text.as_str() == ":=") + .unwrap_or(false) +} + +fn raw_python_annotation_lhs(node: &RawNode, parent: &RawNode) -> bool { + let Some(colon) = raw_next_sibling(node, parent) else { + return false; + }; + if colon.text != ":" { + return false; + } + let Some(type_node) = raw_next_sibling(colon, parent) else { + return false; + }; + if type_node.kind != "type" { + return false; + } + !raw_next_sibling(type_node, parent) + .map(|sibling| sibling.text.as_str() == "=") + .unwrap_or(false) +} + +fn raw_python_with_alias_names(node: &RawNode, profile: &dyn LanguageProfile) -> Vec { + let mut names = Vec::new(); + raw_walk_local(node, None, node, profile, &mut |child, _parent| { + if child.kind == "as_pattern_target" && simple_identifier(&child.text) { + names.push(child.text.clone()); + } + }); + names +} + +fn raw_python_import_name(parent: Option<&RawNode>, profile: &dyn LanguageProfile) -> bool { + profile.language() == Language::Python + && parent + .map(|parent| parent.kind.as_str() == "dotted_name") + .unwrap_or(false) +} + +fn raw_python_with_alias_read( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + profile.language() == Language::Python + && (node.kind == 
"as_pattern_target" + || parent + .map(|parent| parent.kind.as_str() == "as_pattern_target") + .unwrap_or(false)) +} + +fn python_textual_local_writes(source: &str) -> Vec { + match split_assignment(source) { + Some((_lhs, ":=")) => Vec::new(), + _ => textual_local_writes(source), + } +} + +fn raw_declaration_name( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + parent + .map(|parent| { + raw_local_declaration_name_nodes(parent, profile) + .into_iter() + .any(|name| std::ptr::eq(name, node) || raw_contains_node(name, node)) + }) + .unwrap_or(false) +} + +fn raw_declaration_name_in_tree( + root: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + raw_local_declaration_name_nodes(root, profile) + .into_iter() + .any(|name| std::ptr::eq(name, target) || raw_contains_node(name, target)) + || root + .children + .iter() + .any(|child| raw_declaration_name_in_tree(child, target, profile)) +} + +fn raw_local_declaration_name_nodes<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Vec<&'a RawNode> { + if !profile + .local_declaration_node_kinds() + .contains(&node.kind.as_str()) + { + return Vec::new(); + } + + if profile + .short_variable_declaration_node_kinds() + .contains(&node.kind.as_str()) + { + if let Some(left) = raw_named_children(node).into_iter().find(|child| { + profile + .variable_declaration_node_kinds() + .contains(&child.kind.as_str()) + }) { + let identifiers = raw_named_children(left) + .into_iter() + .filter(|child| raw_local_identifier_text(child, profile).is_some()) + .collect::>(); + if !identifiers.is_empty() { + return identifiers; + } + if simple_identifier(&left.text) { + return vec![left]; + } + } + return Vec::new(); + } + + let variables = raw_variable_declaration_nodes(node, profile); + if !variables.is_empty() { + let names = variables + .into_iter() + .flat_map(|variable| raw_variable_declaration_name_nodes(variable, profile)) + .collect::>(); + if 
!names.is_empty() { + return names; + } + } + + if let Some(declaration_assignment) = raw_named_children(node).into_iter().find(|child| { + profile + .declaration_assignment_node_kinds() + .contains(&child.kind.as_str()) + }) { + if let Some(lhs) = raw_named_children(declaration_assignment).first().copied() { + return raw_first_identifier(lhs, profile) + .or(Some(lhs)) + .into_iter() + .collect(); + } + } + + raw_named_children(node) + .into_iter() + .find(|child| { + profile + .local_identifier_wrapper_node_kinds() + .contains(&child.kind.as_str()) + }) + .or_else(|| raw_first_identifier(node, profile)) + .into_iter() + .collect() +} + +fn raw_variable_declaration_nodes<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Vec<&'a RawNode> { + let mut out = Vec::new(); + raw_collect_variable_declaration_nodes(node, profile, &mut out); + out +} + +fn raw_collect_variable_declaration_nodes<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, + out: &mut Vec<&'a RawNode>, +) { + if profile + .variable_declaration_node_kinds() + .contains(&node.kind.as_str()) + { + out.push(node); + return; + } + for child in raw_named_children(node) { + raw_collect_variable_declaration_nodes(child, profile, out); + } +} + +fn raw_variable_declaration_name_nodes<'a>( + variable: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Vec<&'a RawNode> { + if simple_identifier(&variable.text) { + return vec![variable]; + } + + if profile + .multi_name_variable_declaration_node_kinds() + .contains(&variable.kind.as_str()) + { + let names = raw_named_children(variable) + .into_iter() + .take_while(|child| raw_local_identifier_text(child, profile).is_some()) + .collect::>(); + if !names.is_empty() { + return names; + } + } + + raw_first_identifier(variable, profile) + .into_iter() + .collect() +} + +fn raw_first_identifier<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Option<&'a RawNode> { + if raw_local_identifier_text(node, profile).is_some() { + return 
Some(node); + } + node.children + .iter() + .find_map(|child| raw_first_identifier(child, profile)) +} + +fn raw_assignment_lhs(node: &RawNode, parent: &RawNode, profile: &dyn LanguageProfile) -> bool { + if raw_previous_sibling(node, parent) + .map(|sibling| sibling.text.as_str() == ":") + .unwrap_or(false) + { + return false; + } + raw_next_sibling(node, parent) + .map(|sibling| { + !sibling.named + && profile + .assignment_operator_tokens() + .contains(&sibling.text.as_str()) + }) + .unwrap_or(false) +} + +fn raw_assignment_lhs_read_in_tree( + root: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + if profile + .deferred_statement_node_kinds() + .contains(&root.kind.as_str()) + { + return false; + } + if profile + .assignment_node_kinds() + .contains(&root.kind.as_str()) + { + if let Some(lhs) = raw_named_children(root).first() { + if raw_assignment_lhs_read_target(lhs, target, profile) { + return true; + } + } + } + root.children + .iter() + .any(|child| raw_assignment_lhs_read_in_tree(child, target, profile)) +} + +fn raw_assignment_lhs_write_in_tree( + root: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + if profile + .deferred_statement_node_kinds() + .contains(&root.kind.as_str()) + { + return false; + } + if profile + .assignment_node_kinds() + .contains(&root.kind.as_str()) + { + if let Some(lhs) = raw_named_children(root).first() { + if raw_assignment_lhs_write_target(lhs, target, profile) { + return true; + } + } + } + root.children + .iter() + .any(|child| raw_assignment_lhs_write_in_tree(child, target, profile)) +} + +fn raw_assignment_lhs_read_target( + lhs: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + if raw_indexed_lhs_node(lhs, profile) { + return profile.suppress_indexed_lhs_reads() && raw_contains_node(lhs, target); + } + if raw_field_like_node(lhs, profile) { + return profile.suppress_field_receiver_lhs_reads() + && raw_member_receiver_target(lhs, target, 
profile); + } + if let Some(lhs_name) = raw_local_identifier_text(lhs, profile) { + return std::ptr::eq(lhs, target) + || (raw_contains_node(lhs, target) + && raw_local_identifier_text(target, profile) + .map(|target_name| target_name == lhs_name) + .unwrap_or(false)); + } + if profile + .expression_list_node_kinds() + .contains(&lhs.kind.as_str()) + { + if raw_named_children(lhs).is_empty() && raw_local_identifier_text(lhs, profile).is_some() { + return std::ptr::eq(lhs, target); + } + return raw_named_children(lhs) + .into_iter() + .any(|child| raw_assignment_lhs_read_target(child, target, profile)); + } + raw_contains_node(lhs, target) +} + +fn raw_assignment_lhs_write_target( + lhs: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + if profile.language() == Language::Ruby && lhs.kind == "element_reference" { + return false; + } + if raw_indexed_lhs_node(lhs, profile) { + return raw_named_children(lhs) + .first() + .map(|object| raw_assignment_lhs_write_target(object, target, profile)) + .unwrap_or(false); + } + if raw_field_like_node(lhs, profile) { + return raw_member_receiver_target(lhs, target, profile); + } + if let Some(lhs_name) = raw_local_identifier_text(lhs, profile) { + return std::ptr::eq(lhs, target) + || (raw_contains_node(lhs, target) + && raw_local_identifier_text(target, profile) + .map(|target_name| target_name == lhs_name) + .unwrap_or(false)); + } + if profile + .expression_list_node_kinds() + .contains(&lhs.kind.as_str()) + { + if raw_named_children(lhs).is_empty() && raw_local_identifier_text(lhs, profile).is_some() { + return std::ptr::eq(lhs, target); + } + return raw_named_children(lhs) + .into_iter() + .any(|child| raw_assignment_lhs_write_target(child, target, profile)); + } + raw_contains_node(lhs, target) +} + +fn raw_indexed_lhs_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile + .indexed_lhs_node_kinds() + .contains(&node.kind.as_str()) + || (profile + 
.indexed_lhs_bracket_wrapper_node_kinds() + .contains(&node.kind.as_str()) + && node + .children + .iter() + .any(|child| !child.named && child.text == "[")) +} + +fn raw_field_like_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile + .field_like_node_kinds() + .contains(&node.kind.as_str()) + || (profile + .field_like_dot_wrapper_node_kinds() + .contains(&node.kind.as_str()) + && node + .children + .iter() + .any(|child| !child.named && child.text == ".")) +} + +fn raw_member_receiver_target( + node: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + let Some(receiver) = raw_named_children(node).first().copied() else { + return false; + }; + if raw_local_identifier_text(receiver, profile).is_some() { + return std::ptr::eq(receiver, target); + } + if raw_indexed_lhs_node(receiver, profile) { + return raw_named_children(receiver) + .first() + .map(|object| raw_member_receiver_target(object, target, profile)) + .unwrap_or(false); + } + if raw_field_like_node(receiver, profile) { + return raw_member_receiver_target(receiver, target, profile); + } + if raw_named_children(receiver) + .into_iter() + .any(|child| raw_member_receiver_target(child, target, profile)) + { + return true; + } + false +} + +fn raw_member_name( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + let Some(parent) = parent else { + return false; + }; + if !raw_field_like_node(parent, profile) { + return false; + } + raw_named_children(parent) + .last() + .map(|field| std::ptr::eq(*field, node)) + .unwrap_or(false) +} + +fn raw_call_name(node: &RawNode, parent: Option<&RawNode>, profile: &dyn LanguageProfile) -> bool { + let Some(parent) = parent else { + return false; + }; + if raw_field_like_node(parent, profile) { + return false; + } + profile.call_node_kinds().contains(&parent.kind.as_str()) + && raw_named_children(parent) + .first() + .map(|callee| std::ptr::eq(*callee, node)) + .unwrap_or(false) +} + +fn 
raw_keyed_element_key( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + let Some(parent) = parent else { + return false; + }; + if !profile + .keyed_element_node_kinds() + .contains(&parent.kind.as_str()) + || !profile.keyed_element_first_named_child_is_key() + { + return false; + } + raw_named_children(parent) + .first() + .map(|key| std::ptr::eq(*key, node)) + .unwrap_or(false) + || raw_next_sibling(node, parent) + .map(|sibling| !sibling.named && sibling.text == ":") + .unwrap_or(false) +} + +fn raw_assignment_statement(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile + .assignment_node_kinds() + .contains(&node.kind.as_str()) + || node.children.iter().any(|child| { + !child.named + && profile + .assignment_operator_tokens() + .contains(&child.text.as_str()) + }) +} + +fn raw_branch_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile.branch_node_kinds().contains(&node.kind.as_str()) +} + +fn raw_comment_node(node: &RawNode) -> bool { + node.kind.to_ascii_lowercase().contains("comment") +} + +fn raw_named_children(node: &RawNode) -> Vec<&RawNode> { + node.children.iter().filter(|child| child.named).collect() +} + +fn raw_next_sibling<'a>(node: &RawNode, parent: &'a RawNode) -> Option<&'a RawNode> { + let index = parent + .children + .iter() + .position(|child| std::ptr::eq(child, node))?; + parent.children.get(index + 1) +} + +fn raw_previous_sibling<'a>(node: &RawNode, parent: &'a RawNode) -> Option<&'a RawNode> { + let index = parent + .children + .iter() + .position(|child| std::ptr::eq(child, node))?; + index + .checked_sub(1) + .and_then(|previous| parent.children.get(previous)) +} + +fn raw_contains_node(root: &RawNode, target: &RawNode) -> bool { + std::ptr::eq(root, target) + || root + .children + .iter() + .any(|child| raw_contains_node(child, target)) +} + +fn normalized_node_for_span(root: &Node, span: Span) -> Option<&Node> { + if [ + root.first_lineno, + 
root.first_column, + root.last_lineno, + root.last_column, + ] == span + { + return Some(root); + } + root.children + .iter() + .filter_map(ast::node) + .find_map(|child| normalized_node_for_span(child, span)) +} + +fn fallback_node_from_raw(raw: &RawNode) -> Node { + Node { + r#type: "DEFN".to_string(), + children: raw + .children + .iter() + .filter(|child| child.named) + .map(|child| Child::Node(Box::new(fallback_node_from_raw(child)))) + .collect(), + first_lineno: raw.span[0], + first_column: raw.span[1], + last_lineno: raw.span[2], + last_column: raw.span[3], + text: raw.text.clone(), + } +} + +struct LocalFlow { + file: String, + lines: Vec, + methods_by_span: BTreeMap, +} + +impl LocalFlow { + fn new( + file: String, + lines: Vec, + methods_by_span: BTreeMap, + ) -> Self { + Self { + file, + lines, + methods_by_span, + } + } + + fn scan(&mut self, root: &Node) -> Vec { + let mut out = Vec::new(); + self.collect_methods(root, &Vec::new(), &mut out); + out + } + + fn collect_methods(&self, node: &Node, owners: &[String], out: &mut Vec) { + if OWNER_TYPES.contains(&node.r#type.as_str()) { + let owner = self.full_owner_name(owners, node); + for method in self.owner_methods(node) { + out.push(self.method_summary(method, Some(&owner))); + } + let mut next_owners = owners.to_vec(); + next_owners.push(self.owner_segment(node)); + self.collect_nested_owners(node, &next_owners, out); + } else if METHOD_TYPES.contains(&node.r#type.as_str()) && owners.is_empty() { + out.push(self.method_summary(node, None)); + } else { + for child in node.children.iter().filter_map(ast::node) { + self.collect_methods(child, owners, out); + } + } + } + + fn collect_nested_owners(&self, node: &Node, owners: &[String], out: &mut Vec) { + if METHOD_TYPES.contains(&node.r#type.as_str()) { + return; + } + + for child in node.children.iter().filter_map(ast::node) { + if OWNER_TYPES.contains(&child.r#type.as_str()) { + self.collect_methods(child, owners, out); + } else { + 
self.collect_nested_owners(child, owners, out); + } + } + } + + fn method_summary(&self, node: &Node, owner_hint: Option<&str>) -> MethodSummary { + let node_span = [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ]; + let metadata = self.methods_by_span.get(&node_span); + let owner = metadata + .map(|item| item.owner.as_str()) + .or(owner_hint) + .unwrap_or("(top-level)"); + let name = metadata + .map(|item| item.name.clone()) + .unwrap_or_else(|| self.method_name(node)); + let statement_nodes = ast::body_stmts(node) + .into_iter() + .filter(|statement| !comment_statement(statement)) + .collect::>(); + let local_names = self.local_names(&statement_nodes, metadata); + let statements: Vec<_> = statement_nodes + .iter() + .enumerate() + .map(|(index, stmt)| self.statement_summary(stmt, index, &local_names)) + .collect(); + MethodSummary { + id: format!("{}#{}", owner, name), + owner: owner.to_string(), + name, + file: self.file.clone(), + line: node.first_lineno, + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + node: node.clone(), + raw_node: None, + boundaries: self.structural_boundaries(&statements), + statements, + } + } + + fn statement_summary( + &self, + node: &Node, + index: usize, + local_names: &BTreeSet, + ) -> Statement { + let source = ast::slice(node, &self.lines); + let writes = self.local_writes(node); + let reads = self.local_reads(node, local_names, &writes); + Statement { + index, + line: node.first_lineno, + end_line: node.last_lineno, + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + source, + dependencies: self.assignment_dependencies(node, local_names), + co_uses: self.co_use_edges(node, local_names), + reads, + writes, + } + } + + fn local_names( + &self, + statements: &[&Node], + metadata: Option<&MethodMetadata>, + ) -> BTreeSet { + let mut names = metadata.map(|item| item.params.clone()).unwrap_or_default(); 
+ for statement in statements { + names.extend(self.local_writes(statement)); + } + names + } + + fn structural_boundaries(&self, statements: &[Statement]) -> Vec { + let mut out = Vec::new(); + for i in 0..statements.len().saturating_sub(1) { + let left = &statements[i]; + let right = &statements[i + 1]; + if let Some(boundary) = self.source_boundary(left.end_line + 1, right.line - 1) { + out.push(Boundary { + before_index: left.index, + after_index: right.index, + line: boundary.line, + kind: boundary.kind, + text: boundary.text, + }); + } + } + out + } + + fn source_boundary(&self, first_line: usize, last_line: usize) -> Option { + if first_line > last_line { + return None; + } + + let mut blank = None; + for line_number in first_line..=last_line { + let text = self + .lines + .get(line_number - 1) + .map(|s| s.as_str()) + .unwrap_or(""); + let stripped = text.trim(); + if stripped.starts_with('#') || stripped.starts_with("//") || stripped.starts_with("--") + { + return Some(RawBoundary { + line: line_number, + kind: "comment".to_string(), + text: stripped.to_string(), + }); + } + if stripped.is_empty() && blank.is_none() { + blank = Some(RawBoundary { + line: line_number, + kind: "blank".to_string(), + text: stripped.to_string(), + }); + } + } + blank + } + + fn owner_methods<'a>(&self, owner_node: &'a Node) -> Vec<&'a Node> { + let Some(body) = self.owner_body(owner_node) else { + return Vec::new(); + }; + + let stmts = if statement_container(body) { + body.children + .iter() + .filter_map(ast::node) + .collect::>() + } else { + vec![body] + }; + + stmts + .into_iter() + .flat_map(|stmt| { + if METHOD_TYPES.contains(&stmt.r#type.as_str()) { + vec![stmt] + } else if self.visibility_call(stmt) { + self.inline_methods(stmt) + } else { + vec![] + } + }) + .collect() + } + + fn inline_methods<'a>(&self, stmt: &'a Node) -> Vec<&'a Node> { + let Some(args) = stmt.children.get(1).and_then(ast::node) else { + return Vec::new(); + }; + args.children + .iter() + 
.filter_map(ast::node) + .filter(|arg| METHOD_TYPES.contains(&arg.r#type.as_str())) + .collect() + } + + fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { + let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; + let scope = owner_node.children.get(scope_index).and_then(ast::node)?; + if scope.r#type != "SCOPE" { + return None; + } + scope.children.get(2).and_then(ast::node) + } + + fn visibility_call(&self, node: &Node) -> bool { + if node.r#type == "FCALL" { + if let Some(Child::Symbol(name)) = node.children.first() { + return matches!(name.as_str(), "public" | "protected" | "private"); + } + } + false + } + + fn method_name(&self, node: &Node) -> String { + if node.r#type == "DEFS" { + let receiver = node.children.get(0).and_then(ast::node); + let prefix = if let Some(r) = receiver { + if r.r#type == "SELF" { + "self".to_string() + } else { + ast::slice(r, &self.lines) + } + } else { + "?".to_string() + }; + format!( + "{}.{}", + prefix, + node.children + .get(1) + .and_then(|c| match c { + Child::Symbol(s) => Some(s), + _ => None, + }) + .unwrap_or(&"?".to_string()) + ) + } else { + node.children + .first() + .and_then(|c| match c { + Child::Symbol(s) => Some(s.clone()), + _ => None, + }) + .unwrap_or_else(|| "?".to_string()) + } + } + + fn full_owner_name(&self, owners: &[String], node: &Node) -> String { + let mut next = owners.to_vec(); + next.push(self.owner_segment(node)); + next.join("::") + } + + fn owner_segment(&self, node: &Node) -> String { + let text = ast::slice( + node.children.first().and_then(ast::node).unwrap_or(node), + &self.lines, + ); + if text.is_empty() { + "(anonymous)".to_string() + } else { + text + } + } + + fn local_reads( + &self, + node: &Node, + local_names: &BTreeSet, + writes: &BTreeSet, + ) -> BTreeSet { + let mut reads = Vec::new(); + self.walk_local(node, &mut |child| { + if LOCAL_READ_TYPES.contains(&child.r#type.as_str()) { + if let Some(name) = local_read_name(child) { + if 
local_names.contains(&name) { + reads.push(name); + } + } + } + }); + reads.extend(textual_local_reads( + &ast::slice(node, &self.lines), + local_names, + writes, + )); + reads.into_iter().collect() + } + + fn local_writes(&self, node: &Node) -> BTreeSet { + let mut writes = Vec::new(); + self.walk_local(node, &mut |child| { + if LOCAL_WRITE_TYPES.contains(&child.r#type.as_str()) { + if let Some(Child::String(name)) = child.children.first() { + writes.push(name.clone()); + } + } + }); + writes.extend(textual_local_writes(&ast::slice(node, &self.lines))); + writes.into_iter().collect() + } + + fn assignment_dependencies( + &self, + node: &Node, + local_names: &BTreeSet, + ) -> Vec<(String, String)> { + let mut deps = Vec::new(); + self.walk_local(node, &mut |child| { + if LOCAL_WRITE_TYPES.contains(&child.r#type.as_str()) { + if let Some(Child::String(lhs)) = child.children.first() { + if let Some(rhs) = child.children.get(1).and_then(ast::node) { + let rhs_writes = self.local_writes(rhs); + for read in self.local_reads(rhs, local_names, &rhs_writes) { + if lhs != &read { + deps.push((lhs.clone(), read)); + } + } + } + } + } + }); + let lhs_names = self.local_writes(node); + if !lhs_names.is_empty() { + let reads = self.local_reads(node, local_names, &lhs_names); + for lhs in lhs_names { + for read in &reads { + if &lhs != read { + deps.push((lhs.clone(), read.clone())); + } + } + } + } + deps.sort(); + deps.dedup(); + deps + } + + fn co_use_edges(&self, node: &Node, local_names: &BTreeSet) -> Vec<(String, String)> { + let writes = self.local_writes(node); + let reads: Vec<_> = self + .local_reads(node, local_names, &writes) + .into_iter() + .collect(); + let mut out = Vec::new(); + for i in 0..reads.len() { + for j in i + 1..reads.len() { + out.push((reads[i].clone(), reads[j].clone())); + } + } + out + } + + fn walk_local(&self, node: &Node, blk: &mut dyn FnMut(&Node)) { + if SKIP_NESTED_TYPES.contains(&node.r#type.as_str()) { + return; + } + blk(node); + for 
/// Reports whether `source` (ignoring surrounding whitespace) is exactly one
/// quoted string literal and nothing else.
///
/// Recognised delimiters are `"""`, `'''`, `"` and `'`. The literal must open
/// at the first character and its first unescaped closing delimiter must be
/// the last thing in the source. An expression such as `"a" + name + "b"`
/// merely starts and ends with a quote, so it is not treated as a literal and
/// the identifiers inside it stay visible to the textual read scan.
///
/// Prefixed literals (`f"…"`, `r"…"`, …) never match because they do not
/// start with a quote character.
fn plain_string_literal_source(source: &str) -> bool {
    let source = source.trim();
    // Triple-quoted forms are tried first so their inner quotes are not
    // mistaken for the end of a single-quoted literal.
    ["\"\"\"", "'''", "\"", "'"]
        .iter()
        .any(|delimiter| literal_closes_only_at_end(source, delimiter))
}

/// Returns `true` when `source` is `delimiter … delimiter` and no unescaped
/// `delimiter` occurs between the opening and the final closing one.
fn literal_closes_only_at_end(source: &str, delimiter: &str) -> bool {
    let Some(body) = source
        .strip_prefix(delimiter)
        .and_then(|rest| rest.strip_suffix(delimiter))
    else {
        return false;
    };

    let (bytes, closer) = (body.as_bytes(), delimiter.as_bytes());
    let mut index = 0;
    while index < bytes.len() {
        if bytes[index] == b'\\' {
            if index + 1 == bytes.len() {
                // The backslash escapes the final delimiter: unterminated.
                return false;
            }
            index += 2;
        } else if bytes[index..].starts_with(closer) {
            // The literal ends early; the rest of the source is more code.
            return false;
        } else {
            index += 1;
        }
    }
    true
}
/// Splits `source` at its first assignment operator.
///
/// Returns the trimmed text left of the operator together with the operator
/// itself: `":="` for a walrus / short declaration, `"="` for a plain
/// assignment. Returns `None` when no such operator is found.
///
/// An `=` is not an assignment when it is part of a comparison, arrow or
/// compound operator (`==`, `!=`, `<=`, `>=`, `=>`, `+=`, `-=`, `*=`, `/=`,
/// `%=`, `&=`, `|=`), part of the `=~` match operator, or located inside a
/// closed double-quoted string literal (`return "a=b"` has no assignment).
///
/// Only double quotes are treated as string delimiters. A single quote is
/// ambiguous at this textual level (Rust lifetimes, apostrophes), so
/// single-quoted text is scanned like ordinary code. A `"` without a closing
/// partner is treated as an ordinary character.
fn split_assignment(source: &str) -> Option<(&str, &str)> {
    let bytes = source.as_bytes();
    let mut index = 0;
    while index < bytes.len() {
        match bytes[index] {
            b'"' => {
                if let Some(close) = closing_double_quote(bytes, index) {
                    // Skip the literal so operators inside it are ignored.
                    index = close + 1;
                    continue;
                }
            }
            b':' if bytes.get(index + 1) == Some(&b'=') => {
                return Some((source[..index].trim(), ":="));
            }
            b'=' => {
                let previous = index.checked_sub(1).map(|before| bytes[before]);
                let next = bytes.get(index + 1).copied();
                let operator_tail = matches!(
                    previous,
                    Some(
                        b'=' | b'!'
                            | b'<'
                            | b'>'
                            | b':'
                            | b'+'
                            | b'-'
                            | b'*'
                            | b'/'
                            | b'%'
                            | b'&'
                            | b'|'
                    )
                );
                let operator_head = matches!(next, Some(b'=' | b'>' | b'~'));
                if !operator_tail && !operator_head {
                    // `index` points at an ASCII byte, so slicing is safe
                    // even when the source contains multi-byte characters.
                    return Some((source[..index].trim(), "="));
                }
            }
            _ => {}
        }
        index += 1;
    }
    None
}

/// Finds the index of the unescaped `"` that closes the literal opened at
/// `open`, or `None` when the literal is never closed.
fn closing_double_quote(bytes: &[u8], open: usize) -> Option<usize> {
    let mut index = open + 1;
    while index < bytes.len() {
        match bytes[index] {
            b'\\' => index += 2,
            b'"' => return Some(index),
            _ => index += 1,
        }
    }
    None
}
/// Derives the implicit owner name of a file: its file stem (the file name
/// without the last extension), or `"(file)"` when the path has no stem, the
/// stem is empty, or it is not valid UTF-8.
fn file_owner(file: &str) -> String {
    let stem = Path::new(file).file_stem().and_then(|os| os.to_str());
    match stem {
        Some(name) if !name.is_empty() => name.to_string(),
        _ => "(file)".to_string(),
    }
}
/// Scanning source that contains a multi-byte character (`✓`) must complete, and the
/// assignment after it must still report its dependency.
#[test]
fn handles_non_ascii_source_without_byte_boundary_panics() {
    let source = "def mixed(price):\n marker = \"✓\"\n total = price\n return total\n";
    let scanned = summaries(source, Language::Python);
    let mixed = scanned
        .iter()
        .find(|candidate| candidate.name == "mixed")
        .expect("mixed summary");

    assert_eq!(mixed.statements.len(), 3);

    let expected_dependencies = vec![("total".to_string(), "price".to_string())];
    assert_eq!(mixed.statements[1].dependencies, expected_dependencies);
}
/// The `as output` alias of a `with` statement counts as a write where it is declared and
/// only as a read where it is used afterwards.
#[test]
fn does_not_read_python_with_alias_at_declaration_site() {
    let source = "def capture(console):\n with console.capture() as output:\n console.line()\n return output\n";
    let scanned = summaries(source, Language::Python);
    let capture = scanned
        .iter()
        .find(|candidate| candidate.name == "capture")
        .expect("capture summary");

    let with_statement = &capture.statements[0];
    assert!(with_statement.writes.contains("output"));
    assert!(!with_statement.reads.contains("output"));
    assert!(capture.statements[1].reads.contains("output"));
}
/// A local name that only appears inside a docstring is not a read; the later `return` is.
#[test]
fn does_not_read_locals_from_plain_docstring_text() {
    let source = "def get_content(user):\n \"\"\"Extract text from user dict.\"\"\"\n return user\n";
    let scanned = summaries(source, Language::Python);
    let get_content = scanned
        .iter()
        .find(|candidate| candidate.name == "get_content")
        .expect("get_content summary");

    assert!(get_content.statements[0].reads.is_empty());
    assert_eq!(
        get_content.statements[1].reads,
        std::iter::once("user".to_string()).collect()
    );
}
summaries(source, language); + let summary = summaries + .iter() + .find(|summary| summary.name == "mixed") + .expect("mixed summary"); + + assert_eq!(summary.owner, "Billing"); + assert_eq!(summary.statements.len(), 3); + assert_eq!( + summary.statements[0].reads, + ["price".to_string(), "tax".to_string()] + .into_iter() + .collect() + ); + assert_eq!( + summary.statements[1].dependencies, + vec![("total".to_string(), "subtotal".to_string())] + ); + assert_eq!( + summary.statements[2].reads, + ["total".to_string()].into_iter().collect() + ); + } + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/path_condition.rs b/gems/decomplex/rust/src/decomplex/syntax/path_condition.rs new file mode 100644 index 000000000..363ea98a5 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/path_condition.rs @@ -0,0 +1,738 @@ +use crate::decomplex::ast::{self, normalize_text, Child, Node, RawNode, Span}; +use crate::decomplex::syntax::adapters::{language_profile, LanguageProfile}; +use crate::decomplex::syntax::{Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct PathConditionReport { + pub neglected: Vec, + pub scattered: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct NeglectedPathCondition { + pub pattern: Vec, + pub support: usize, + pub missing: String, + pub at: String, + pub spans: BTreeMap, + pub action: String, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct ScatteredPathCondition { + pub guards: Vec, + pub support: usize, + pub scatter: usize, + pub rank: usize, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Site { + guards: Vec, + action: String, + file: String, + defn: String, + line: usize, + span: Span, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = super::parse_files(files, 
language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> PathConditionReport { + let mut sites = documents + .iter() + .flat_map(sites_from_document_facts) + .collect::>(); + sites.extend( + documents + .iter() + .flat_map(sites_from_raw_facts) + .collect::>(), + ); + if !sites.is_empty() { + return Report::new(dedupe_sites(sites)).findings(); + } + + let mut sites = Vec::new(); + for document in documents { + let mut pc = PathCondition::new(document.file.clone(), document.lines.clone()); + pc.walk(&document.normalized_root, &Vec::new(), &Vec::new()); + sites.extend(pc.sites); + } + Report::new(sites).findings() +} + +fn dedupe_sites(sites: Vec) -> Vec { + let mut seen = BTreeSet::new(); + sites + .into_iter() + .filter(|site| { + seen.insert(( + site.guards.clone(), + site.action.clone(), + site.file.clone(), + site.defn.clone(), + site.line, + )) + }) + .collect() +} + +fn sites_from_document_facts(document: &Document) -> Vec { + document + .path_condition_sites + .iter() + .map(|site| Site { + guards: site.guards.clone(), + action: site.action.clone(), + file: site.file.clone(), + defn: site.function.clone(), + line: site.line, + span: site.span, + }) + .collect() +} + +fn sites_from_raw_facts(document: &Document) -> Vec { + let profile = language_profile(document.language); + let mut sites = Vec::new(); + for function in &document.function_defs { + for statement in raw_function_body_statements(profile, &function.body) { + raw_path_walk( + document, + profile, + statement, + &function.name, + &[], + &mut sites, + ); + } + } + sites +} + +fn raw_function_body_node<'a>( + profile: &dyn LanguageProfile, + node: &'a RawNode, +) -> Option<&'a RawNode> { + if let Some(body) = raw_child_by_field(node, "body") { + return Some(body); + } + raw_named_children(node).into_iter().rev().find(|child| { + profile + .function_body_node_kinds() + .contains(&child.kind.as_str()) + }) +} + +fn raw_function_body_statements<'a>( + profile: 
&dyn LanguageProfile, + node: &'a RawNode, +) -> Vec<&'a RawNode> { + let Some(body) = raw_function_body_node(profile, node) else { + return Vec::new(); + }; + + let mut named = raw_named_children(body) + .into_iter() + .filter(|child| !raw_comment_node(child)) + .collect::>(); + if named.len() == 1 + && profile + .nested_statement_wrapper_node_kinds() + .contains(&named[0].kind.as_str()) + { + if raw_branch_node(profile, named[0]) { + return vec![named[0]]; + } + named = raw_named_children(named[0]) + .into_iter() + .filter(|child| !raw_comment_node(child)) + .collect(); + } + if named.is_empty() && body.text.trim().is_empty() { + return Vec::new(); + } + if raw_branch_node(profile, body) || raw_assignment_statement(profile, body) || named.is_empty() + { + return vec![body]; + } + named +} + +fn raw_path_walk( + document: &Document, + profile: &dyn LanguageProfile, + node: &RawNode, + function: &str, + guards: &[String], + out: &mut Vec, +) { + if raw_nested_local_scope(profile, node) { + return; + } + + if raw_branch_node(profile, node) { + let condition = raw_branch_condition(node); + let atoms = raw_path_condition_atoms(profile, condition); + for child in raw_branch_body_nodes(profile, node) { + let mut next_guards = guards.to_vec(); + next_guards.extend(atoms.clone()); + raw_path_walk(document, profile, child, function, &next_guards, out); + } + return; + } + + if guards.len() >= 2 && raw_path_action_node(profile, node) { + let mut unique = guards.to_vec(); + unique.sort(); + unique.dedup(); + out.push(Site { + guards: unique, + action: profile.normalize_source_text(&node.text), + file: document.file.clone(), + defn: function.to_string(), + line: node.span[0], + span: node.span, + }); + return; + } + + for child in raw_named_children(node) { + raw_path_walk(document, profile, child, function, guards, out); + } +} + +fn raw_path_condition_atoms( + profile: &dyn LanguageProfile, + condition: Option<&RawNode>, +) -> Vec { + let Some(condition) = condition else { 
+ return Vec::new(); + }; + if raw_boolean_container(profile, condition) && raw_boolean_and(profile, condition) { + let mut atoms = raw_flatten_boolean_and(profile, condition) + .into_iter() + .map(|child| raw_decision_member_text(profile, &child.text)) + .collect::>(); + atoms.sort(); + atoms.dedup(); + atoms + } else { + vec![raw_decision_member_text(profile, &condition.text)] + } +} + +fn raw_branch_condition(node: &RawNode) -> Option<&RawNode> { + raw_child_by_field(node, "condition") + .or_else(|| raw_child_by_field(node, "value")) + .or_else(|| raw_child_by_field(node, "subject")) + .or_else(|| raw_named_children(node).into_iter().next()) +} + +fn raw_branch_body_nodes<'a>(profile: &dyn LanguageProfile, node: &'a RawNode) -> Vec<&'a RawNode> { + let mut bodies = ["consequence", "body", "alternative"] + .into_iter() + .filter_map(|field| raw_child_by_field(node, field)) + .collect::>(); + if bodies.is_empty() { + bodies = raw_named_children(node).into_iter().skip(1).collect(); + } + bodies + .into_iter() + .flat_map(|body| { + if raw_simple_action_wrapper(profile, body) { + return vec![body]; + } + let body_children = raw_named_children(body); + let children = if profile + .path_transparent_branch_body_node_kinds() + .contains(&body.kind.as_str()) + { + body_children.into_iter().skip(1).collect::>() + } else { + body_children + }; + let children = children + .into_iter() + .flat_map(|child| { + if profile + .path_transparent_branch_body_node_kinds() + .contains(&child.kind.as_str()) + { + raw_named_children(child) + .into_iter() + .skip(1) + .collect::>() + } else { + vec![child] + } + }) + .filter(|child| !raw_comment_node(child)) + .collect::>(); + if children.is_empty() { + vec![body] + } else { + children + } + }) + .collect() +} + +fn raw_path_action_node(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + if raw_branch_node(profile, node) { + return false; + } + raw_simple_action_wrapper(profile, node) + || raw_assignment_statement(profile, node) 
+ || profile + .path_action_node_kinds() + .contains(&node.kind.as_str()) +} + +fn raw_simple_action_wrapper(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + if !profile + .simple_action_wrapper_node_kinds() + .contains(&node.kind.as_str()) + { + return false; + } + let text = normalize_text(&node.text); + if text.contains('{') || text.contains('}') { + return false; + } + let text = text.strip_suffix(';').unwrap_or(&text).trim(); + let Some(open) = text.find('(') else { + return false; + }; + text.ends_with(')') + && text[..open] + .chars() + .all(|ch| ch == '_' || ch == '.' || ch.is_ascii_alphanumeric()) +} + +fn raw_assignment_statement(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + profile + .assignment_node_kinds() + .contains(&node.kind.as_str()) + || node.children.iter().any(|child| { + !child.named + && profile + .assignment_operator_tokens() + .contains(&child.text.as_str()) + }) +} + +fn raw_branch_node(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + profile.branch_node_kinds().contains(&node.kind.as_str()) +} + +fn raw_nested_local_scope(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + profile.function_node_kinds().contains(&node.kind.as_str()) + || profile + .class_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .module_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .generic_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .struct_owner_node_kinds() + .contains(&node.kind.as_str()) +} + +fn raw_boolean_container(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + if profile + .boolean_container_node_kinds() + .contains(&node.kind.as_str()) + { + return true; + } + if raw_parenthesized_wrapper(profile, node) { + return raw_named_children(node) + .into_iter() + .next() + .map(|child| raw_boolean_container(profile, child)) + .unwrap_or(false); + } + false +} + +fn raw_boolean_and(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + if 
/// Checks that the parenthesis opened at the start of `text` is closed only by its final
/// character, i.e. the outermost pair wraps the whole text.
///
/// Returns `false` as soon as the nesting depth drops to zero before the last character or a
/// `)` appears without a matching `(`. Empty text counts as balanced and returns `true`.
fn enclosing_parentheses_wrap_all(text: &str) -> bool {
    let mut depth = 0isize;
    for (offset, ch) in text.char_indices() {
        match ch {
            '(' => depth += 1,
            ')' => depth -= 1,
            _ => {}
        }
        if depth < 0 {
            return false;
        }
        // Work in byte offsets: a character index compared with the byte length would treat
        // the final character of non-ASCII text as if it were not the last one.
        let is_last = offset + ch.len_utf8() == text.len();
        if depth == 0 && !is_last {
            return false;
        }
    }
    depth == 0
}
!child.named && !matches!(text, "(" | ")") + }) + .map(|child| normalize_text(&child.text)) +} + +fn raw_named_children(node: &RawNode) -> Vec<&RawNode> { + node.children.iter().filter(|child| child.named).collect() +} + +fn raw_child_by_field<'a>(node: &'a RawNode, field: &str) -> Option<&'a RawNode> { + node.children + .iter() + .find(|child| child.field_name.as_deref() == Some(field)) +} + +fn raw_comment_node(node: &RawNode) -> bool { + node.kind.contains("comment") +} + +struct PathCondition { + file: String, + lines: Vec, + sites: Vec, +} + +impl PathCondition { + fn new(file: String, lines: Vec) -> Self { + Self { + file, + lines, + sites: Vec::new(), + } + } + + fn walk(&mut self, node: &Node, defstack: &[String], guards: &[Vec]) { + let mut next_defstack = defstack.to_vec(); + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + next_defstack.push(name.clone()); + } + } + + match node.r#type.as_str() { + "IF" | "UNLESS" => { + let cond = node.children.get(0).and_then(ast::node); + let a = node.children.get(1).and_then(ast::node); + let b = node.children.get(2).and_then(ast::node); + + let atoms = self.cond_atoms(cond); + let then_g = if node.r#type == "IF" { + atoms.clone() + } else { + self.negate(&atoms) + }; + let else_g = if node.r#type == "IF" { + self.negate(&atoms) + } else { + atoms.clone() + }; + + if let Some(a_node) = a { + let mut next_guards = guards.to_vec(); + next_guards.extend(then_g); + self.walk(a_node, &next_defstack, &next_guards); + } + if let Some(b_node) = b { + let mut next_guards = guards.to_vec(); + next_guards.extend(else_g); + self.walk(b_node, &next_defstack, &next_guards); + } + + return; + } + "CALL" | "FCALL" | "VCALL" | "ATTRASGN" | "LASGN" | "IASGN" | "OPCALL" => { + if guards.len() >= 2 { + self.record(node, &next_defstack, guards); + } + } + _ => {} + } + + for child in 
node.children.iter().filter_map(ast::node) { + self.walk(child, &next_defstack, guards); + } + } + + fn cond_atoms(&self, cond: Option<&Node>) -> Vec> { + let Some(cond) = cond else { return Vec::new() }; + ast::flatten_and(cond) + .into_iter() + .map(|a| { + let t = ast::slice(a, &self.lines); + let (text, neg) = ast::canon_polarity(&t); + vec![ + text, + if neg { + "true".to_string() + } else { + "false".to_string() + }, + ] + }) + .collect() + } + + fn negate(&self, atoms: &[Vec]) -> Vec> { + atoms + .iter() + .map(|a| { + let t = &a[0]; + let n = a[1] == "true"; + vec![ + t.clone(), + if !n { + "true".to_string() + } else { + "false".to_string() + }, + ] + }) + .collect() + } + + fn record(&mut self, node: &Node, defstack: &[String], guards: &[Vec]) { + let mut members_set = BTreeSet::new(); + for g in guards { + let prefix = if g[1] == "true" { "!" } else { "" }; + members_set.insert(format!("{}{}", prefix, g[0])); + } + let members: Vec<_> = members_set.into_iter().collect(); + + if members.len() < 2 { + return; + } + + let slice = ast::slice(node, &self.lines); + let action = if slice.len() > 80 { + slice[..80].to_string() + } else { + slice + }; + + self.sites.push(Site { + guards: members, + action, + file: self.file.clone(), + defn: defstack + .last() + .cloned() + .unwrap_or_else(|| "(top-level)".to_string()), + line: node.first_lineno, + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + }); + } +} + +struct Report { + sites: Vec, + groups: Vec<(Vec, Vec)>, +} + +impl Report { + fn new(sites: Vec) -> Self { + let mut keys = Vec::new(); + let mut groups: BTreeMap, Vec> = BTreeMap::new(); + for s in &sites { + if !groups.contains_key(&s.guards) { + keys.push(s.guards.clone()); + } + groups.entry(s.guards.clone()).or_default().push(s.clone()); + } + + let ordered_groups = keys + .into_iter() + .map(|k| { + let v = groups.remove(&k).unwrap(); + (k, v) + }) + .collect(); + + Self { + sites, + groups: 
ordered_groups, + } + } + + fn findings(&self) -> PathConditionReport { + PathConditionReport { + neglected: self.neglected(3), + scattered: self.scattered(2), + } + } + + fn scattered(&self, min_scatter: usize) -> Vec { + let mut out = Vec::new(); + for (guards, sites) in &self.groups { + let scatter = sites + .iter() + .map(|site| (site.file.clone(), site.defn.clone())) + .collect::>() + .len(); + if scatter < min_scatter { + continue; + } + + let locations = sites + .iter() + .map(|site| format!("{}:{}:{}", site.file, site.defn, site.line)) + .collect::>(); + let spans = sites + .iter() + .map(|site| { + ( + format!("{}:{}:{}", site.file, site.defn, site.line), + site.span, + ) + }) + .collect::>(); + out.push(ScatteredPathCondition { + guards: guards.clone(), + support: sites.len(), + scatter, + rank: sites.len() * scatter, + sites: locations, + spans, + }); + } + out.sort_by(|a, b| b.rank.cmp(&a.rank).then_with(|| a.guards.cmp(&b.guards))); + out + } + + fn neglected(&self, min_support: usize) -> Vec { + let popular: Vec<_> = self + .groups + .iter() + .filter(|(_, s)| s.len() >= min_support) + .map(|(g, s)| (g.clone(), s.len())) + .collect(); + + let mut out = Vec::new(); + let mut seen = BTreeSet::new(); + + for s in &self.sites { + for (gs, sup) in &popular { + let gs_set: BTreeSet<_> = gs.iter().cloned().collect(); + let s_guards_set: BTreeSet<_> = s.guards.iter().cloned().collect(); + + let diff_gs_s: BTreeSet<_> = gs_set.difference(&s_guards_set).cloned().collect(); + let diff_s_gs: BTreeSet<_> = s_guards_set.difference(&gs_set).cloned().collect(); + + if diff_gs_s.len() == 1 && diff_s_gs.is_empty() { + if s.guards == *gs { + continue; + } + + let at = format!("{}:{}:{}", s.file, s.defn, s.line); + let missing = diff_gs_s.into_iter().next().unwrap(); + + // dedupe manually + let key = (gs.clone(), sup.clone(), missing.clone(), at.clone()); + if seen.insert(key) { + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), s.span); + + 
out.push(NeglectedPathCondition { + pattern: gs.clone(), + support: *sup, + missing, + at, + spans, + action: s.action.clone(), + }); + } + } + } + } + + out.sort_by(|a, b| b.support.cmp(&a.support).then_with(|| a.at.cmp(&b.at))); + out + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/redundant_nil_guard.rs b/gems/decomplex/rust/src/decomplex/syntax/redundant_nil_guard.rs new file mode 100644 index 000000000..7296c88c2 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/redundant_nil_guard.rs @@ -0,0 +1,644 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::{Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct RedundantNilGuardRow { + pub at: String, + pub file: String, + pub defn: String, + pub line: usize, + pub span: Span, + pub local: String, + pub guard: String, + pub proof: String, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Flow { + known: BTreeSet, + terminated: bool, +} + +#[derive(Clone, Debug)] +struct NilFact { + local: String, + non_nil_when_true: bool, +} + +struct CallParts<'a> { + receiver: Option<&'a Node>, + message: String, + no_args: bool, +} + +struct Finding { + file: String, + defn: String, + line: usize, + span: Span, + local: String, + guard: String, + proof: String, +} + +impl Finding { + fn to_h(&self) -> RedundantNilGuardRow { + let loc = format!("{}:{}:{}", self.file, self.defn, self.line); + let mut spans = BTreeMap::new(); + spans.insert(loc.clone(), self.span); + RedundantNilGuardRow { + at: loc, + file: self.file.clone(), + defn: self.defn.clone(), + line: self.line, + span: self.span, + local: self.local.clone(), + guard: self.guard.clone(), + proof: self.proof.clone(), + spans, + } + } +} + +const TERMINATING_CALLS: &[&str] = &["raise", "fail", "abort", "exit", "exit!"]; +const NIL_PREDICATE_MIDS: &[&str] = 
&["nil?", "isNull", "is_null", "nil", "is_none"]; +const NON_NIL_PREDICATE_MIDS: &[&str] = &["isSome", "is_some", "present", "present?"]; + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let documents = super::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + let mut findings = Vec::new(); + for document in documents { + let mut scanner = RedundantNilGuard::new(document.file.clone(), document.lines.clone()); + scanner.walk(&document.normalized_root, &Vec::new()); + findings.extend(scanner.findings); + } + let mut out: Vec<_> = findings.into_iter().map(|f| f.to_h()).collect(); + out.sort_by(|a, b| { + a.file + .cmp(&b.file) + .then_with(|| a.line.cmp(&b.line)) + .then_with(|| a.local.cmp(&b.local)) + .then_with(|| a.guard.cmp(&b.guard)) + }); + out +} + +struct RedundantNilGuard { + file: String, + lines: Vec, + findings: Vec, +} + +impl RedundantNilGuard { + fn new(file: String, lines: Vec) -> Self { + Self { + file, + lines, + findings: Vec::new(), + } + } + + fn walk(&mut self, node: &Node, defstack: &[String]) { + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + let mut next_defstack = defstack.to_vec(); + next_defstack.push(name.clone()); + self.process_block(&ast::body_stmts(node), &next_defstack, &BTreeSet::new()); + } + return; + } + + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, defstack); + } + } + + fn process_block( + &mut self, + stmts: &[&Node], + defstack: &[String], + known: &BTreeSet, + ) -> Flow { + let mut current = known.clone(); + for stmt in stmts { + let flow = self.process_stmt(stmt, defstack, ¤t); + current = flow.known; + if flow.terminated { + return Flow { + known: current, + terminated: true, + }; + } + } + Flow { + known: current, + terminated: false, + } + } + + fn 
process_stmt(&mut self, node: &Node, defstack: &[String], known: &BTreeSet) -> Flow { + match node.r#type.as_str() { + "IF" | "UNLESS" => self.process_branch(node, defstack, known), + "LASGN" => { + if let Some(rhs) = node.children.get(1).and_then(ast::node) { + self.inspect_node(rhs, defstack, known); + } + let mut next_known = known.clone(); + if let Some(Child::String(name)) = node.children.first() { + next_known.remove(name); + } + Flow { + known: next_known, + terminated: false, + } + } + _ => { + self.inspect_node(node, defstack, known); + Flow { + known: known.clone(), + terminated: self.terminating(node), + } + } + } + } + + fn process_branch( + &mut self, + node: &Node, + defstack: &[String], + known: &BTreeSet, + ) -> Flow { + let cond = node.children.get(0).and_then(ast::node); + let then_body = node.children.get(1).and_then(ast::node); + let else_body = node.children.get(2).and_then(ast::node); + + if let Some(cond) = cond { + self.inspect_node(cond, defstack, known); + } + + let then_known = self.known_for_branch(node.r#type.as_str(), true, cond, known); + let else_known = self.known_for_branch(node.r#type.as_str(), false, cond, known); + + let then_flow = self.process_block(&self.stmts_for(then_body), defstack, &then_known); + let else_flow = self.process_block(&self.stmts_for(else_body), defstack, &else_known); + + if then_flow.terminated && else_flow.terminated { + Flow { + known: BTreeSet::new(), + terminated: true, + } + } else if then_flow.terminated { + Flow { + known: else_flow.known, + terminated: false, + } + } else if else_flow.terminated { + Flow { + known: then_flow.known, + terminated: false, + } + } else { + let intersection: BTreeSet<_> = then_flow + .known + .intersection(&else_flow.known) + .cloned() + .collect(); + Flow { + known: intersection, + terminated: false, + } + } + } + + fn known_for_branch( + &self, + node_type: &str, + body_branch: bool, + cond: Option<&Node>, + known: &BTreeSet, + ) -> BTreeSet { + let mut next_known = 
known.clone(); + let cond_true_branch = if node_type == "IF" { + body_branch + } else { + !body_branch + }; + if let Some(cond) = cond { + for fact in self.branch_nil_facts(cond, cond_true_branch) { + next_known.insert(fact.local); + } + } + next_known + } + + fn inspect_node(&mut self, node: &Node, defstack: &[String], known: &BTreeSet) { + let recorded = self.record_redundant(node, defstack, known); + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + return; + } + if recorded && (node.r#type == "OPCALL" || self.call_parts(node).is_some()) { + return; + } + for child in node.children.iter().filter_map(ast::node) { + self.inspect_node(child, defstack, known); + } + } + + fn record_redundant( + &mut self, + node: &Node, + defstack: &[String], + known: &BTreeSet, + ) -> bool { + let local = self.redundant_nil_subject(node, known); + let Some(local) = local else { return false }; + + let defn = defstack.last().map(|s| s.as_str()).unwrap_or("(top-level)"); + self.findings.push(Finding { + file: self.file.clone(), + defn: defn.to_string(), + line: node.first_lineno, + span: self.span(node), + local: local.clone(), + guard: ast::slice(node, &self.lines), + proof: format!("{} is already proven non-nil on this path", local), + }); + true + } + + fn redundant_nil_subject(&self, node: &Node, known: &BTreeSet) -> Option { + if node.r#type == "QCALL" { + return self.qcall_subject(node, known); + } + + let fact = self.nil_fact(node)?; + if known.contains(&fact.local) { + return Some(fact.local); + } + None + } + + fn nil_fact(&self, node: &Node) -> Option { + if self.parenthesized_wrapper(node) { + return self.nil_fact(self.first_node_child(node)?); + } + + if let Some(call) = self.call_parts(node) { + if call.no_args && NIL_PREDICATE_MIDS.contains(&call.message.as_str()) { + let subject = self.subject_key(call.receiver?)?; + return Some(NilFact { + local: subject, + non_nil_when_true: false, + }); + } + if call.no_args && 
NON_NIL_PREDICATE_MIDS.contains(&call.message.as_str()) { + let subject = self.subject_key(call.receiver?)?; + return Some(NilFact { + local: subject, + non_nil_when_true: true, + }); + } + } + + match node.r#type.as_str() { + "OPCALL" => { + let recv = node.children.get(0).and_then(ast::node)?; + let mid = match node.children.get(1)? { + Child::Symbol(s) => s, + _ => return None, + }; + let args = node.children.get(2); + if mid == "!" { + return self.negated_nil_fact(recv); + } + if mid == "==" || mid == "!=" { + return self.comparison_nil_fact(recv, mid, args); + } + None + } + _ => None, + } + } + + fn branch_nil_facts(&self, node: &Node, cond_truth: bool) -> Vec { + if self.parenthesized_wrapper(node) { + if let Some(child) = self.first_node_child(node) { + return self.branch_nil_facts(child, cond_truth); + } + } + + if node.r#type == "AND" { + if !cond_truth { + return Vec::new(); + } + let mut facts = Vec::new(); + for child in ast::flatten_and(node) { + facts.extend(self.branch_nil_facts(child, true)); + } + return facts; + } + + if node.r#type == "OPCALL" { + if let Some(Child::Symbol(mid)) = node.children.get(1) { + if mid == "!" 
{ + if let Some(child) = node.children.get(0).and_then(ast::node) { + return self.branch_nil_facts(child, !cond_truth); + } + } + } + } + + if let Some(safe_receiver) = self.safe_nav_receiver_fact(node) { + if cond_truth { + return vec![safe_receiver]; + } + } + + if let Some(fact) = self.nil_fact(node) { + if cond_truth == fact.non_nil_when_true { + return vec![fact]; + } + } + + if let Some(truthy) = self.truthy_subject_fact(node) { + if cond_truth { + return vec![truthy]; + } + } + + Vec::new() + } + + fn safe_nav_receiver_fact(&self, node: &Node) -> Option { + if node.r#type == "QCALL" { + let recv = node.children.get(0).and_then(ast::node)?; + let subject = self.subject_key(recv)?; + return Some(NilFact { + local: subject, + non_nil_when_true: true, + }); + } + None + } + + fn truthy_subject_fact(&self, node: &Node) -> Option { + let subject = self.subject_key(node)?; + Some(NilFact { + local: subject, + non_nil_when_true: true, + }) + } + + fn negated_nil_fact(&self, node: &Node) -> Option { + let mut fact = self.nil_fact(node)?; + fact.non_nil_when_true = !fact.non_nil_when_true; + Some(fact) + } + + fn comparison_nil_fact(&self, recv: &Node, mid: &str, args: Option<&Child>) -> Option { + let subject = self.subject_key(recv)?; + if !self.nil_arg(args) { + return None; + } + Some(NilFact { + local: subject, + non_nil_when_true: mid == "!=", + }) + } + + fn qcall_subject(&self, node: &Node, known: &BTreeSet) -> Option { + let recv = node.children.get(0).and_then(ast::node)?; + let subject = self.subject_key(recv)?; + if known.contains(&subject) { + return Some(subject); + } + None + } + + fn subject_key(&self, node: &Node) -> Option { + match node.r#type.as_str() { + "LVAR" | "DVAR" | "VCALL" => match node.children.first()? 
{ + Child::String(s) | Child::Symbol(s) => Some(s.clone()), + _ => None, + }, + _ if self.call_parts(node).is_some() => { + let call = self.call_parts(node)?; + if !call.no_args || !self.stable_reader_name(&call.message) { + return None; + } + let recv = call.receiver?; + if recv.r#type == "SELF" { + return Some(format!("self.{}", call.message)); + } + let recv_key = self.subject_key(recv)?; + Some(format!("{}.{}", recv_key, call.message)) + } + _ => None, + } + } + + fn call_parts<'a>(&self, node: &'a Node) -> Option> { + match node.r#type.as_str() { + "CALL" => { + let receiver = node.children.get(0).and_then(ast::node); + let message = self.child_name(node.children.get(1)?)?; + Some(CallParts { + receiver, + message, + no_args: self.no_call_arguments(node.children.get(2)), + }) + } + "METHOD_INVOCATION" => { + let nodes = node + .children + .iter() + .filter_map(ast::node) + .collect::>(); + let receiver = nodes.first().copied(); + let message = nodes.get(1).and_then(|child| self.node_name(child))?; + Some(CallParts { + receiver, + message, + no_args: self.no_call_arguments(node.children.get(2)), + }) + } + "FUNCTION_CALL" | "METHOD_CALL" => { + let callee = node.children.iter().filter_map(ast::node).next()?; + let args = node + .children + .iter() + .skip(1) + .find(|child| matches!(child, Child::Node(n) if matches!(n.r#type.as_str(), "ARGUMENTS" | "ARGUMENT_LIST" | "LIST"))); + self.field_call_parts(callee, args) + } + "BLOCK" => { + let callee = node.children.iter().filter_map(ast::node).next()?; + let args = node + .children + .iter() + .skip(1) + .find(|child| matches!(child, Child::Node(n) if matches!(n.r#type.as_str(), "ARGUMENTS" | "ARGUMENT_LIST" | "LIST"))); + self.field_call_parts(callee, args) + } + "INVOCATION_EXPRESSION" => { + let callee = node.children.iter().filter_map(ast::node).next()?; + let mut parts = self.call_parts(callee)?; + let args = node + .children + .iter() + .skip(1) + .find(|child| matches!(child, Child::Node(n) if 
matches!(n.r#type.as_str(), "ARGUMENTS" | "ARGUMENT_LIST" | "LIST"))); + parts.no_args = self.no_call_arguments(args); + Some(parts) + } + _ => None, + } + } + + fn field_call_parts<'a>( + &self, + node: &'a Node, + args: Option<&'a Child>, + ) -> Option> { + if !matches!( + node.r#type.as_str(), + "DOT_INDEX_EXPRESSION" + | "FIELD_EXPRESSION" + | "FIELD_ACCESS" + | "MEMBER_EXPRESSION" + | "CALL" + ) { + return self.call_parts(node); + } + let nodes = node + .children + .iter() + .filter_map(ast::node) + .collect::>(); + let receiver = nodes.first().copied(); + let message = nodes.last().and_then(|child| self.node_name(child))?; + Some(CallParts { + receiver, + message, + no_args: self.no_call_arguments(args), + }) + } + + fn child_name(&self, child: &Child) -> Option { + match child { + Child::String(s) | Child::Symbol(s) => Some(s.clone()), + Child::Node(node) => self.node_name(node), + _ => None, + } + } + + fn node_name(&self, node: &Node) -> Option { + match node.children.first() { + Some(Child::String(s)) | Some(Child::Symbol(s)) => Some(s.clone()), + _ => { + let text = ast::slice(node, &self.lines).trim().to_string(); + (!text.is_empty()).then_some(text) + } + } + } + + fn no_call_arguments(&self, args: Option<&Child>) -> bool { + match args { + None | Some(Child::Nil) => true, + Some(Child::Node(node)) => { + !node.children.iter().any(|child| ast::node(child).is_some()) + } + Some(_) => false, + } + } + + fn parenthesized_wrapper(&self, node: &Node) -> bool { + matches!( + node.r#type.as_str(), + "CONDITION_CLAUSE" | "PARENTHESIZED_EXPRESSION" | "PARENTHESIZED_STATEMENTS" + ) && self.first_node_child(node).is_some() + } + + fn first_node_child<'a>(&self, node: &'a Node) -> Option<&'a Node> { + node.children.iter().find_map(ast::node) + } + + fn stable_reader_name(&self, mid: &str) -> bool { + !(mid.ends_with('=') || mid.ends_with('!') || mid == "[]") + } + + fn nil_arg(&self, args: Option<&Child>) -> bool { + let Some(Child::Node(node)) = args else { + 
return false; + }; + if node.r#type != "LIST" { + return false; + } + node.children.iter().any(|c| match c { + Child::Node(n) => n.r#type == "NIL", + Child::Nil => true, + _ => false, + }) + } + + fn stmts_for<'a>(&self, node: Option<&'a Node>) -> Vec<&'a Node> { + let Some(node) = node else { return Vec::new() }; + if self.call_parts(node).is_some() { + return vec![node]; + } + if node.r#type == "BLOCK" { + node.children.iter().filter_map(ast::node).collect() + } else { + vec![node] + } + } + + fn terminating(&self, node: &Node) -> bool { + if matches!(node.r#type.as_str(), "RETURN" | "NEXT" | "BREAK") { + return true; + } + if !matches!(node.r#type.as_str(), "FCALL" | "VCALL" | "CALL") + && self.call_parts(node).is_none() + { + return false; + } + + let mid = if let Some(call) = self.call_parts(node) { + Some(call.message) + } else if node.r#type == "CALL" { + node.children.get(1).and_then(|c| match c { + Child::String(s) | Child::Symbol(s) => Some(s.clone()), + _ => None, + }) + } else { + node.children.get(0).and_then(|c| match c { + Child::String(s) | Child::Symbol(s) => Some(s.clone()), + _ => None, + }) + }; + + if let Some(mid) = mid { + return TERMINATING_CALLS.contains(&mid.as_str()); + } + false + } + + fn span(&self, node: &Node) -> Span { + [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 2c205c541..b4766c18f 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -79,12 +79,21 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { &mut dispatch_sites, ); collect_equality_dispatch_sites(&comparison_uses, &call_sites, &mut dispatch_sites); + let profile = language_profile(language); let mut semantic_effect_sites = semantic_effect_sites_from_calls(language, 
&call_sites); - semantic_effect_sites.extend(ruby_global_context_effects(language, &state_reads)); + semantic_effect_sites.extend(profile.structural_semantic_effect_sites( + parsed.tree.root_node(), + &parsed.source, + &parsed.file, + &function_defs, + &state_reads, + &state_writes, + )); + dedup_semantic_effect_sites(&mut semantic_effect_sites); let local_complexity_scores = local_complexity_scores(&parsed.file.to_string_lossy(), &function_defs); - Ok(Document { + let mut document = Document { file: parsed.file.to_string_lossy().to_string(), language, source: parsed.source.clone(), @@ -105,7 +114,12 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { predicate_aliases, comparison_uses, path_condition_sites: Vec::new(), - }) + protocol_method_effects: Vec::new(), + protocol_call_paths: Vec::new(), + }; + document.protocol_method_effects = profile.protocol_method_effects(&document); + document.protocol_call_paths = profile.protocol_call_paths(&document); + Ok(document) } struct ParsedDocument { @@ -955,25 +969,18 @@ fn collect_branch_state_refs( } } -fn ruby_global_context_effects( - language: Language, - state_reads: &[StateRead], -) -> Vec { - if language != Language::Ruby { - return Vec::new(); - } - state_reads - .iter() - .filter(|read| read.field.starts_with('$')) - .map(|read| SemanticEffectSite { - kind: "context_dependency".to_string(), - detail: read.field.clone(), - file: read.file.clone(), - function: read.function.clone(), - line: read.line, - span: read.span, - }) - .collect() +fn dedup_semantic_effect_sites(sites: &mut Vec) { + let mut seen = HashSet::new(); + sites.retain(|site| { + seen.insert(( + site.kind.clone(), + site.detail.clone(), + site.file.clone(), + site.function.clone(), + site.line, + site.span, + )) + }); } fn branch_local_ref( diff --git a/gems/decomplex/rust/tests/examples_oracle.rs b/gems/decomplex/rust/tests/examples_oracle.rs index 0ca35cad4..0fe540919 100644 --- a/gems/decomplex/rust/tests/examples_oracle.rs +++ 
b/gems/decomplex/rust/tests/examples_oracle.rs @@ -8,6 +8,7 @@ use decomplex_rust::decomplex::detectors::{ }; use decomplex_rust::decomplex::report::Report; use decomplex_rust::decomplex::syntax::{Document, Language, LocalComplexityScore}; +use decomplex_rust::decomplex::syntax_oracle; use serde::{Deserialize, Serialize}; use serde_json::{json, Map, Value}; use std::collections::BTreeSet; @@ -106,6 +107,45 @@ fn shared_detector_fact_examples_match_exact_oracles() -> Result<()> { } } +#[test] +fn shared_local_flow_consumer_fact_examples_match_exact_oracles() -> Result<()> { + let examples_root = examples_root(); + let mut failures = Vec::new(); + + for fixture in local_flow_fact_fixture_paths(&examples_root)? { + let fixture_value: Value = serde_json::from_str(&fs::read_to_string(&fixture)?)?; + let expected_by_detector = fixture_value + .get("expected") + .and_then(Value::as_object) + .with_context(|| format!("{} missing expected", fixture.display()))?; + let input = detector_fact_input(&fixture_value) + .with_context(|| format!("{} input", fixture.display()))?; + + for (detector, expected) in expected_by_detector { + let actual = run_detector_on_fact_input(detector, &input, &fixture_value) + .with_context(|| format!("{} {}", detector, fixture.display()))?; + if actual != *expected { + failures.push(format!( + "{} {}\nexpected: {}\nactual: {}", + detector, + fixture.display(), + expected, + actual + )); + } + } + } + + if failures.is_empty() { + Ok(()) + } else { + bail!( + "shared local-flow consumer fact oracle failures:\n{}", + failures.join("\n\n") + ) + } +} + #[test] fn shared_report_fact_examples_match_postprocess_oracles() -> Result<()> { let examples_root = examples_root(); @@ -157,6 +197,57 @@ fn shared_report_fact_examples_match_postprocess_oracles() -> Result<()> { } } +#[test] +fn ruby_source_fact_examples_match_oracles() -> Result<()> { + let examples_root = examples_root(); + let mut failures = Vec::new(); + + for fixture in 
source_fact_fixture_paths(&examples_root)? { + let name = file_stem(&fixture)?; + let oracle_path = examples_root + .join("source-facts") + .join("oracles") + .join(format!("ruby-{name}.json")); + let expected: Value = serde_json::from_str(&fs::read_to_string(&oracle_path)?) + .with_context(|| format!("read {}", oracle_path.display()))?; + let mut actual = Map::new(); + if let Some(syntax_expected) = expected.get("syntax") { + actual.insert( + "syntax".to_string(), + project_source_syntax(&fixture, syntax_expected)?, + ); + } + if expected.get("local_flow").is_some() { + actual.insert( + "local_flow".to_string(), + project_local_flow(&value(local_flow::scan_files( + &[fixture.clone()], + Language::Ruby, + )?)?), + ); + } + + let actual = Value::Object(actual); + if actual != expected { + failures.push(format!( + "{}\nexpected: {}\nactual: {}", + fixture.display(), + expected, + actual + )); + } + } + + if failures.is_empty() { + Ok(()) + } else { + bail!( + "ruby source-facts oracle failures:\n{}", + failures.join("\n\n") + ) + } +} + fn examples_root() -> PathBuf { Path::new(env!("CARGO_MANIFEST_DIR")).join("../examples") } @@ -207,6 +298,32 @@ fn detector_fact_fixture_paths(examples_root: &Path) -> Result> { Ok(paths) } +fn local_flow_fact_fixture_paths(examples_root: &Path) -> Result> { + let root = examples_root.join("facts").join("local-flow"); + let mut paths = Vec::new(); + for entry in fs::read_dir(&root)? { + let path = entry?.path(); + if path.extension().and_then(|extension| extension.to_str()) == Some("json") { + paths.push(path); + } + } + paths.sort(); + Ok(paths) +} + +fn source_fact_fixture_paths(examples_root: &Path) -> Result> { + let root = examples_root.join("source-facts").join("ruby"); + let mut paths = Vec::new(); + for entry in fs::read_dir(&root)? 
{ + let path = entry?.path(); + if path.extension().and_then(|extension| extension.to_str()) == Some("rb") { + paths.push(path); + } + } + paths.sort(); + Ok(paths) +} + #[derive(Deserialize)] struct DetectorFactInput { documents: Vec, @@ -663,6 +780,39 @@ fn project_detector_output(detector: &str, output: Value) -> Value { } } +fn project_source_syntax(fixture: &Path, expected: &Value) -> Result { + let projection = syntax_oracle::project_files(&[fixture.to_path_buf()], Language::Ruby)?; + let document = array(field(&projection, "documents")) + .first() + .cloned() + .unwrap_or(Value::Null); + let mut out = Map::new(); + if let Some(object) = expected.as_object() { + for key in object.keys() { + let keys = match key.as_str() { + "functions" => &["name", "owner", "line", "visibility", "params"][..], + "calls" => &[ + "receiver", + "message", + "function", + "line", + "conditional", + "control", + "safe_navigation", + "block", + "arguments", + ][..], + "state_reads" => &["receiver", "field", "function", "line"][..], + "state_writes" => &["receiver", "field", "function", "line"][..], + "semantic_effects" => &["kind", "detail", "function", "line"][..], + _ => bail!("unsupported source syntax section: {key}"), + }; + out.insert(key.clone(), rows(field(&document, key), keys)); + } + } + Ok(Value::Object(out)) +} + fn project_local_flow(output: &Value) -> Value { Value::Array( array(output) diff --git a/gems/decomplex/test/local_flow_fact_oracle_test.rb b/gems/decomplex/test/local_flow_fact_oracle_test.rb new file mode 100644 index 000000000..93ade23ed --- /dev/null +++ b/gems/decomplex/test/local_flow_fact_oracle_test.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +require "json" +require "minitest/autorun" +require "tempfile" +require_relative "../lib/decomplex/detector_runner" + +class LocalFlowFactOracleTest < Minitest::Test + EXAMPLES_ROOT = File.expand_path("../examples/facts/local-flow", __dir__) + ENGINES = Decomplex::DetectorRunner::ENGINES.freeze + + 
FIXTURE_PATHS = Dir[File.join(EXAMPLES_ROOT, "*.json")].sort.freeze + + def test_local_flow_fact_fixtures_exist + refute_empty FIXTURE_PATHS + end + + FIXTURE_PATHS.product(ENGINES).each_with_index do |(fixture_path, engine), index| + name = File.basename(fixture_path, ".json") + method_name = "test_#{index}_#{engine}_#{name.tr("-", "_")}_local_flow_consumers_match_oracle" + + define_method(method_name) do + assert_local_flow_fact_fixture(fixture_path, engine) + end + end + + private + + def assert_local_flow_fact_fixture(fixture_path, engine) + fixture = JSON.parse(File.read(fixture_path)) + input = fixture.fetch("input") + fixture.fetch("expected").each do |detector, expected| + Tempfile.create(["decomplex-local-flow-fact", ".json"]) do |file| + file.write(JSON.pretty_generate({ "detector" => detector, "input" => input, "expected" => expected })) + file.flush + actual = JSON.parse(Decomplex::DetectorRunner.canonical_json_from_fact_fixture(file.path, engine: engine)) + assert_equal expected, actual, "#{engine} #{fixture_path} #{detector}" + end + end + end +end diff --git a/gems/decomplex/test/source_facts_oracle_test.rb b/gems/decomplex/test/source_facts_oracle_test.rb new file mode 100644 index 000000000..fe0842936 --- /dev/null +++ b/gems/decomplex/test/source_facts_oracle_test.rb @@ -0,0 +1,88 @@ +# frozen_string_literal: true + +require "json" +require "minitest/autorun" +require_relative "../lib/decomplex/detector_runner" +require_relative "../lib/decomplex/syntax_oracle" + +class SourceFactsOracleTest < Minitest::Test + EXAMPLES_ROOT = File.expand_path("../examples/source-facts", __dir__) + ORACLE_ROOT = File.join(EXAMPLES_ROOT, "oracles") + ENGINES = %w[ruby rust].freeze + + FIXTURES = Dir[File.join(EXAMPLES_ROOT, "ruby", "*.rb")].sort.freeze + + def test_ruby_source_fact_fixtures_exist + refute_empty FIXTURES + end + + FIXTURES.product(ENGINES).each_with_index do |(fixture_path, engine), index| + name = File.basename(fixture_path, ".rb") + method_name = 
"test_#{index}_#{engine}_ruby_#{name}_source_facts_match_oracle" + + define_method(method_name) do + assert_source_facts_match_oracle(fixture_path, engine) + end + end + + private + + def assert_source_facts_match_oracle(fixture_path, engine) + name = File.basename(fixture_path, ".rb") + oracle_path = File.join(ORACLE_ROOT, "ruby-#{name}.json") + assert File.file?(oracle_path), "missing source-facts oracle #{oracle_path}" + + expected = JSON.parse(File.read(oracle_path)) + actual = {} + actual["syntax"] = project_syntax(fixture_path, engine, expected.fetch("syntax", {})) if expected.key?("syntax") + actual["local_flow"] = project_local_flow(fixture_path, engine) if expected.key?("local_flow") + + assert_equal expected, actual, "#{engine} #{fixture_path}" + end + + def project_syntax(fixture_path, engine, expected) + document = Decomplex::SyntaxOracle.project([fixture_path], engine: engine, language: :ruby) + .fetch("documents") + .first + expected.keys.each_with_object({}) do |section, out| + out[section] = syntax_rows(document.fetch(section), syntax_keys(section)) + end + end + + def syntax_keys(section) + { + "functions" => %w[name owner line visibility params], + "calls" => %w[receiver message function line conditional control safe_navigation block arguments], + "state_reads" => %w[receiver field function line], + "state_writes" => %w[receiver field function line], + "semantic_effects" => %w[kind detail function line] + }.fetch(section) + end + + def syntax_rows(rows, keys) + Array(rows).map { |row| pick(row, keys) } + end + + def project_local_flow(fixture_path, engine) + output = JSON.parse( + Decomplex::DetectorRunner.canonical_json("local-flow", [fixture_path], engine: engine) + ) + Array(output).map do |method| + { + "method" => method["name"], + "statements" => Array(method["statements"]).map do |statement| + pick(statement, %w[reads writes dependencies co_uses]) + end, + "boundaries" => Array(method["boundaries"]).map do |boundary| + pick(boundary, 
%w[before_index after_index kind]) + end + } + end + end + + def pick(row, keys) + keys.each_with_object({}) do |key, out| + out[key] = row[key] if row.key?(key) + end + end +end From 34cabe0c2e5847a3f9bc485c0f23db9b9f1c2e94 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 17:03:04 +0000 Subject: [PATCH 50/52] WIP enforce decomplex architecture boundaries --- .../lib/decomplex/false_simplicity.rb | 8 +- .../decomplex/lib/decomplex/syntax/effects.rb | 10 ++ .../rust/src/decomplex/architecture_test.rs | 116 +++++++++++++++--- .../test/architecture_invariants_test.rb | 34 +++++ 4 files changed, 147 insertions(+), 21 deletions(-) diff --git a/gems/decomplex/lib/decomplex/false_simplicity.rb b/gems/decomplex/lib/decomplex/false_simplicity.rb index d25791132..07857fa86 100644 --- a/gems/decomplex/lib/decomplex/false_simplicity.rb +++ b/gems/decomplex/lib/decomplex/false_simplicity.rb @@ -60,13 +60,7 @@ def self.class_records_for_document(document) end def self.core_owner_names(language) - profile = Syntax.language_profile(language) - return [] unless profile.respond_to?(:effect_lexicon, true) - - lexicon = profile.send(:effect_lexicon) - lexicon&.core_consts.to_a - rescue ArgumentError - [] + Syntax.core_owner_names(language) end # Groups hits by [kind, detail] and ranks by blast radius: diff --git a/gems/decomplex/lib/decomplex/syntax/effects.rb b/gems/decomplex/lib/decomplex/syntax/effects.rb index e4766b1f9..6b834a444 100644 --- a/gems/decomplex/lib/decomplex/syntax/effects.rb +++ b/gems/decomplex/lib/decomplex/syntax/effects.rb @@ -11,6 +11,16 @@ module Syntax keyword_init: true ) + def self.core_owner_names(language) + profile = language_profile(language) + return [] unless profile.respond_to?(:effect_lexicon, true) + + lexicon = profile.send(:effect_lexicon) + Array(lexicon&.core_consts) + rescue ArgumentError + [] + end + class Document def semantic_effect_sites @semantic_effect_sites ||= adapter.semantic_effect_sites(self) diff --git 
a/gems/decomplex/rust/src/decomplex/architecture_test.rs b/gems/decomplex/rust/src/decomplex/architecture_test.rs index 7ea1f7702..6c98a1f57 100644 --- a/gems/decomplex/rust/src/decomplex/architecture_test.rs +++ b/gems/decomplex/rust/src/decomplex/architecture_test.rs @@ -5,6 +5,38 @@ fn crate_src() -> PathBuf { Path::new(env!("CARGO_MANIFEST_DIR")).join("src/decomplex") } +fn detector_files() -> Vec { + rust_files(crate_src().join("detectors")) +} + +fn post_syntax_consumer_files() -> Vec { + let mut files = detector_files(); + files.extend( + [ + "convergence.rs", + "delta.rs", + "report.rs", + "report_facts.rs", + "report_value.rs", + "root_cause.rs", + "sarif.rs", + ] + .iter() + .map(|name| crate_src().join(name)), + ); + files +} + +fn rust_files(dir: PathBuf) -> Vec { + let mut files = fs::read_dir(&dir) + .unwrap_or_else(|err| panic!("read {}: {err}", dir.display())) + .map(|entry| entry.expect("rust file entry").path()) + .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some("rs")) + .collect::>(); + files.sort(); + files +} + #[test] fn every_supported_language_has_a_syntax_adapter_file() { let adapters = crate_src().join("syntax/adapters"); @@ -140,14 +172,7 @@ fn ast_adapters_do_not_delegate_through_a_language_kind_selector() { #[test] fn detectors_do_not_import_tree_sitter_directly() { - let detectors = crate_src().join("detectors"); - let entries = fs::read_dir(&detectors).expect("read detectors dir"); - - for entry in entries { - let path = entry.expect("detector entry").path(); - if path.extension().and_then(|ext| ext.to_str()) != Some("rs") { - continue; - } + for path in detector_files() { let source = production_source(&fs::read_to_string(&path).expect("read detector source")); assert!( !source.contains("tree_sitter"), @@ -159,7 +184,6 @@ fn detectors_do_not_import_tree_sitter_directly() { #[test] fn detectors_do_not_cross_the_syntax_boundary() { - let detectors = crate_src().join("detectors"); let forbidden = [ ("syntax adapter 
access", "syntax::adapters"), ("language profile access", "language_profile("), @@ -188,11 +212,7 @@ fn detectors_do_not_cross_the_syntax_boundary() { ]; let mut offenders = Vec::new(); - for entry in fs::read_dir(&detectors).expect("read detectors dir") { - let path = entry.expect("detector entry").path(); - if path.extension().and_then(|ext| ext.to_str()) != Some("rs") { - continue; - } + for path in detector_files() { let source = production_source(&fs::read_to_string(&path).expect("read detector source")); for (reason, pattern) in forbidden { if source.contains(pattern) { @@ -208,6 +228,74 @@ fn detectors_do_not_cross_the_syntax_boundary() { ); } +#[test] +fn post_syntax_consumers_do_not_access_parser_or_adapter_internals() { + let forbidden = [ + ("syntax adapter access", "syntax::adapters"), + ("language profile access", "language_profile("), + ("raw syntax node type", "RawNode"), + ("tree-sitter access", "tree_sitter"), + ("raw document root access", "document.root"), + ( + "normalized document root access", + "document.normalized_root", + ), + ]; + let mut offenders = Vec::new(); + + for path in post_syntax_consumer_files() { + let source = production_source(&fs::read_to_string(&path).expect("read consumer source")); + for (reason, pattern) in forbidden { + if source.contains(pattern) { + offenders.push(format!("{}: {}: {}", path.display(), reason, pattern)); + } + } + } + + assert!( + offenders.is_empty(), + "Post-syntax consumers must consume generated facts, not parser/adaptor internals:\n{}", + offenders.join("\n") + ); +} + +#[test] +fn post_syntax_consumers_do_not_branch_on_concrete_languages() { + let forbidden = [ + ("Ruby language branch", "Language::Ruby"), + ("Python language branch", "Language::Python"), + ("JavaScript language branch", "Language::JavaScript"), + ("Java language branch", "Language::Java"), + ("TypeScript language branch", "Language::TypeScript"), + ("Swift language branch", "Language::Swift"), + ("Kotlin language branch", 
"Language::Kotlin"), + ("Go language branch", "Language::Go"), + ("Rust language branch", "Language::Rust"), + ("Zig language branch", "Language::Zig"), + ("Lua language branch", "Language::Lua"), + ("C language branch", "Language::C"), + ("Cpp language branch", "Language::Cpp"), + ("CSharp language branch", "Language::CSharp"), + ("Php language branch", "Language::Php"), + ]; + let mut offenders = Vec::new(); + + for path in post_syntax_consumer_files() { + let source = production_source(&fs::read_to_string(&path).expect("read consumer source")); + for (reason, pattern) in forbidden { + if source.contains(pattern) { + offenders.push(format!("{}: {}: {}", path.display(), reason, pattern)); + } + } + } + + assert!( + offenders.is_empty(), + "Post-syntax consumers must not encode language-specific branches:\n{}", + offenders.join("\n") + ); +} + #[test] fn report_facts_uses_document_detector_apis() { let path = crate_src().join("report_facts.rs"); diff --git a/gems/decomplex/test/architecture_invariants_test.rb b/gems/decomplex/test/architecture_invariants_test.rb index 92324e718..7210bbf6e 100644 --- a/gems/decomplex/test/architecture_invariants_test.rb +++ b/gems/decomplex/test/architecture_invariants_test.rb @@ -15,6 +15,13 @@ class DecomplexArchitectureInvariantsTest < Minitest::Test temporal_ordering_pressure weighted_inlined_cognitive_complexity ].freeze DETECTOR_FILES = DETECTOR_BASENAMES.map { |name| File.join(LIB, "#{name}.rb") }.freeze + POST_SYNTAX_CONSUMER_BASENAMES = ( + DETECTOR_BASENAMES + %w[ + convergence delta report report_facts root_cause sarif + ] + ).uniq.freeze + POST_SYNTAX_CONSUMER_FILES = + POST_SYNTAX_CONSUMER_BASENAMES.map { |name| File.join(LIB, "#{name}.rb") }.freeze RAW_TREE_SITTER_PATTERNS = { "raw child traversal" => /(? 
/\b(?:ts_node\?|tree_sitter_node\?)\b/, "raw node duck typing" => /respond_to\?\s*\(\s*:children\s*\)/ }.freeze + ADAPTER_BOUNDARY_PATTERNS = RAW_TREE_SITTER_PATTERNS.merge( + "syntax adapter profile access" => /\bSyntax\.language_profile\b|\blanguage_profile\s*\(/, + "raw document root access" => /\bdocument\.root\b/, + "normalized document root access" => /\bdocument\.normalized_root\b/ + ).freeze + CONCRETE_LANGUAGE_BRANCH_PATTERNS = { + "concrete language branch" => + /\b(?:case|when|if|elsif)\b.*(?::ruby|:python|:javascript|:typescript|:go|:rust|:zig|:lua|:c|:cpp|:csharp|:java|:swift|:kotlin|:php)\b|\blanguage\s*==\s*(?::ruby|:python|:javascript|:typescript|:go|:rust|:zig|:lua|:c|:cpp|:csharp|:java|:swift|:kotlin|:php)\b/ + }.freeze SYNTAX_RB_EXTENSION_HOST_PATTERNS = { "clone similarity belongs in syntax/clone_similarity.rb" => /\b(?:CloneCandidate|clone_candidates|CLONE_)/, @@ -71,6 +87,24 @@ def test_detectors_do_not_talk_to_tree_sitter_nodes_directly ) end + def test_post_syntax_consumers_do_not_cross_adapter_boundary + offenders = scan_files(POST_SYNTAX_CONSUMER_FILES, ADAPTER_BOUNDARY_PATTERNS) + + assert_empty offenders, format_offenders( + "Code after Syntax must consume facts instead of parser or adapter internals", + offenders + ) + end + + def test_post_syntax_consumers_do_not_branch_on_concrete_languages + offenders = scan_files(POST_SYNTAX_CONSUMER_FILES, CONCRETE_LANGUAGE_BRANCH_PATTERNS) + + assert_empty offenders, format_offenders( + "Code after Syntax must not contain language-specific branches", + offenders + ) + end + def test_detector_specific_syntax_extensions_do_not_live_in_syntax_rb syntax_rb = File.join(LIB, "syntax.rb") offenders = scan_files([syntax_rb], SYNTAX_RB_EXTENSION_HOST_PATTERNS) From d8f06fe67bac4a14a52733cbf6beecb8b8984c81 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 18:16:18 +0000 Subject: [PATCH 51/52] Strengthen decomplex Ruby Rust fact parity --- .../facts/detectors/co-update-rich.json | 67 ++ 
.../false-simplicity-core-top-level.json | 61 ++ .../detectors/flay-similarity-facts.json | 234 ++++++ .../weighted-inlined-complexity-edges.json | 167 ++++ .../oracles/ruby-branch_predicate_paths.json | 271 +++++++ .../oracles/ruby-local_methods_contracts.json | 308 ++++++++ .../oracles/ruby-protocols_nil_clone.json | 568 ++++++++++++++ .../ruby/branch_predicate_paths.rb | 24 + .../ruby/local_methods_contracts.rb | 14 + .../source-facts/ruby/protocols_nil_clone.rb | 34 + .../syntax-facts/oracles/ruby-core.json | 13 + .../lib/decomplex/detector_runner.rb | 44 ++ .../lib/decomplex/false_simplicity.rb | 3 +- gems/decomplex/lib/decomplex/syntax.rb | 21 +- gems/decomplex/lib/decomplex/syntax/ruby.rb | 82 +- .../lib/decomplex/syntax/ruby_protocols.rb | 8 + gems/decomplex/lib/decomplex/syntax_oracle.rb | 85 +- .../decomplex/detectors/decision_pressure.rs | 33 +- .../src/decomplex/detectors/local_flow.rs | 3 +- gems/decomplex/rust/src/decomplex/syntax.rs | 16 + .../src/decomplex/syntax/adapters/ruby.rs | 742 ++++++++++++++++-- .../rust/src/decomplex/syntax/local_flow.rs | 83 +- .../src/decomplex/syntax/path_condition.rs | 192 +++-- .../decomplex/syntax/tree_sitter_adapter.rs | 353 ++++++++- .../rust/src/decomplex/syntax_oracle.rs | 110 +++ gems/decomplex/rust/src/main.rs | 6 +- gems/decomplex/rust/tests/examples_oracle.rs | 71 +- gems/decomplex/test/examples_oracle_test.rb | 8 +- gems/decomplex/test/local_flow_test.rb | 2 +- .../test/source_facts_oracle_test.rb | 41 +- gems/decomplex/test/syntax_oracle_test.rb | 19 + 31 files changed, 3512 insertions(+), 171 deletions(-) create mode 100644 gems/decomplex/examples/facts/detectors/co-update-rich.json create mode 100644 gems/decomplex/examples/facts/detectors/false-simplicity-core-top-level.json create mode 100644 gems/decomplex/examples/facts/detectors/flay-similarity-facts.json create mode 100644 gems/decomplex/examples/facts/detectors/weighted-inlined-complexity-edges.json create mode 100644 
gems/decomplex/examples/source-facts/oracles/ruby-branch_predicate_paths.json create mode 100644 gems/decomplex/examples/source-facts/oracles/ruby-local_methods_contracts.json create mode 100644 gems/decomplex/examples/source-facts/oracles/ruby-protocols_nil_clone.json create mode 100644 gems/decomplex/examples/source-facts/ruby/branch_predicate_paths.rb create mode 100644 gems/decomplex/examples/source-facts/ruby/local_methods_contracts.rb create mode 100644 gems/decomplex/examples/source-facts/ruby/protocols_nil_clone.rb diff --git a/gems/decomplex/examples/facts/detectors/co-update-rich.json b/gems/decomplex/examples/facts/detectors/co-update-rich.json new file mode 100644 index 000000000..aaebc703f --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/co-update-rich.json @@ -0,0 +1,67 @@ +{ + "detector": "co-update", + "input": { + "documents": [ + { + "file": "facts/co_update.rb", + "language": "ruby", + "state_writes": [ + {"field": "status", "receiver": "self", "file": "facts/co_update.rb", "function": "create", "line": 1, "span": [1, 0, 1, 14], "owner": "Fixture"}, + {"field": "version", "receiver": "self", "file": "facts/co_update.rb", "function": "create", "line": 2, "span": [2, 0, 2, 15], "owner": "Fixture"}, + {"field": "status", "receiver": "self", "file": "facts/co_update.rb", "function": "replace", "line": 4, "span": [4, 0, 4, 14], "owner": "Fixture"}, + {"field": "version", "receiver": "self", "file": "facts/co_update.rb", "function": "replace", "line": 5, "span": [5, 0, 5, 15], "owner": "Fixture"}, + {"field": "status", "receiver": "self", "file": "facts/co_update.rb", "function": "restore", "line": 7, "span": [7, 0, 7, 14], "owner": "Fixture"}, + {"field": "version", "receiver": "self", "file": "facts/co_update.rb", "function": "restore", "line": 8, "span": [8, 0, 8, 15], "owner": "Fixture"}, + {"field": "status", "receiver": "self", "file": "facts/co_update.rb", "function": "touch_status", "line": 10, "span": [10, 0, 10, 14], "owner": 
"Fixture"}, + {"field": "version", "receiver": "self", "file": "facts/co_update.rb", "function": "touch_version", "line": 12, "span": [12, 0, 12, 15], "owner": "Fixture"} + ] + } + ] + }, + "expected": { + "co_written_pairs": [ + { + "pair": [ + "status", + "version" + ], + "sites": [ + "facts/co_update.rb:create", + "facts/co_update.rb:replace", + "facts/co_update.rb:restore" + ], + "support": 3 + } + ], + "neglected_updates": [ + { + "at": "facts/co_update.rb:touch_status:10", + "has": "status", + "missing": "version", + "pair": [ + "status", + "version" + ], + "recv": "self", + "spans": { + "facts/co_update.rb:touch_status:10": [10, 0, 10, 14] + }, + "support": 3 + }, + { + "at": "facts/co_update.rb:touch_version:12", + "has": "version", + "missing": "status", + "pair": [ + "status", + "version" + ], + "recv": "self", + "spans": { + "facts/co_update.rb:touch_version:12": [12, 0, 12, 15] + }, + "support": 3 + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/false-simplicity-core-top-level.json b/gems/decomplex/examples/facts/detectors/false-simplicity-core-top-level.json new file mode 100644 index 000000000..0d6722792 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/false-simplicity-core-top-level.json @@ -0,0 +1,61 @@ +{ + "detector": "false-simplicity", + "input": { + "documents": [ + { + "file": "facts/false_core.rb", + "language": "ruby", + "source": "", + "lines": [], + "root": {"kind": "program", "text": "", "span": [1, 0, 1, 0], "named": true, "field_name": null, "children": []}, + "normalized_root": {"type": "ROOT", "children": [], "first_lineno": 1, "first_column": 0, "last_lineno": 1, "last_column": 0, "text": ""}, + "function_defs": [ + {"file": "facts/false_core.rb", "name": "scrub", "owner": "String", "line": 2, "span": [2, 2, 2, 20], "body": {"kind": "body_statement", "text": "", "span": [2, 2, 2, 20], "named": true, "field_name": null, "children": []}, "visibility": null, "params": []} + ], + "owner_defs": [ + {"file": 
"facts/false_core.rb", "name": "String", "kind": "class", "line": 1, "span": [1, 0, 3, 3]} + ], + "call_sites": [], + "state_reads": [], + "state_writes": [], + "decision_sites": [], + "branch_decisions": [], + "dispatch_sites": [], + "semantic_effect_sites": [ + {"kind": "hidden_context", "detail": "Dir.chdir", "file": "facts/false_core.rb", "function": "", "owner": "", "line": 5, "span": [5, 0, 5, 18]} + ], + "local_complexity_scores": {}, + "predicate_aliases": [], + "comparison_uses": [] + } + ] + }, + "expected": [ + { + "at": "facts/false_core.rb:(top-level):5", + "detail": "Dir.chdir", + "kind": "hidden_context", + "scatter": 1, + "sites": [ + "facts/false_core.rb:(top-level):5" + ], + "spans": { + "facts/false_core.rb:(top-level):5": [5, 0, 5, 18] + }, + "support": 1 + }, + { + "at": "facts/false_core.rb:String:1", + "detail": "String", + "kind": "monkeypatch", + "scatter": 1, + "sites": [ + "facts/false_core.rb:String:1" + ], + "spans": { + "facts/false_core.rb:String:1": [1, 0, 3, 3] + }, + "support": 1 + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/flay-similarity-facts.json b/gems/decomplex/examples/facts/detectors/flay-similarity-facts.json new file mode 100644 index 000000000..698835931 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/flay-similarity-facts.json @@ -0,0 +1,234 @@ +{ + "detector": "flay-similarity", + "options": { + "mass": 1, + "fuzzy": 1 + }, + "input": { + "documents": [ + { + "file": "facts/flay.rb", + "language": "ruby", + "root": {"kind": "program", "text": "", "span": [1, 0, 1, 0], "named": true, "field_name": null, "children": []}, + "function_defs": [ + { + "file": "facts/flay.rb", + "name": "alpha", + "owner": "Fixture", + "line": 1, + "span": [1, 0, 5, 3], + "visibility": "public", + "params": [], + "body": { + "kind": "body_statement", + "text": "left = source.fetch(:left)\nright = source.fetch(:right)\ncommit(left, right)", + "span": [2, 2, 4, 21], + "named": true, + "field_name": null, + 
"children": [ + {"kind": "assignment", "text": "left = source.fetch(:left)", "span": [2, 2, 2, 28], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "left", "span": [2, 2, 2, 6], "named": true, "field_name": null, "children": []}, + {"kind": "=", "text": "=", "span": [2, 7, 2, 8], "named": false, "field_name": null, "children": []}, + {"kind": "call", "text": "source.fetch(:left)", "span": [2, 9, 2, 28], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "source", "span": [2, 9, 2, 15], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "fetch", "span": [2, 16, 2, 21], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(:left)", "span": [2, 21, 2, 28], "named": true, "field_name": null, "children": [ + {"kind": "symbol", "text": ":left", "span": [2, 22, 2, 27], "named": true, "field_name": null, "children": []} + ]} + ]} + ]}, + {"kind": "assignment", "text": "right = source.fetch(:right)", "span": [3, 2, 3, 30], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "right", "span": [3, 2, 3, 7], "named": true, "field_name": null, "children": []}, + {"kind": "=", "text": "=", "span": [3, 8, 3, 9], "named": false, "field_name": null, "children": []}, + {"kind": "call", "text": "source.fetch(:right)", "span": [3, 10, 3, 30], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "source", "span": [3, 10, 3, 16], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "fetch", "span": [3, 17, 3, 22], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(:right)", "span": [3, 22, 3, 30], "named": true, "field_name": null, "children": [ + {"kind": "symbol", "text": ":right", "span": [3, 23, 3, 29], "named": true, "field_name": null, "children": []} + ]} + ]} + ]}, + {"kind": "call", "text": "commit(left, right)", 
"span": [4, 2, 4, 21], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "commit", "span": [4, 2, 4, 8], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(left, right)", "span": [4, 8, 4, 21], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "left", "span": [4, 9, 4, 13], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "right", "span": [4, 15, 4, 20], "named": true, "field_name": null, "children": []} + ]} + ]} + ] + } + }, + { + "file": "facts/flay.rb", + "name": "beta", + "owner": "Fixture", + "line": 7, + "span": [7, 0, 11, 3], + "visibility": "public", + "params": [], + "body": { + "kind": "body_statement", + "text": "first = source.fetch(:first)\nsecond = source.fetch(:second)\ncommit(first, second)", + "span": [8, 2, 10, 25], + "named": true, + "field_name": null, + "children": [ + {"kind": "assignment", "text": "first = source.fetch(:first)", "span": [8, 2, 8, 30], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "first", "span": [8, 2, 8, 7], "named": true, "field_name": null, "children": []}, + {"kind": "=", "text": "=", "span": [8, 8, 8, 9], "named": false, "field_name": null, "children": []}, + {"kind": "call", "text": "source.fetch(:first)", "span": [8, 10, 8, 30], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "source", "span": [8, 10, 8, 16], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "fetch", "span": [8, 17, 8, 22], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(:first)", "span": [8, 22, 8, 30], "named": true, "field_name": null, "children": [ + {"kind": "symbol", "text": ":first", "span": [8, 23, 8, 29], "named": true, "field_name": null, "children": []} + ]} + ]} + ]}, + {"kind": "assignment", "text": "second = source.fetch(:second)", "span": [9, 2, 9, 32], 
"named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "second", "span": [9, 2, 9, 8], "named": true, "field_name": null, "children": []}, + {"kind": "=", "text": "=", "span": [9, 9, 9, 10], "named": false, "field_name": null, "children": []}, + {"kind": "call", "text": "source.fetch(:second)", "span": [9, 11, 9, 32], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "source", "span": [9, 11, 9, 17], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "fetch", "span": [9, 18, 9, 23], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(:second)", "span": [9, 23, 9, 32], "named": true, "field_name": null, "children": [ + {"kind": "symbol", "text": ":second", "span": [9, 24, 9, 31], "named": true, "field_name": null, "children": []} + ]} + ]} + ]}, + {"kind": "call", "text": "commit(first, second)", "span": [10, 2, 10, 25], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "commit", "span": [10, 2, 10, 8], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(first, second)", "span": [10, 8, 10, 25], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "first", "span": [10, 9, 10, 14], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "second", "span": [10, 16, 10, 22], "named": true, "field_name": null, "children": []} + ]} + ]} + ] + } + }, + { + "file": "facts/flay.rb", + "name": "gamma", + "owner": "Fixture", + "line": 13, + "span": [13, 0, 17, 3], + "visibility": "public", + "params": [], + "body": { + "kind": "body_statement", + "text": "value = source.fetch(:value)\nsave(value)\nlog(value)", + "span": [14, 2, 16, 12], + "named": true, + "field_name": null, + "children": [ + {"kind": "assignment", "text": "value = source.fetch(:value)", "span": [14, 2, 14, 30], "named": true, "field_name": null, 
"children": [ + {"kind": "identifier", "text": "value", "span": [14, 2, 14, 7], "named": true, "field_name": null, "children": []}, + {"kind": "=", "text": "=", "span": [14, 8, 14, 9], "named": false, "field_name": null, "children": []}, + {"kind": "call", "text": "source.fetch(:value)", "span": [14, 10, 14, 30], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "source", "span": [14, 10, 14, 16], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "fetch", "span": [14, 17, 14, 22], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(:value)", "span": [14, 22, 14, 30], "named": true, "field_name": null, "children": [ + {"kind": "symbol", "text": ":value", "span": [14, 23, 14, 29], "named": true, "field_name": null, "children": []} + ]} + ]} + ]}, + {"kind": "call", "text": "save(value)", "span": [15, 2, 15, 13], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "save", "span": [15, 2, 15, 6], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(value)", "span": [15, 6, 15, 13], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "value", "span": [15, 7, 15, 12], "named": true, "field_name": null, "children": []} + ]} + ]}, + {"kind": "call", "text": "log(value)", "span": [16, 2, 16, 12], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "log", "span": [16, 2, 16, 5], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(value)", "span": [16, 5, 16, 12], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "value", "span": [16, 6, 16, 11], "named": true, "field_name": null, "children": []} + ]} + ]} + ] + } + }, + { + "file": "facts/flay.rb", + "name": "delta", + "owner": "Fixture", + "line": 19, + "span": [19, 0, 23, 3], + "visibility": "public", + "params": [], + "body": 
{ + "kind": "body_statement", + "text": "item = source.fetch(:item)\nsave(item)\naudit(item)", + "span": [20, 2, 22, 14], + "named": true, + "field_name": null, + "children": [ + {"kind": "assignment", "text": "item = source.fetch(:item)", "span": [20, 2, 20, 28], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "item", "span": [20, 2, 20, 6], "named": true, "field_name": null, "children": []}, + {"kind": "=", "text": "=", "span": [20, 7, 20, 8], "named": false, "field_name": null, "children": []}, + {"kind": "call", "text": "source.fetch(:item)", "span": [20, 9, 20, 28], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "source", "span": [20, 9, 20, 15], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "fetch", "span": [20, 16, 20, 21], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(:item)", "span": [20, 21, 20, 28], "named": true, "field_name": null, "children": [ + {"kind": "symbol", "text": ":item", "span": [20, 22, 20, 27], "named": true, "field_name": null, "children": []} + ]} + ]} + ]}, + {"kind": "call", "text": "save(item)", "span": [21, 2, 21, 12], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "save", "span": [21, 2, 21, 6], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(item)", "span": [21, 6, 21, 12], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "item", "span": [21, 7, 21, 11], "named": true, "field_name": null, "children": []} + ]} + ]}, + {"kind": "call", "text": "audit(item)", "span": [22, 2, 22, 14], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "audit", "span": [22, 2, 22, 7], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(item)", "span": [22, 7, 22, 14], "named": true, "field_name": null, "children": [ + {"kind": 
"identifier", "text": "item", "span": [22, 8, 22, 12], "named": true, "field_name": null, "children": []} + ]} + ]} + ] + } + } + ] + } + ] + }, + "expected": { + "findings": [ + { + "at": "facts/flay.rb:alpha:2", + "clone_type": "type2", + "locations": [ + "facts/flay.rb:2", + "facts/flay.rb:8" + ], + "mass": 22, + "node": "defn", + "sites": [ + "facts/flay.rb:alpha:2", + "facts/flay.rb:beta:8" + ], + "spans": { + "facts/flay.rb:alpha:2": [2, 0, 4, 1], + "facts/flay.rb:beta:8": [8, 0, 10, 1] + } + }, + { + "at": "facts/flay.rb:delta:20", + "clone_type": "type3", + "locations": [ + "facts/flay.rb:14", + "facts/flay.rb:20" + ], + "mass": 12, + "node": "defn", + "sites": [ + "facts/flay.rb:delta:20", + "facts/flay.rb:gamma:14" + ], + "spans": { + "facts/flay.rb:delta:20": [20, 0, 22, 1], + "facts/flay.rb:gamma:14": [14, 0, 16, 1] + } + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity-edges.json b/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity-edges.json new file mode 100644 index 000000000..578983ace --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity-edges.json @@ -0,0 +1,167 @@ +{ + "detector": "weighted-inlined-complexity", + "input": { + "documents": [ + { + "file": "facts/weighted_edges.rb", + "language": "ruby", + "lines": [ + "class Fixture", + " def entry", + " self.helper", + " self.helper", + " self.shared", + " self.missing", + " end", + " def helper", + " self.leaf", + " end", + " def leaf", + " self.deep", + " end", + " def deep", + " end", + " def shared", + " end", + " def hub", + " self.shared", + " end", + " def other", + " self.shared", + " end", + "end" + ], + "function_defs": [ + {"file": "facts/weighted_edges.rb", "name": "entry", "owner": "Fixture", "line": 2, "span": [2, 2, 7, 5], "body": {"kind": "body_statement", "text": "", "span": [2, 2, 7, 5], "named": true, "field_name": null, "children": []}, "visibility": "public", "params": []}, + 
{"file": "facts/weighted_edges.rb", "name": "helper", "owner": "Fixture", "line": 8, "span": [8, 2, 10, 5], "body": {"kind": "body_statement", "text": "", "span": [8, 2, 10, 5], "named": true, "field_name": null, "children": []}, "visibility": "private", "params": []}, + {"file": "facts/weighted_edges.rb", "name": "leaf", "owner": "Fixture", "line": 11, "span": [11, 2, 13, 5], "body": {"kind": "body_statement", "text": "", "span": [11, 2, 13, 5], "named": true, "field_name": null, "children": []}, "visibility": "private", "params": []}, + {"file": "facts/weighted_edges.rb", "name": "deep", "owner": "Fixture", "line": 14, "span": [14, 2, 15, 5], "body": {"kind": "body_statement", "text": "", "span": [14, 2, 15, 5], "named": true, "field_name": null, "children": []}, "visibility": "private", "params": []}, + {"file": "facts/weighted_edges.rb", "name": "shared", "owner": "Fixture", "line": 16, "span": [16, 2, 17, 5], "body": {"kind": "body_statement", "text": "", "span": [16, 2, 17, 5], "named": true, "field_name": null, "children": []}, "visibility": "public", "params": []}, + {"file": "facts/weighted_edges.rb", "name": "hub", "owner": "Fixture", "line": 18, "span": [18, 2, 20, 5], "body": {"kind": "body_statement", "text": "", "span": [18, 2, 20, 5], "named": true, "field_name": null, "children": []}, "visibility": "public", "params": []}, + {"file": "facts/weighted_edges.rb", "name": "other", "owner": "Fixture", "line": 21, "span": [21, 2, 23, 5], "body": {"kind": "body_statement", "text": "", "span": [21, 2, 23, 5], "named": true, "field_name": null, "children": []}, "visibility": "public", "params": []} + ], + "owner_defs": [ + {"file": "facts/weighted_edges.rb", "name": "Fixture", "kind": "class", "line": 1, "span": [1, 0, 24, 3]} + ], + "call_sites": [ + {"receiver": "self", "message": "helper", "file": "facts/weighted_edges.rb", "function": "entry", "owner": "Fixture", "line": 3, "span": [3, 4, 3, 15], "conditional": false, "arguments": [], "control": 
"conditional", "safe_navigation": false, "block": false}, + {"receiver": "self", "message": "helper", "file": "facts/weighted_edges.rb", "function": "entry", "owner": "Fixture", "line": 4, "span": [4, 4, 4, 15], "conditional": false, "arguments": [], "control": "iterates", "safe_navigation": false, "block": false}, + {"receiver": "self", "message": "shared", "file": "facts/weighted_edges.rb", "function": "entry", "owner": "Fixture", "line": 5, "span": [5, 4, 5, 15], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "self", "message": "missing", "file": "facts/weighted_edges.rb", "function": "entry", "owner": "Fixture", "line": 6, "span": [6, 4, 6, 16], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "self", "message": "leaf", "file": "facts/weighted_edges.rb", "function": "helper", "owner": "Fixture", "line": 9, "span": [9, 4, 9, 13], "conditional": false, "arguments": [], "control": "iterates", "safe_navigation": false, "block": false}, + {"receiver": "self", "message": "deep", "file": "facts/weighted_edges.rb", "function": "leaf", "owner": "Fixture", "line": 12, "span": [12, 4, 12, 13], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "self", "message": "shared", "file": "facts/weighted_edges.rb", "function": "hub", "owner": "Fixture", "line": 19, "span": [19, 4, 19, 15], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "self", "message": "shared", "file": "facts/weighted_edges.rb", "function": "other", "owner": "Fixture", "line": 22, "span": [22, 4, 22, 15], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false} + ], + "local_methods": [ + {"id": "Fixture#entry", "owner": "Fixture", "name": "entry", "file": "facts/weighted_edges.rb", "line": 2, "span": [2, 2, 7, 
5], "statements": [], "boundaries": []}, + {"id": "Fixture#helper", "owner": "Fixture", "name": "helper", "file": "facts/weighted_edges.rb", "line": 8, "span": [8, 2, 10, 5], "statements": [], "boundaries": []}, + {"id": "Fixture#leaf", "owner": "Fixture", "name": "leaf", "file": "facts/weighted_edges.rb", "line": 11, "span": [11, 2, 13, 5], "statements": [], "boundaries": []}, + {"id": "Fixture#deep", "owner": "Fixture", "name": "deep", "file": "facts/weighted_edges.rb", "line": 14, "span": [14, 2, 15, 5], "statements": [], "boundaries": []}, + {"id": "Fixture#shared", "owner": "Fixture", "name": "shared", "file": "facts/weighted_edges.rb", "line": 16, "span": [16, 2, 17, 5], "statements": [], "boundaries": []}, + {"id": "Fixture#hub", "owner": "Fixture", "name": "hub", "file": "facts/weighted_edges.rb", "line": 18, "span": [18, 2, 20, 5], "statements": [], "boundaries": []}, + {"id": "Fixture#other", "owner": "Fixture", "name": "other", "file": "facts/weighted_edges.rb", "line": 21, "span": [21, 2, 23, 5], "statements": [], "boundaries": []} + ], + "local_complexity_scores": { + "Fixture#entry": {"score": 2.0, "signals": {"branches": 1}}, + "Fixture#helper": {"score": 10.0, "signals": {"branches": 3}}, + "Fixture#leaf": {"score": 8.0, "signals": {"branches": 2}}, + "Fixture#deep": {"score": 12.0, "signals": {"branches": 2}}, + "Fixture#shared": {"score": 80.0, "signals": {"branches": 4}}, + "Fixture#hub": {"score": 1.0, "signals": {"branches": 1}}, + "Fixture#other": {"score": 1.0, "signals": {"branches": 1}} + } + } + ] + }, + "expected": [ + { + "at": "facts/weighted_edges.rb:entry:2", + "call_chain": [ + "entry", + "shared" + ], + "depth": 2, + "hidden": 34.6, + "inlined": 36.6, + "local": 2.0, + "method": "entry", + "owner": "Fixture", + "reason": "1 single-caller helper(s) add 34.6 weighted cognitive points", + "signals": { + "branches": 1 + }, + "single_caller_callees": [ + "helper" + ], + "spans": { + "facts/weighted_edges.rb:entry:2": [2, 2, 7, 5] + } + 
}, + { + "at": "facts/weighted_edges.rb:helper:8", + "call_chain": [ + "helper", + "leaf" + ], + "depth": 2, + "hidden": 17.5, + "inlined": 27.5, + "local": 10.0, + "method": "helper", + "owner": "Fixture", + "reason": "1 single-caller helper(s) add 17.5 weighted cognitive points", + "signals": { + "branches": 3 + }, + "single_caller_callees": [ + "leaf" + ], + "spans": { + "facts/weighted_edges.rb:helper:8": [8, 2, 10, 5] + } + }, + { + "at": "facts/weighted_edges.rb:hub:18", + "call_chain": [ + "hub", + "shared" + ], + "depth": 1, + "hidden": 16.8, + "inlined": 17.8, + "local": 1.0, + "method": "hub", + "owner": "Fixture", + "reason": "same-owner call chain adds 16.8 weighted cognitive points", + "signals": { + "branches": 1 + }, + "single_caller_callees": [], + "spans": { + "facts/weighted_edges.rb:hub:18": [18, 2, 20, 5] + } + }, + { + "at": "facts/weighted_edges.rb:other:21", + "call_chain": [ + "other", + "shared" + ], + "depth": 1, + "hidden": 16.8, + "inlined": 17.8, + "local": 1.0, + "method": "other", + "owner": "Fixture", + "reason": "same-owner call chain adds 16.8 weighted cognitive points", + "signals": { + "branches": 1 + }, + "single_caller_callees": [], + "spans": { + "facts/weighted_edges.rb:other:21": [21, 2, 23, 5] + } + } + ] +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-branch_predicate_paths.json b/gems/decomplex/examples/source-facts/oracles/ruby-branch_predicate_paths.json new file mode 100644 index 000000000..52d79177e --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-branch_predicate_paths.json @@ -0,0 +1,271 @@ +{ + "syntax": { + "functions": [ + { + "name": "ready?", + "owner": "SourceFactBranchPredicatePaths", + "line": 4, + "visibility": "public", + "params": [] + }, + { + "name": "route", + "owner": "SourceFactBranchPredicatePaths", + "line": 8, + "visibility": "public", + "params": [ + "user" + ] + } + ], + "calls": [ + { + "receiver": "self", + "message": "publish", + "function": "route", + 
"line": 10, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + ":ready" + ] + }, + { + "receiver": "self", + "message": "warn", + "function": "route", + "line": 12, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + "\"not ready\"" + ] + }, + { + "receiver": "self", + "message": "audit", + "function": "route", + "line": 17, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + "user" + ] + }, + { + "receiver": "self", + "message": "fallback", + "function": "route", + "line": 19, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + "user" + ] + }, + { + "receiver": "self", + "message": "default", + "function": "route", + "line": 21, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + "user" + ] + }, + { + "receiver": "user", + "message": "role", + "function": "route", + "line": 15, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "user", + "message": "active?", + "function": "route", + "line": 9, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + } + ], + "state_reads": [ + { + "receiver": "self", + "field": "@status", + "function": "ready?", + "line": 5 + }, + { + "receiver": "self", + "field": "@status", + "function": "route", + "line": 9 + }, + { + "receiver": "user", + "field": "active?", + "function": "route", + "line": 9 + }, + { + "receiver": "user", + "field": "role", + "function": "route", + "line": 15 + } + ], + "decisions": [ + { + "kind": "case_dispatch", + "members": [ + "\"admin\"", + "\"guest\"" + ], + "function": "route", + "line": 15, + "predicate": "user.role" + }, + { + "kind": 
"conjunction", + "members": [ + "@status == :ready", + "user.active?" + ], + "function": "route", + "line": 9, + "predicate": "@status == :ready && user.active?" + } + ], + "branch_decisions": [ + { + "function": "route", + "line": 15, + "predicate": "user.role", + "state_refs": [ + "user.role" + ] + }, + { + "function": "route", + "line": 9, + "predicate": "@status == :ready && user.active?", + "state_refs": [ + "@status", + "user.active?" + ] + } + ], + "branch_arms": [ + { + "function": "route", + "kind": "case", + "line": 16, + "decision_line": 15, + "predicate": "user.role", + "member": "\"admin\"", + "body": "audit(user)" + }, + { + "function": "route", + "kind": "if", + "line": 11, + "decision_line": 9, + "predicate": "@status == :ready && user.active?", + "member": "else", + "body": "else warn(\"not ready\")" + }, + { + "function": "route", + "kind": "case", + "line": 18, + "decision_line": 15, + "predicate": "user.role", + "member": "\"guest\"", + "body": "fallback(user)" + }, + { + "function": "route", + "kind": "if", + "line": 9, + "decision_line": 9, + "predicate": "@status == :ready && user.active?", + "member": "then", + "body": "publish(:ready)" + } + ], + "dispatch_sites": [], + "predicate_bodies": [ + { + "name": "ready?", + "owner": "SourceFactBranchPredicatePaths", + "body": "@status == :ready", + "line": 4 + } + ], + "comparisons": [ + { + "source": "@status == :ready", + "raw": "@status == :ready", + "canon_source": "status == :ready", + "operator": "==", + "function": "ready?", + "line": 5 + }, + { + "source": "@status == :ready", + "raw": "@status == :ready", + "canon_source": "status == :ready", + "operator": "==", + "function": "route", + "line": 9 + } + ], + "path_conditions": [ + { + "guards": [ + "@status == :ready", + "user.active?" + ], + "action": "publish(:ready)", + "function": "route", + "line": 10 + }, + { + "guards": [ + "!@status == :ready", + "!user.active?" 
+ ], + "action": "warn(\"not ready\")", + "function": "route", + "line": 12 + } + ], + "semantic_effects": [ + { + "kind": "hidden_io", + "detail": "warn", + "function": "route", + "line": 12 + } + ] + } +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-local_methods_contracts.json b/gems/decomplex/examples/source-facts/oracles/ruby-local_methods_contracts.json new file mode 100644 index 000000000..4ded02cd3 --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-local_methods_contracts.json @@ -0,0 +1,308 @@ +{ + "syntax": { + "functions": [ + { + "name": "process", + "owner": "SourceFactLocalMethodsContracts", + "line": 4, + "visibility": "public", + "params": [ + "user", + "items" + ] + } + ], + "calls": [ + { + "receiver": "user", + "message": "profile", + "function": "process", + "line": 5, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "names", + "message": "any?", + "function": "process", + "line": 12, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "profile", + "message": "name", + "function": "process", + "line": 12, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "item", + "message": "ready?", + "function": "process", + "line": 8, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "item", + "message": "name", + "function": "process", + "line": 9, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "items", + "message": "each", + "function": "process", + "line": 7, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + } + ], + 
"state_reads": [ + { + "receiver": "names", + "field": "any?", + "function": "process", + "line": 12 + }, + { + "receiver": "items", + "field": "each", + "function": "process", + "line": 7 + }, + { + "receiver": "profile", + "field": "name", + "function": "process", + "line": 12 + }, + { + "receiver": "item", + "field": "name", + "function": "process", + "line": 9 + }, + { + "receiver": "user", + "field": "profile", + "function": "process", + "line": 5 + }, + { + "receiver": "item", + "field": "ready?", + "function": "process", + "line": 8 + } + ], + "state_writes": [], + "semantic_effects": [ + { + "kind": "hidden_mutation", + "detail": "<<", + "function": "process", + "line": 9 + } + ], + "local_methods": [ + { + "id": "SourceFactLocalMethodsContracts#process", + "owner": "SourceFactLocalMethodsContracts", + "name": "process", + "line": 4, + "statements": [ + { + "co_uses": [], + "dependencies": [ + [ + "profile", + "user" + ] + ], + "end_line": 5, + "index": 0, + "line": 5, + "reads": [ + "user" + ], + "source": "profile = user.profile", + "span": [ + 5, + 4, + 5, + 26 + ], + "writes": [ + "profile" + ] + }, + { + "co_uses": [], + "dependencies": [], + "end_line": 6, + "index": 1, + "line": 6, + "reads": [], + "source": "names = []", + "span": [ + 6, + 4, + 6, + 14 + ], + "writes": [ + "names" + ] + }, + { + "co_uses": [ + [ + "items", + "names" + ] + ], + "dependencies": [], + "end_line": 11, + "index": 2, + "line": 7, + "reads": [ + "items", + "names" + ], + "source": "items.each do |item| if item.ready? 
names << item.name end end", + "span": [ + 7, + 4, + 11, + 7 + ], + "writes": [] + }, + { + "co_uses": [ + [ + "names", + "profile" + ] + ], + "dependencies": [], + "end_line": 12, + "index": 3, + "line": 12, + "reads": [ + "names", + "profile" + ], + "source": "profile.name if names.any?", + "span": [ + 12, + 4, + 12, + 30 + ], + "writes": [] + } + ], + "boundaries": [], + "local_contract_assignments": { + "names": "[]", + "profile": "user.profile" + } + } + ], + "local_complexity_scores": [ + { + "id": "SourceFactLocalMethodsContracts#process", + "score": 4.3, + "signals": { + "boolean_ops": 0, + "branches": 2, + "loops": 1, + "nested": 1 + } + } + ] + }, + "local_flow": [ + { + "method": "process", + "statements": [ + { + "reads": [ + "user" + ], + "writes": [ + "profile" + ], + "dependencies": [ + [ + "profile", + "user" + ] + ], + "co_uses": [] + }, + { + "reads": [], + "writes": [ + "names" + ], + "dependencies": [], + "co_uses": [] + }, + { + "reads": [ + "items", + "names" + ], + "writes": [], + "dependencies": [], + "co_uses": [ + [ + "items", + "names" + ] + ] + }, + { + "reads": [ + "names", + "profile" + ], + "writes": [], + "dependencies": [], + "co_uses": [ + [ + "names", + "profile" + ] + ] + } + ], + "boundaries": [] + } + ] +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-protocols_nil_clone.json b/gems/decomplex/examples/source-facts/oracles/ruby-protocols_nil_clone.json new file mode 100644 index 000000000..ed78fcd3a --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-protocols_nil_clone.json @@ -0,0 +1,568 @@ +{ + "syntax": { + "functions": [ + { + "name": "run", + "owner": "SourceFactProtocolsNilClone", + "line": 12, + "visibility": "public", + "params": [ + "item" + ] + }, + { + "name": "guard", + "owner": "SourceFactProtocolsNilClone", + "line": 17, + "visibility": "public", + "params": [ + "value" + ] + }, + { + "name": "clone_left", + "owner": "SourceFactProtocolsNilClone", + "line": 23, + "visibility": 
"public", + "params": [ + "user" + ] + }, + { + "name": "clone_right", + "owner": "SourceFactProtocolsNilClone", + "line": 29, + "visibility": "public", + "params": [ + "account" + ] + }, + { + "name": "open", + "owner": "SourceFactProtocolsNilClone", + "line": 4, + "visibility": "public", + "params": [] + }, + { + "name": "close", + "owner": "SourceFactProtocolsNilClone", + "line": 8, + "visibility": "public", + "params": [] + } + ], + "calls": [ + { + "receiver": "self", + "message": "audit", + "function": "clone_left", + "line": 25, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "data" + ] + }, + { + "receiver": "self", + "message": "audit", + "function": "clone_right", + "line": 31, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "data" + ] + }, + { + "receiver": "user.profile", + "message": "name", + "function": "clone_left", + "line": 24, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "user", + "message": "profile", + "function": "clone_left", + "line": 24, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "data", + "function": "clone_left", + "line": 26, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "account.profile", + "message": "name", + "function": "clone_right", + "line": 30, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "account", + "message": "profile", + "function": "clone_right", + "line": 30, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "data", + 
"function": "clone_right", + "line": 32, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "value", + "message": "name", + "function": "guard", + "line": 20, + "conditional": false, + "control": "always", + "safe_navigation": true, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "open", + "function": "run", + "line": 13, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "value", + "function": "guard", + "line": 18, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "close", + "function": "run", + "line": 14, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "item", + "message": "ready?", + "function": "run", + "line": 14, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + } + ], + "state_reads": [ + { + "receiver": "user.profile", + "field": "name", + "function": "clone_left", + "line": 24 + }, + { + "receiver": "account.profile", + "field": "name", + "function": "clone_right", + "line": 30 + }, + { + "receiver": "value", + "field": "name", + "function": "guard", + "line": 20 + }, + { + "receiver": "user", + "field": "profile", + "function": "clone_left", + "line": 24 + }, + { + "receiver": "account", + "field": "profile", + "function": "clone_right", + "line": 30 + }, + { + "receiver": "item", + "field": "ready?", + "function": "run", + "line": 14 + } + ], + "state_writes": [ + { + "receiver": "self", + "field": "@opened", + "function": "close", + "line": 9 + }, + { + "receiver": "self", + "field": "@opened", + "function": "open", + "line": 5 + } + ], + "semantic_effects": [ + { + "kind": 
"hidden_io", + "detail": "open", + "function": "run", + "line": 13 + } + ], + "protocol_method_effects": [ + { + "owner": "SourceFactProtocolsNilClone", + "name": "run", + "line": 12, + "reads": [ + "close", + "open" + ], + "writes": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "guard", + "line": 17, + "reads": [], + "writes": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "clone_left", + "line": 23, + "reads": [], + "writes": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "clone_right", + "line": 29, + "reads": [], + "writes": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "open", + "line": 4, + "reads": [], + "writes": [ + "opened" + ] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "close", + "line": 8, + "reads": [], + "writes": [ + "opened" + ] + } + ], + "protocol_call_paths": [ + { + "owner": "SourceFactProtocolsNilClone", + "name": "guard", + "line": 17, + "calls": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "guard", + "line": 17, + "calls": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "open", + "line": 4, + "calls": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "close", + "line": 8, + "calls": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "run", + "line": 12, + "calls": [ + { + "line": 13, + "mid": "open", + "span": [ + 13, + 4, + 13, + 8 + ] + }, + { + "line": 14, + "mid": "close", + "span": [ + 14, + 4, + 14, + 9 + ] + } + ] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "run", + "line": 12, + "calls": [ + { + "line": 13, + "mid": "open", + "span": [ + 13, + 4, + 13, + 8 + ] + } + ] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "clone_left", + "line": 23, + "calls": [ + { + "line": 25, + "mid": "audit", + "span": [ + 25, + 4, + 25, + 15 + ] + } + ] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "clone_right", + "line": 29, + "calls": [ + { + "line": 31, + 
"mid": "audit", + "span": [ + 31, + 4, + 31, + 15 + ] + } + ] + } + ], + "clone_candidates": [ + { + "method_name": "clone_left", + "node_name": "defn", + "line": 23, + "mass": 26, + "fingerprint": "method(id id method_parameters((:( id ):)) body_statement(assignment(id =:= call(call(id .:. id) .:. id)) call(id argument_list((:( id ):))) id) id)", + "child_fingerprints": [ + "assignment(id =:= call(call(id .:. id) .:. id))", + "call(id argument_list((:( id ):)))" + ], + "child_masses": [ + 10, + 6 + ] + }, + { + "method_name": "clone_right", + "node_name": "defn", + "line": 29, + "mass": 26, + "fingerprint": "method(id id method_parameters((:( id ):)) body_statement(assignment(id =:= call(call(id .:. id) .:. id)) call(id argument_list((:( id ):))) id) id)", + "child_fingerprints": [ + "assignment(id =:= call(call(id .:. id) .:. id))", + "call(id argument_list((:( id ):)))" + ], + "child_masses": [ + 10, + 6 + ] + }, + { + "method_name": "clone_left", + "node_name": "assignment", + "line": 24, + "mass": 10, + "fingerprint": "assignment(id =:= call(call(id .:. id) .:. id))", + "child_fingerprints": [ + "call(call(id .:. id) .:. id)" + ], + "child_masses": [ + 7 + ] + }, + { + "method_name": "clone_right", + "node_name": "assignment", + "line": 30, + "mass": 10, + "fingerprint": "assignment(id =:= call(call(id .:. id) .:. id))", + "child_fingerprints": [ + "call(call(id .:. id) .:. id)" + ], + "child_masses": [ + 7 + ] + }, + { + "method_name": "run", + "node_name": "defn", + "line": 12, + "mass": 17, + "fingerprint": "method(id id method_parameters((:( id ):)) body_statement(id if_modifier(id id call(id .:. id))) id)", + "child_fingerprints": [ + "if_modifier(id id call(id .:. 
id))" + ], + "child_masses": [ + 7 + ] + }, + { + "method_name": "open", + "node_name": "defn", + "line": 4, + "mass": 8, + "fingerprint": "method(id id body_statement(instance_variable:@opened =:= bool) id)", + "child_fingerprints": [ + "instance_variable:@opened", + "bool" + ], + "child_masses": [ + 1, + 1 + ] + }, + { + "method_name": "close", + "node_name": "defn", + "line": 8, + "mass": 8, + "fingerprint": "method(id id body_statement(instance_variable:@opened =:= bool) id)", + "child_fingerprints": [ + "instance_variable:@opened", + "bool" + ], + "child_masses": [ + 1, + 1 + ] + }, + { + "method_name": "(top-level)", + "node_name": "class", + "line": 3, + "mass": 107, + "fingerprint": "class(id id body_statement(method(id id body_statement(instance_variable:@opened =:= bool) id) method(id id body_statement(instance_variable:@opened =:= bool) id) method(id id method_parameters((:( id ):)) body_statement(id if_modifier(id id call(id .:. id))) id) method(id id method_parameters((:( id ):)) body_statement(unless_modifier(id id id) call(id &.:&. id)) id) method(id id method_parameters((:( id ):)) body_statement(assignment(id =:= call(call(id .:. id) .:. id)) call(id argument_list((:( id ):))) id) id) method(id id method_parameters((:( id ):)) body_statement(assignment(id =:= call(call(id .:. id) .:. id)) call(id argument_list((:( id ):))) id) id)) id)", + "child_fingerprints": [ + "method(id id body_statement(instance_variable:@opened =:= bool) id)", + "method(id id body_statement(instance_variable:@opened =:= bool) id)", + "method(id id method_parameters((:( id ):)) body_statement(id if_modifier(id id call(id .:. id))) id)", + "method(id id method_parameters((:( id ):)) body_statement(unless_modifier(id id id) call(id &.:&. id)) id)", + "method(id id method_parameters((:( id ):)) body_statement(assignment(id =:= call(call(id .:. id) .:. 
id)) call(id argument_list((:( id ):))) id) id)", + "method(id id method_parameters((:( id ):)) body_statement(assignment(id =:= call(call(id .:. id) .:. id)) call(id argument_list((:( id ):))) id) id)" + ], + "child_masses": [ + 8, + 8, + 17, + 17, + 26, + 26 + ] + }, + { + "method_name": "guard", + "node_name": "defn", + "line": 17, + "mass": 17, + "fingerprint": "method(id id method_parameters((:( id ):)) body_statement(unless_modifier(id id id) call(id &.:&. id)) id)", + "child_fingerprints": [ + "unless_modifier(id id id)", + "call(id &.:&. id)" + ], + "child_masses": [ + 4, + 4 + ] + } + ], + "redundant_nil_guards": [ + { + "defn": "guard", + "line": 20, + "local": "value", + "guard": "value&.name", + "proof": "value is already proven non-nil on this path" + } + ] + } +} diff --git a/gems/decomplex/examples/source-facts/ruby/branch_predicate_paths.rb b/gems/decomplex/examples/source-facts/ruby/branch_predicate_paths.rb new file mode 100644 index 000000000..34f163cbb --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/branch_predicate_paths.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +class SourceFactBranchPredicatePaths + def ready? + @status == :ready + end + + def route(user) + if @status == :ready && user.active? + publish(:ready) + else + warn("not ready") + end + + case user.role + when "admin" + audit(user) + when "guest" + fallback(user) + else + default(user) + end + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/local_methods_contracts.rb b/gems/decomplex/examples/source-facts/ruby/local_methods_contracts.rb new file mode 100644 index 000000000..308a2062f --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/local_methods_contracts.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +class SourceFactLocalMethodsContracts + def process(user, items) + profile = user.profile + names = [] + items.each do |item| + if item.ready? + names << item.name + end + end + profile.name if names.any? 
+ end +end diff --git a/gems/decomplex/examples/source-facts/ruby/protocols_nil_clone.rb b/gems/decomplex/examples/source-facts/ruby/protocols_nil_clone.rb new file mode 100644 index 000000000..4560a1323 --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/protocols_nil_clone.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +class SourceFactProtocolsNilClone + def open + @opened = true + end + + def close + @opened = false + end + + def run(item) + open + close if item.ready? + end + + def guard(value) + return unless value + + value&.name + end + + def clone_left(user) + data = user.profile.name + audit(data) + data + end + + def clone_right(account) + data = account.profile.name + audit(data) + data + end +end diff --git a/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json b/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json index 75a78ed85..3a90a9349 100644 --- a/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json +++ b/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json @@ -881,6 +881,19 @@ 58 ] }, + { + "field": "children", + "function": "process", + "line": 49, + "owner": "RubySyntaxFactsCore", + "receiver": "item", + "span": [ + 49, + 6, + 49, + 19 + ] + }, { "field": "flat_map", "function": "process", diff --git a/gems/decomplex/lib/decomplex/detector_runner.rb b/gems/decomplex/lib/decomplex/detector_runner.rb index 734a16fa3..83ccd218f 100644 --- a/gems/decomplex/lib/decomplex/detector_runner.rb +++ b/gems/decomplex/lib/decomplex/detector_runner.rb @@ -229,6 +229,16 @@ def initialize(row) @language = row.fetch("language", "ruby").to_sym @source = row.fetch("source", "") @lines = row.fetch("lines", @source.lines) + @root = objectify(row.fetch("root", empty_fact_node("program"))) + @normalized_root = objectify(row.fetch("normalized_root", { + "type" => "ROOT", + "children" => [], + "first_lineno" => 1, + "first_column" => 0, + "last_lineno" => 1, + "last_column" => 0, + "text" => "" + })) 
@immutable_struct_readers = object_hash(row.fetch("immutable_struct_readers", {})) @immutable_struct_reader_types = object_hash(row.fetch("immutable_struct_reader_types", {})) @type_aliases = object_hash(row.fetch("type_aliases", {})) @@ -246,6 +256,12 @@ def initialize(row) define_method(name) { instance_variable_get("@#{name}") } end + attr_reader :root, :normalized_root + + def clone_candidates + Syntax.language_profile(language).clone_candidates(self) + end + def local_methods return @local_methods if @row.key?("local_methods") @@ -292,6 +308,17 @@ def fact_array(value) Array(value).map { |item| objectify(item) } end + def empty_fact_node(kind) + { + "kind" => kind, + "text" => "", + "span" => [1, 0, 1, 0], + "named" => true, + "field_name" => nil, + "children" => [] + } + end + def object_hash(value) value.to_h { |key, child| [key.to_s, child] } end @@ -345,6 +372,7 @@ def initialize(row, column) class FactNode attr_reader :kind, :text, :span, :field_name, :children, :start_point, :end_point + attr_reader :start_byte, :end_byte attr_accessor :parent, :prev_sibling, :next_sibling def initialize(row, objectifier) @@ -353,6 +381,8 @@ def initialize(row, objectifier) @span = row.fetch("span") @field_name = row["field_name"] @named = row.fetch("named", true) + @start_byte = row.fetch("start_byte", byte_offset(@span[0], @span[1])) + @end_byte = row.fetch("end_byte", byte_offset(@span[2], @span[3])) @children = Array(row.fetch("children", [])).map { |child| objectifier.call("node", child) } @children.each { |child| child.parent = self if child.respond_to?(:parent=) } @children.each_cons(2) do |left, right| @@ -367,13 +397,27 @@ def named? @named end + def child_count + @children.length + end + def named_children @children.select { |child| child.respond_to?(:named?) && child.named? 
} end + def named_child_count + named_children.length + end + def child_by_field_name(name) @children.find { |child| child.respond_to?(:field_name) && child.field_name.to_s == name.to_s } end + + private + + def byte_offset(line, column) + ((line.to_i - 1) * 1_000_000) + column.to_i + end end private_class_method def self.co_update(files, engine:, jobs:) diff --git a/gems/decomplex/lib/decomplex/false_simplicity.rb b/gems/decomplex/lib/decomplex/false_simplicity.rb index 07857fa86..fd5505547 100644 --- a/gems/decomplex/lib/decomplex/false_simplicity.rb +++ b/gems/decomplex/lib/decomplex/false_simplicity.rb @@ -30,8 +30,9 @@ def self.scan(files) def self.hits_for_document(document) document.semantic_effect_sites.map do |site| + defn = site.function.to_s.empty? ? "(top-level)" : site.function Hit.new(kind: site.kind, detail: site.detail, file: site.file, - defn: site.function || "(top-level)", line: site.line, + defn: defn, line: site.line, span: site.span) end end diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 86d80b635..7a3528ef2 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -318,6 +318,10 @@ def structural_facts_for_node(document, node, stack) out end + def descend_into_children?(_node, _stack) + true + end + def after_structural_facts(document, out) record_implicit_state_accesses(document, out) if implicit_state_accesses? 
end @@ -329,6 +333,8 @@ def decision_site_facts(document, node, stack) end def branch_decision_facts(document, node, stack, immutable_readers:, immutable_reader_types:, type_aliases:) + return [] if branch_decision_wrapper_for_real_branch?(node) + out = [] record_branch_decision( document, @@ -344,6 +350,8 @@ def branch_decision_facts(document, node, stack, immutable_readers:, immutable_r end def branch_arm_facts(document, node, stack) + return [] if branch_decision_wrapper_for_real_branch?(node) + out = [] record_branch_arm(document, node, stack, out) out @@ -555,7 +563,7 @@ def generic_local_statement(node, index, local_names) assignment_lhs_read_target_keys: assignment_lhs_read_target_keys, assignment_lhs_target_keys: assignment_lhs_target_keys ), - co_uses: reads.combination(2).map { |left, right| [left, right] } + co_uses: reads.sort.combination(2).map { |left, right| [left, right] } ) end @@ -1783,6 +1791,15 @@ def branch_node?(node) hidden_modifier_if?(node) || hidden_case?(node) end + def branch_decision_wrapper_for_real_branch?(node) + return false unless ts_node?(node) + return false if branch_node_kinds.include?(node.kind) || hidden_match?(node) || hidden_case?(node) + return false unless hidden_if?(node) || hidden_modifier_if?(node) + + first_named = node.named_children.first + ts_node?(first_named) && branch_node?(first_named) + end + def if_node?(node) if_node_kinds.include?(node.kind) || hidden_if?(node) || hidden_modifier_if?(node) @@ -3178,6 +3195,8 @@ def self.walk_document(document, stack, profile, &block) next_stack = profile.push_context(document, current_stack, current) yield current, next_stack + next unless profile.descend_into_children?(current, current_stack) + current.children.reverse_each { |child| pending << [child, next_stack] } end end diff --git a/gems/decomplex/lib/decomplex/syntax/ruby.rb b/gems/decomplex/lib/decomplex/syntax/ruby.rb index 2292ea8b6..cc1cb42bf 100644 --- a/gems/decomplex/lib/decomplex/syntax/ruby.rb +++ 
b/gems/decomplex/lib/decomplex/syntax/ruby.rb @@ -100,7 +100,7 @@ def call_target(document, node) case node.kind when "call" ruby_proc_call_target(node) || ruby_call_target(node) - when "body_statement" + when "body_statement", "block_body" ruby_bare_body_call_target(node) when "identifier" ruby_bare_call_target(node) @@ -141,6 +141,11 @@ def state_declaration(node) end def state_read_target(node) + if ruby_explicit_receiver_body_read_node?(node) && + (target = ruby_explicit_receiver_body_call_target(node)) + return { receiver: target[:receiver], field: target[:message] } + end + ruby_state_variable_target(node) || super end @@ -153,12 +158,22 @@ def after_structural_facts(document, out) apply_ruby_visibility!(out) end + def descend_into_children?(node, stack) + return false if node.kind == "lambda" + return false if ruby_stabby_lambda_node?(node) + return false if ruby_nested_local_scope?(node) && stack.any? { |frame| frame[:function] } + + true + end + def predicate_def(_document, function_def) - expression = ruby_single_expression_function_body(function_def.body) + expression = ruby_single_expression_function_body(function_def.body) || + ruby_predicate_expression_body(function_def.body) return nil unless expression body = normalize_text(expression.text).delete_suffix(";").strip return nil if body.empty? || body == "nil" || body.length > 200 + return nil unless predicate_body?(body) PredicateDef.new( file: function_def.file, @@ -218,7 +233,7 @@ def type_aliases(document) private def comparison_target(node) - ruby_nil_predicate_comparison(node) || super + ruby_nil_predicate_comparison(node) || ruby_flat_comparison_statement(node) || super end def ruby_nil_predicate_comparison(node) @@ -230,6 +245,15 @@ def ruby_nil_predicate_comparison(node) { source: normalize_text(node.text), operator: "nil?" 
} end + def ruby_flat_comparison_statement(node) + return nil unless node.kind == "body_statement" + + operator = direct_operator(node) + return nil unless COMPARISON_OPERATORS.include?(operator) + + { source: normalize_text(node.text), operator: operator } + end + def inline_def_argument_list?(node) ts_node?(node) && node.kind == "argument_list" && node.children.first&.kind.to_s == "def" end @@ -317,6 +341,41 @@ def ruby_single_expression_body_child(body) nil end + def ruby_predicate_expression_body(node) + body = ruby_method_body_wrapper(node) + return nil unless body + + expression = ruby_single_expression_body_child(body) + return expression if expression + + source = normalize_text(body.text).delete_suffix(";").strip + return body if ruby_flat_predicate_body_statement?(body, source) + + nil + end + + def ruby_flat_predicate_body_statement?(body, source) + body.kind == "body_statement" && + predicate_body?(source) && + COMPARISON_OPERATORS.include?(direct_operator(body)) + end + + def predicate_body?(source) + text = source.to_s + lower = text.downcase + %w[true false].include?(lower) || + lower.include?("true") || + lower.include?("false") || + lower.include?("null") || + lower.include?("nil") || + text.include?("==") || + text.include?("!=") || + text.include?("&&") || + text.include?("||") || + lower.include?(" and ") || + lower.include?(" or ") + end + def ruby_heredoc_body?(_body, named_children) named_children.first&.kind == "call" && named_children[1..].to_a.all? 
{ |child| child.kind == "heredoc_body" } @@ -357,7 +416,7 @@ def ruby_local_statement(node, index, local_names) reads: reads.to_set, writes: writes.to_set, dependencies: ruby_assignment_dependencies(node, local_names), - co_uses: reads.combination(2).map { |left, right| [left, right] } + co_uses: reads.sort.combination(2).map { |left, right| [left, right] } ) end @@ -449,6 +508,13 @@ def ruby_nested_local_scope?(node) %w[class module method singleton_method lambda].include?(node.kind) end + def ruby_stabby_lambda_node?(node) + return false unless ts_node?(node) + return true if node.kind == "body_statement" && node.children.first&.kind == "->" + + node.kind == "block" && prev_sibling(node)&.kind == "->" + end + def ruby_local_read_identifier?(node, local_names) return false unless node.kind == "identifier" return false unless local_names.include?(node.text.to_s) @@ -693,6 +759,12 @@ def ruby_explicit_receiver_body_call_target(node) } end + def ruby_explicit_receiver_body_read_node?(node) + return true if node.kind == "block_body" + + node.kind == "body_statement" && parent_node(node)&.kind == "do_block" + end + def ruby_simple_call_text?(text) text.to_s.strip.match?(/\A[a-z_]\w*[!?=]?\z/) end @@ -711,7 +783,7 @@ def ruby_bare_call_identifier?(node) return false if next_sibling(node)&.text == "=" || prev_sibling(node)&.text == "=" return false if next_sibling(node)&.text == "." || prev_sibling(node)&.text == "." 
- %w[body_statement then else elsif ensure rescue].include?(parent.kind) || + %w[body_statement then else elsif ensure rescue if_modifier unless_modifier].include?(parent.kind) || next_sibling(node)&.kind == "argument_list" end diff --git a/gems/decomplex/lib/decomplex/syntax/ruby_protocols.rb b/gems/decomplex/lib/decomplex/syntax/ruby_protocols.rb index 39bf7d5c4..b0a26852f 100644 --- a/gems/decomplex/lib/decomplex/syntax/ruby_protocols.rb +++ b/gems/decomplex/lib/decomplex/syntax/ruby_protocols.rb @@ -75,6 +75,14 @@ def ruby_protocol_collect_state_access(node, reads, writes, local_names:, root: return unless ts_node?(node) return if !root && ruby_protocol_nested_boundary?(node) + if ruby_flat_assignment_statement?(node) + lhs = node.named_children.first + rhs = node.named_children[1] + ruby_protocol_record_write(lhs, writes, local_names) + ruby_protocol_collect_state_access(rhs, reads, writes, local_names: local_names) + return + end + case node.kind when "assignment" lhs = named_field(node, "left") || node.named_children.first diff --git a/gems/decomplex/lib/decomplex/syntax_oracle.rb b/gems/decomplex/lib/decomplex/syntax_oracle.rb index da59b25d6..fa891f0c8 100644 --- a/gems/decomplex/lib/decomplex/syntax_oracle.rb +++ b/gems/decomplex/lib/decomplex/syntax_oracle.rb @@ -48,13 +48,27 @@ def project_document(document) document.call_sites, %i[receiver message function owner line span conditional arguments control safe_navigation block] ), + "state_declarations" => rows(document.state_declarations, %i[field owner type line span]), + "state_param_origins" => rows(document.state_param_origins, %i[field receiver owner param function line span]), "state_reads" => rows(document.state_reads, %i[field receiver function owner line span]), "state_writes" => rows(document.state_writes, %i[field receiver function owner line span]), "decisions" => rows(document.decision_sites, %i[kind members function line span predicate enclosing_span]), "branch_decisions" => 
branch_decision_rows(document), + "branch_arms" => rows( + document.branch_arms, + %i[function kind line span decision_line decision_span predicate member body] + ), "dispatch_sites" => rows(document.dispatch_sites, %i[variant_set arm_members outside function line span]), "semantic_effects" => rows(document.semantic_effect_sites, %i[kind detail function line span]), - "predicate_bodies" => rows(document.predicate_defs, %i[name owner body line span]) + "predicate_bodies" => rows(document.predicate_defs, %i[name owner body line span]), + "comparisons" => comparison_rows(document), + "path_conditions" => rows(document.path_condition_sites, %i[guards action function line span]), + "protocol_method_effects" => rows(document.protocol_method_effects, %i[owner name line reads writes]), + "protocol_call_paths" => protocol_call_path_rows(document), + "clone_candidates" => clone_candidate_rows(document), + "redundant_nil_guards" => rows(document.redundant_nil_guard_findings, %i[defn line span local guard proof]), + "local_methods" => local_method_rows(document), + "local_complexity_scores" => local_complexity_rows(document) } end @@ -72,8 +86,11 @@ def canonical_projection(projection) def canonical_document(document) sections = %w[ - functions owners calls state_reads state_writes decisions branch_decisions - dispatch_sites semantic_effects predicate_bodies + functions owners calls state_declarations state_param_origins state_reads + state_writes decisions branch_decisions branch_arms dispatch_sites + semantic_effects predicate_bodies comparisons path_conditions + protocol_method_effects protocol_call_paths clone_candidates redundant_nil_guards + local_methods local_complexity_scores ] out = { "file" => document.fetch("file"), @@ -113,10 +130,72 @@ def local_complexity_rows(document) end.sort_by { |row| row.fetch("id") } end + def comparison_rows(document) + rows(document.comparison_sites, %i[source operator function line span]).map do |row| + row.merge("raw" => 
row.fetch("source"), "canon_source" => normalize_comparison_source(row.fetch("source"))) + end + end + + def protocol_call_path_rows(document) + document.protocol_call_paths.map do |path| + { + "owner" => path.owner, + "name" => path.name, + "line" => path.line, + "calls" => Array(path.calls).map { |call| normalize_value(call.to_h.slice(:mid, :line, :span)) } + } + end.sort_by { |row| JSON.generate(row) } + end + + def clone_candidate_rows(document) + document.clone_candidates.map do |candidate| + { + "line" => candidate.line, + "span" => normalize_value(candidate.span), + "method_name" => candidate.method_name, + "node_name" => candidate.node_name, + "mass" => candidate.mass, + "fingerprint" => candidate.fingerprint, + "child_fingerprints" => normalize_value(candidate.child_fingerprints), + "child_masses" => normalize_value(candidate.child_masses) + } + end.sort_by { |row| JSON.generate(row) } + end + + def local_method_rows(document) + document.local_methods.map do |method| + { + "id" => method.id, + "owner" => method.owner, + "name" => method.name, + "line" => method.line, + "span" => normalize_value(method.span), + "statements" => Array(method.statements).map do |statement| + normalize_value(statement.to_h.slice(:index, :line, :end_line, :span, :source, + :reads, :writes, :dependencies, :co_uses)) + end, + "boundaries" => Array(method.boundaries).map do |boundary| + normalize_value(boundary.to_h.slice(:before_index, :after_index, :line, :kind, :text)) + end, + "local_contract_assignments" => normalize_value(document.local_contract_assignments(method)) + } + end.sort_by { |row| JSON.generate(row) } + end + + def normalize_comparison_source(source) + text = source.to_s.strip + text = text[1..].to_s.strip if text.start_with?("!") + text = text.sub(/\Aself\./, "").sub(/\A@/, "") + text = text.sub(/\A[A-Za-z_]\w*(?:\([^)]*\))?\.(?=[A-Za-z_]\w*\s*(==|!=|\.))/, "") + text.gsub(/\s+/, " ").strip + end + def normalize_value(value) case value when Symbol value.to_s + 
when Set + value.to_a.map { |item| normalize_value(item) }.sort_by { |item| JSON.generate(item) } when Array value.map { |item| normalize_value(item) } when Hash diff --git a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs index 0d84104a2..aeaff8452 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs @@ -137,41 +137,12 @@ fn methods_by_file<'a>(methods: &'a [MethodSummary]) -> BTreeMap BTreeMap { - let mut map = BTreeMap::new(); - for statement in &method.statements { - if statement.writes.len() != 1 { - continue; - } - let name = statement.writes.iter().next().unwrap(); - if map.contains_key(name) { - continue; - } - if let Some(source) = local_contract_source(name, &statement.source) { - map.insert(name.clone(), source); - } - } - map.into_iter() + local_flow::local_contract_assignments(method) + .into_iter() .filter_map(|(name, source)| contract_of(&source, &BTreeMap::new(), 0).map(|c| (name, c))) .collect() } -fn local_contract_source(name: &str, source: &str) -> Option { - let pattern = format!( - r"(?s)\b{}\b\s*(?::=|=)\s*(.+?)\s*;?\s*$", - regex::escape(name) - ); - let assignment = Regex::new(&pattern).ok()?; - let rhs = assignment.captures(source)?.get(1)?.as_str().trim(); - static CONDITIONAL_SOURCE: OnceLock = OnceLock::new(); - let conditional = - CONDITIONAL_SOURCE.get_or_init(|| Regex::new(r"\s(?:if|unless|rescue)\s|\?|:").unwrap()); - if conditional.is_match(rhs) { - None - } else { - Some(rhs.to_string()) - } -} - fn rescue_nil_hits( document: &Document, methods: &[&MethodSummary], diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs index a37b671fe..4b1bbc6fe 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ 
-1,3 +1,4 @@ pub use crate::decomplex::syntax::local_flow::{ - scan_documents, scan_files, Boundary, LocalFlowRow, MethodSummary, Statement, + local_contract_assignments, scan_documents, scan_files, Boundary, LocalFlowRow, MethodSummary, + Statement, }; diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs index 244ae00a5..d9c2594f3 100644 --- a/gems/decomplex/rust/src/decomplex/syntax.rs +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -134,6 +134,8 @@ pub struct Document { #[serde(default)] pub branch_decisions: Vec, #[serde(default)] + pub branch_arms: Vec, + #[serde(default)] pub dispatch_sites: Vec, #[serde(default)] pub semantic_effect_sites: Vec, @@ -277,6 +279,20 @@ pub struct BranchDecision { pub state_refs: Vec, } +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct BranchArm { + pub file: String, + pub function: String, + pub kind: String, + pub line: usize, + pub span: Span, + pub decision_line: usize, + pub decision_span: Span, + pub predicate: String, + pub member: String, + pub body: String, +} + #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct DispatchSite { pub variant_set: Vec, diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs index 858c1638e..b901136d7 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs @@ -3,10 +3,12 @@ use super::super::tree_sitter_adapter::{ previous_sibling_raw_text, AssignmentTarget, CallTarget, Target, }; use super::super::{ - CallSite, Document, FunctionDef, Language, ProtocolMethodEffect, SemanticEffectSite, StateRead, - StateWrite, + CallSite, Document, FunctionDef, Language, ProtocolCall, ProtocolMethodEffect, + ProtocolMethodPath, SemanticEffectSite, StateRead, StateWrite, +}; +use super::base::{ + default_clone_candidate_node, normalize_protocol_state, 
protocol_method_name, LanguageProfile, }; -use super::base::{normalize_protocol_state, protocol_method_name, LanguageProfile}; use crate::decomplex::ast::{node_text, normalize_text, span, RawNode}; use regex::Regex; use std::collections::BTreeSet; @@ -82,6 +84,37 @@ const RUBY_PROTOCOL_IGNORED_MIDS: &[&str] = &[ "subject", "to", ]; +const RUBY_PROTOCOL_MUTATING_MIDS: &[&str] = &[ + "<<", + "[]=", + "add", + "append", + "clear", + "collect!", + "compact!", + "concat", + "declare", + "delete", + "delete_if", + "each_key=", + "fill", + "filter!", + "keep_if", + "mark", + "merge!", + "move", + "push", + "reject!", + "replace", + "resolve", + "shift", + "stamp", + "store", + "unshift", + "update", + "write", +]; +const RUBY_PROTOCOL_NON_MUTATING_OPERATOR_MIDS: &[&str] = &["!", "!=", "!~"]; impl LanguageProfile for RubyProfile { fn language(&self) -> Language { @@ -132,6 +165,10 @@ impl LanguageProfile for RubyProfile { &["assignment", "operator_assignment"] } + fn indexed_lhs_node_kinds(&self) -> &[&str] { + &["element_assignment", "element_reference"] + } + fn assignment_operator_tokens(&self) -> &[&str] { &["=", "+=", "-=", "*=", "/=", "%=", "&&=", "||="] } @@ -320,29 +357,7 @@ impl LanguageProfile for RubyProfile { .function_defs .iter() .map(|function_def| { - let mut reads = document - .state_reads - .iter() - .filter(|read| { - read.owner == function_def.owner && read.function == function_def.name - }) - .map(|read| normalize_protocol_state(&read.field)) - .collect::>(); - reads.extend(ruby_protocol_bare_reads(function_def)); - reads.sort(); - reads.dedup(); - - let mut writes = document - .state_writes - .iter() - .filter(|write| { - write.owner == function_def.owner && write.function == function_def.name - }) - .map(|write| normalize_protocol_state(&write.field)) - .collect::>(); - writes.sort(); - writes.dedup(); - + let (reads, writes) = ruby_protocol_method_access(function_def); ProtocolMethodEffect { file: function_def.file.clone(), owner: 
function_def.owner.clone(), @@ -355,6 +370,27 @@ impl LanguageProfile for RubyProfile { .collect() } + fn protocol_call_paths(&self, document: &Document) -> Vec { + document + .function_defs + .iter() + .flat_map(|function_def| { + let statements = ruby_raw_function_body_statements(&function_def.body); + let local_names = ruby_protocol_local_names(function_def, &statements); + ruby_protocol_paths_for_statements(&statements, &local_names) + .into_iter() + .map(|path| ProtocolMethodPath { + file: function_def.file.clone(), + owner: function_def.owner.clone(), + name: protocol_method_name(&function_def.name), + line: function_def.line, + calls: path.calls, + }) + .collect::>() + }) + .collect() + } + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { if node.kind() == "body_statement" && matches!(first_child_kind(node), Some("class" | "module")) @@ -442,6 +478,33 @@ impl LanguageProfile for RubyProfile { fn nested_assignment_dependencies_only(&self) -> bool { true } + + fn clone_candidate_node(&self, node: &RawNode) -> bool { + if ruby_state_assignment_node(node) { + return false; + } + default_clone_candidate_node(node) + } + + fn clone_fingerprint_children<'a>(&self, node: &'a RawNode) -> Vec<&'a RawNode> { + if node.kind == "body_statement" { + let named = raw_named_children(node); + if named.len() == 1 && ruby_state_assignment_node(named[0]) { + return named[0].children.iter().collect(); + } + } + node.children.iter().collect() + } +} + +fn ruby_state_assignment_node(node: &RawNode) -> bool { + if !matches!(node.kind.as_str(), "assignment" | "operator_assignment") { + return false; + } + raw_named_children(node) + .first() + .map(|lhs| matches!(lhs.kind.as_str(), "instance_variable" | "global_variable")) + .unwrap_or(false) } fn hidden_ruby_method_name(node: Node<'_>, source: &str) -> Option { @@ -1166,65 +1229,638 @@ fn ruby_bare_call_identifier(node: Node<'_>, source: &str) -> bool { matches!( parent.kind(), - "body_statement" | 
"then" | "else" | "elsif" | "ensure" | "rescue" + "body_statement" + | "then" + | "else" + | "elsif" + | "ensure" + | "rescue" + | "if_modifier" + | "unless_modifier" ) || node .next_sibling() .map(|sibling| sibling.kind() == "argument_list") .unwrap_or(false) } -fn ruby_protocol_bare_reads(function_def: &FunctionDef) -> Vec { - let mut local_names = BTreeSet::new(); - local_names.extend(function_def.params.iter().cloned()); - ruby_protocol_collect_local_names(&function_def.body, &mut local_names, true); +#[derive(Clone)] +struct RubyProtocolPath { + calls: Vec, + terminal: bool, +} +fn ruby_protocol_method_access(function_def: &FunctionDef) -> (Vec, Vec) { + let statements = ruby_raw_function_body_statements(&function_def.body); + let local_names = ruby_protocol_local_names(function_def, &statements); let mut reads = BTreeSet::new(); - ruby_protocol_collect_bare_reads(&function_def.body, None, &local_names, &mut reads, true); - reads.into_iter().collect() + let mut writes = BTreeSet::new(); + ruby_protocol_collect_state_access( + &function_def.body, + None, + &local_names, + &mut reads, + &mut writes, + true, + ); + (reads.into_iter().collect(), writes.into_iter().collect()) } -fn ruby_protocol_collect_local_names( +fn ruby_protocol_local_names( + function_def: &FunctionDef, + statements: &[&RawNode], +) -> BTreeSet { + let mut local_names = BTreeSet::new(); + local_names.extend(function_def.params.iter().cloned()); + for statement in statements { + ruby_protocol_collect_local_names(statement, &mut local_names, true); + } + local_names +} + +fn ruby_protocol_collect_state_access( node: &RawNode, - local_names: &mut BTreeSet, + parent: Option<&RawNode>, + local_names: &BTreeSet, + reads: &mut BTreeSet, + writes: &mut BTreeSet, root: bool, ) { if !root && ruby_protocol_nested_boundary(node) { return; } - if matches!(node.kind.as_str(), "assignment" | "operator_assignment") { - if let Some(lhs) = raw_named_children(node).first() { - if lhs.kind == "identifier" && 
ruby_simple_call_text(&lhs.text) { - local_names.insert(lhs.text.clone()); - } + + if ruby_raw_flat_assignment_statement(node) { + let children = raw_named_children(node); + if let Some(lhs) = children.first() { + ruby_protocol_record_write(lhs, writes, local_names); + } + if let Some(rhs) = children.get(1) { + ruby_protocol_collect_state_access(rhs, Some(node), local_names, reads, writes, false); } + return; } - if matches!(node.kind.as_str(), "block_parameters" | "method_parameters") { - for child in raw_named_children(node) { - if child.kind == "identifier" && ruby_simple_call_text(&child.text) { - local_names.insert(child.text.clone()); + + match node.kind.as_str() { + "assignment" => { + let children = raw_named_children(node); + if let Some(lhs) = children.first() { + ruby_protocol_record_write(lhs, writes, local_names); } + if let Some(rhs) = children.get(1) { + ruby_protocol_collect_state_access( + rhs, + Some(node), + local_names, + reads, + writes, + false, + ); + } + return; + } + "operator_assignment" => { + let children = raw_named_children(node); + if let Some(lhs) = children.first() { + if let Some(state) = ruby_protocol_state_target(lhs, local_names) { + reads.insert(state.clone()); + writes.insert(state); + } + } + if let Some(rhs) = children.get(1) { + ruby_protocol_collect_state_access( + rhs, + Some(node), + local_names, + reads, + writes, + false, + ); + } + return; } + "instance_variable" => { + reads.insert(normalize_protocol_state(&node.text)); + } + "call" => ruby_protocol_collect_call_state(node, local_names, reads, writes), + "identifier" => { + if ruby_protocol_bare_reader(node, parent, local_names) { + reads.insert(normalize_protocol_state(&node.text)); + } + } + _ => {} } + for child in &node.children { - ruby_protocol_collect_local_names(child, local_names, false); + ruby_protocol_collect_state_access( + child, + Some(node), + local_names, + reads, + writes, + false, + ); } } -fn ruby_protocol_collect_bare_reads( +fn 
ruby_protocol_collect_call_state( node: &RawNode, - parent: Option<&RawNode>, local_names: &BTreeSet, reads: &mut BTreeSet, + writes: &mut BTreeSet, +) { + let Some(target) = ruby_raw_call_target(node) else { + return; + }; + if target.receiver == "self" + && target.arguments.is_empty() + && !ruby_protocol_mutating_mid(&target.message) + && !RUBY_PROTOCOL_IGNORED_MIDS.contains(&target.message.as_str()) + { + reads.insert(normalize_protocol_state(&target.message)); + } + if ruby_protocol_mutating_mid(&target.message) { + if let Some(token) = ruby_protocol_receiver_state_token(&target.receiver, local_names) { + writes.insert(token); + } + } +} + +fn ruby_protocol_record_write( + lhs: &RawNode, + writes: &mut BTreeSet, + local_names: &BTreeSet, +) { + if let Some(state) = ruby_protocol_state_target(lhs, local_names) { + writes.insert(state); + } +} + +fn ruby_protocol_state_target( + node: &RawNode, + local_names: &BTreeSet, +) -> Option { + match node.kind.as_str() { + "instance_variable" => Some(normalize_protocol_state(&node.text)), + "element_reference" => raw_named_children(node) + .first() + .and_then(|receiver| ruby_protocol_receiver_state_token(&receiver.text, local_names)), + "call" => { + let target = ruby_raw_call_target(node)?; + let receiver = ruby_protocol_receiver_state_token(&target.receiver, local_names)?; + let field = normalize_protocol_state(&target.message); + if receiver == "self" { + Some(field) + } else { + Some(format!("{receiver}.{field}")) + } + } + _ => None, + } +} + +fn ruby_protocol_receiver_state_token( + receiver: &str, + local_names: &BTreeSet, +) -> Option { + let text = receiver.trim(); + if text.is_empty() { + return None; + } + if text == "self" { + return Some("self".to_string()); + } + if text.starts_with('@') { + return Some(normalize_protocol_state(text)); + } + if ruby_simple_call_text(text) { + if local_names.contains(text) { + None + } else { + Some(normalize_protocol_state(text)) + } + } else { + None + } +} + +fn 
ruby_protocol_paths_for_statements( + statements: &[&RawNode], + local_names: &BTreeSet, +) -> Vec { + let mut paths = vec![ruby_protocol_empty_path()]; + for statement in statements { + let statement_paths = ruby_protocol_paths_for(statement, local_names); + paths = ruby_protocol_combine_path_lists(&paths, &statement_paths); + } + paths +} + +fn ruby_protocol_paths_for( + node: &RawNode, + local_names: &BTreeSet, +) -> Vec { + if ruby_protocol_nested_boundary(node) { + return vec![ruby_protocol_empty_path()]; + } + if ruby_raw_if_node(node) { + return ruby_protocol_branch_paths(node, local_names); + } + if ruby_raw_case_node(node) { + return ruby_protocol_case_paths(node, local_names); + } + + let children = ruby_protocol_child_nodes(node); + let child_paths = children + .iter() + .fold(vec![ruby_protocol_empty_path()], |paths, child| { + ruby_protocol_combine_path_lists(&paths, &ruby_protocol_paths_for(child, local_names)) + }); + let Some(mid) = ruby_protocol_internal_call(node, local_names) else { + return ruby_protocol_terminalize(node, child_paths); + }; + let call_path = RubyProtocolPath { + calls: vec![ruby_protocol_raw_call(mid, node)], + terminal: false, + }; + ruby_protocol_terminalize( + node, + ruby_protocol_combine_path_lists(&[call_path], &child_paths), + ) +} + +fn ruby_protocol_terminalize( + node: &RawNode, + paths: Vec, +) -> Vec { + if matches!( + node.kind.as_str(), + "return" | "break" | "next" | "redo" | "retry" + ) { + paths + .into_iter() + .map(|path| RubyProtocolPath { + calls: path.calls, + terminal: true, + }) + .collect() + } else { + paths + } +} + +fn ruby_protocol_branch_paths( + node: &RawNode, + local_names: &BTreeSet, +) -> Vec { + let condition_paths = ruby_raw_path_condition(node) + .map(|condition| ruby_protocol_paths_for(condition, local_names)) + .unwrap_or_else(|| vec![ruby_protocol_empty_path()]); + let then_paths = ruby_protocol_body_paths(ruby_raw_then_body(node), local_names); + let else_paths = ruby_raw_else_body(node) 
+ .map(|body| ruby_protocol_body_paths(Some(body), local_names)) + .unwrap_or_else(|| vec![ruby_protocol_empty_path()]); + let alternatives = then_paths.into_iter().chain(else_paths).collect::>(); + ruby_protocol_combine_path_lists(&condition_paths, &alternatives) +} + +fn ruby_protocol_case_paths( + node: &RawNode, + local_names: &BTreeSet, +) -> Vec { + let subject_paths = raw_named_children(node) + .first() + .filter(|first| !matches!(first.kind.as_str(), "when" | "else")) + .map(|subject| ruby_protocol_paths_for(subject, local_names)) + .unwrap_or_else(|| vec![ruby_protocol_empty_path()]); + let branch_paths = raw_named_children(node) + .into_iter() + .filter(|child| matches!(child.kind.as_str(), "when" | "else")) + .flat_map(|child| ruby_protocol_body_paths(Some(child), local_names)) + .collect::>(); + let alternatives = if branch_paths.is_empty() { + vec![ruby_protocol_empty_path()] + } else { + branch_paths + }; + ruby_protocol_combine_path_lists( + &subject_paths, + &alternatives, + ) +} + +fn ruby_protocol_body_paths( + node: Option<&RawNode>, + local_names: &BTreeSet, +) -> Vec { + let Some(node) = node else { + return vec![ruby_protocol_empty_path()]; + }; + if matches!( + node.kind.as_str(), + "then" | "else" | "body_statement" | "block" | "block_body" + ) { + return ruby_protocol_paths_for_statements( + &raw_named_children(node) + .into_iter() + .filter(|child| child.kind != "comment") + .collect::>(), + local_names, + ); + } + ruby_protocol_paths_for(node, local_names) +} + +fn ruby_protocol_child_nodes(node: &RawNode) -> Vec<&RawNode> { + if ruby_protocol_nested_boundary(node) { + return Vec::new(); + } + match node.kind.as_str() { + "call" => raw_named_children(node) + .into_iter() + .filter(|child| matches!(child.kind.as_str(), "argument_list" | "block" | "do_block")) + .collect(), + "assignment" | "operator_assignment" => raw_named_children(node).into_iter().skip(1).collect(), + _ => raw_named_children(node) + .into_iter() + .filter(|child| 
child.kind != "comment") + .collect(), + } +} + +fn ruby_protocol_internal_call( + node: &RawNode, + local_names: &BTreeSet, +) -> Option { + let target = if node.kind == "call" { + ruby_raw_call_target(node) + } else if node.kind == "identifier" && ruby_protocol_bare_internal_identifier(node, local_names) { + Some(RubyRawCallTarget { + receiver: "self".to_string(), + message: node.text.clone(), + arguments: Vec::new(), + }) + } else { + None + }?; + if target.receiver != "self" { + return None; + } + if local_names.contains(&target.message) || RUBY_PROTOCOL_IGNORED_MIDS.contains(&target.message.as_str()) { + return None; + } + Some(target.message) +} + +fn ruby_protocol_raw_call(mid: String, node: &RawNode) -> ProtocolCall { + ProtocolCall { + mid, + file: String::new(), + owner: String::new(), + defn: String::new(), + line: node.span[0], + span: node.span, + } +} + +fn ruby_protocol_combine_path_lists( + left_paths: &[RubyProtocolPath], + right_paths: &[RubyProtocolPath], +) -> Vec { + let mut out = Vec::new(); + for left in left_paths { + if left.terminal { + out.push(left.clone()); + continue; + } + for right in right_paths { + let mut calls = left.calls.clone(); + calls.extend(right.calls.clone()); + out.push(RubyProtocolPath { + calls, + terminal: right.terminal, + }); + } + } + out.into_iter().take(64).collect() +} + +fn ruby_protocol_empty_path() -> RubyProtocolPath { + RubyProtocolPath { + calls: Vec::new(), + terminal: false, + } +} + +fn ruby_protocol_mutating_mid(mid: &str) -> bool { + !RUBY_PROTOCOL_NON_MUTATING_OPERATOR_MIDS.contains(&mid) + && (RUBY_PROTOCOL_MUTATING_MIDS.contains(&mid) || mid.ends_with('!')) +} + +fn ruby_protocol_bare_internal_identifier( + node: &RawNode, + local_names: &BTreeSet, +) -> bool { + ruby_simple_call_text(&node.text) + && !local_names.contains(&node.text) + && !RUBY_PROTOCOL_IGNORED_MIDS.contains(&node.text.as_str()) +} + +struct RubyRawCallTarget { + receiver: String, + message: String, + arguments: Vec, +} + +fn 
ruby_raw_call_target(node: &RawNode) -> Option { + if node.kind != "call" { + return None; + } + let receiver = raw_child_by_field(node, "receiver").map(|child| normalize_text(&child.text)); + let method = raw_child_by_field(node, "method") + .map(|child| child.text.clone()) + .or_else(|| { + raw_named_children(node) + .first() + .filter(|child| matches!(child.kind.as_str(), "identifier" | "constant")) + .map(|child| child.text.clone()) + })?; + Some(RubyRawCallTarget { + receiver: receiver.unwrap_or_else(|| "self".to_string()), + message: method, + arguments: ruby_raw_argument_texts(node), + }) +} + +fn ruby_raw_argument_texts(node: &RawNode) -> Vec { + let Some(args) = raw_child_by_field(node, "arguments") + .or_else(|| raw_named_children(node).into_iter().find(|child| child.kind == "argument_list")) + else { + return Vec::new(); + }; + let values = raw_named_children(args) + .into_iter() + .map(|child| normalize_text(&child.text)) + .filter(|text| !text.is_empty()) + .collect::>(); + if !values.is_empty() { + return values; + } + let text = args + .text + .trim() + .trim_start_matches('(') + .trim_end_matches(')') + .to_string(); + text.split(',') + .map(normalize_text) + .filter(|item| !item.is_empty()) + .collect() +} + +fn ruby_raw_function_body_statements(node: &RawNode) -> Vec<&RawNode> { + let Some(body) = ruby_raw_method_body_wrapper(node) else { + return Vec::new(); + }; + let named = raw_named_children(body) + .into_iter() + .filter(|child| child.kind != "comment") + .collect::>(); + if named.is_empty() && body.text.trim().is_empty() { + return Vec::new(); + } + if ruby_raw_if_node(body) || ruby_raw_case_node(body) || ruby_raw_flat_assignment_statement(body) { + return vec![body]; + } + if named.is_empty() || ruby_raw_heredoc_body(&named) { + return vec![body]; + } + named +} + +fn ruby_raw_method_body_wrapper(node: &RawNode) -> Option<&RawNode> { + match node.kind.as_str() { + "method" | "singleton_method" | "argument_list" => raw_named_children(node) 
+ .into_iter() + .rev() + .find(|child| child.kind == "body_statement"), + "body_statement" => { + if ruby_raw_hidden_method_definition(node) { + raw_named_children(node) + .into_iter() + .rev() + .find(|child| child.kind == "body_statement") + } else { + Some(node) + } + } + _ => None, + } +} + +fn ruby_raw_heredoc_body(named: &[&RawNode]) -> bool { + named.first().map(|child| child.kind.as_str()) == Some("call") + && named + .iter() + .skip(1) + .all(|child| child.kind == "heredoc_body") +} + +fn ruby_raw_flat_assignment_statement(node: &RawNode) -> bool { + node.kind == "body_statement" + && node.children.iter().filter(|child| !child.named && child.text == "=").count() == 1 + && raw_named_children(node).len() >= 2 +} + +fn ruby_raw_if_node(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "if" | "unless" | "if_modifier" | "unless_modifier" + ) || (matches!(node.kind.as_str(), "expression_statement" | "block" | "body_statement") + && matches!(raw_first_child_kind(node).as_deref(), Some("if" | "unless"))) +} + +fn ruby_raw_case_node(node: &RawNode) -> bool { + node.kind == "case" + || (matches!(node.kind.as_str(), "body_statement" | "block_body" | "argument_list") + && raw_first_child_kind(node).as_deref() == Some("case")) +} + +fn ruby_raw_path_condition(node: &RawNode) -> Option<&RawNode> { + if matches!(node.kind.as_str(), "if_modifier" | "unless_modifier") + || ruby_raw_hidden_modifier_if(node) + { + raw_named_children(node).into_iter().last() + } else { + raw_named_children(node).into_iter().next() + } +} + +fn ruby_raw_then_body(node: &RawNode) -> Option<&RawNode> { + if matches!(node.kind.as_str(), "if_modifier" | "unless_modifier") + || ruby_raw_hidden_modifier_if(node) + { + raw_named_children(node).into_iter().next() + } else { + raw_named_children(node) + .into_iter() + .find(|child| child.kind == "then") + .or_else(|| raw_named_children(node).into_iter().nth(1)) + } +} + +fn ruby_raw_else_body(node: &RawNode) -> Option<&RawNode> { + if 
matches!(node.kind.as_str(), "if_modifier" | "unless_modifier") + || ruby_raw_hidden_modifier_if(node) + { + return None; + } + raw_named_children(node) + .into_iter() + .find(|child| matches!(child.kind.as_str(), "else" | "elsif")) + .or_else(|| raw_named_children(node).into_iter().nth(2)) +} + +fn ruby_raw_hidden_modifier_if(node: &RawNode) -> bool { + if node.kind != "body_statement" { + return false; + } + let mut seen_named = false; + node.children.iter().any(|child| { + seen_named |= child.named; + seen_named && !child.named && matches!(child.kind.as_str(), "if" | "unless") + }) +} + +fn ruby_raw_hidden_method_definition(node: &RawNode) -> bool { + node.kind == "body_statement" && matches!(raw_first_child_kind(node).as_deref(), Some("def")) +} + +fn ruby_protocol_collect_local_names( + node: &RawNode, + local_names: &mut BTreeSet, root: bool, ) { if !root && ruby_protocol_nested_boundary(node) { return; } - if node.kind == "identifier" && ruby_protocol_bare_reader(node, parent, local_names) { - reads.insert(normalize_protocol_state(&node.text)); + if matches!(node.kind.as_str(), "assignment" | "operator_assignment") + || ruby_raw_flat_assignment_statement(node) + { + if let Some(lhs) = raw_named_children(node).first() { + if lhs.kind == "identifier" && ruby_simple_call_text(&lhs.text) { + local_names.insert(lhs.text.clone()); + } + } + } + if matches!(node.kind.as_str(), "block_parameters" | "method_parameters") { + for child in raw_named_children(node) { + if child.kind == "identifier" && ruby_simple_call_text(&child.text) { + local_names.insert(child.text.clone()); + } + } } for child in &node.children { - ruby_protocol_collect_bare_reads(child, Some(node), local_names, reads, false); + ruby_protocol_collect_local_names(child, local_names, false); } } @@ -1305,6 +1941,12 @@ fn raw_named_children(node: &RawNode) -> Vec<&RawNode> { node.children.iter().filter(|child| child.named).collect() } +fn raw_child_by_field<'a>(node: &'a RawNode, field: &str) -> 
Option<&'a RawNode> { + node.children + .iter() + .find(|child| child.field_name.as_deref() == Some(field)) +} + fn raw_first_child_kind(node: &RawNode) -> Option { node.children.first().map(|child| child.kind.clone()) } diff --git a/gems/decomplex/rust/src/decomplex/syntax/local_flow.rs b/gems/decomplex/rust/src/decomplex/syntax/local_flow.rs index 11c654267..f52e44616 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/local_flow.rs @@ -2,9 +2,11 @@ use crate::decomplex::ast::{self, Child, Node, RawNode, Span}; use crate::decomplex::syntax::adapters::{language_profile, LanguageProfile}; use crate::decomplex::syntax::{Document, FunctionDef, Language}; use anyhow::Result; +use regex::Regex; use serde::{Deserialize, Serialize}; use std::collections::{BTreeMap, BTreeSet}; use std::path::{Path, PathBuf}; +use std::sync::OnceLock; #[derive(Clone, Debug, Eq, PartialEq, Serialize)] pub struct LocalFlowRow { @@ -96,6 +98,42 @@ pub fn scan_documents(documents: &[Document]) -> Vec { out } +pub fn local_contract_assignments(method: &MethodSummary) -> BTreeMap { + let mut map = BTreeMap::new(); + for statement in &method.statements { + if statement.writes.len() != 1 { + continue; + } + let Some(name) = statement.writes.iter().next() else { + continue; + }; + if map.contains_key(name) { + continue; + } + if let Some(source) = local_contract_source(name, &statement.source) { + map.insert(name.clone(), source); + } + } + map +} + +fn local_contract_source(name: &str, source: &str) -> Option { + let pattern = format!( + r"(?s)\b{}\b\s*(?::=|=)\s*(.+?)\s*;?\s*$", + regex::escape(name) + ); + let assignment = Regex::new(&pattern).ok()?; + let rhs = assignment.captures(source)?.get(1)?.as_str().trim(); + static CONDITIONAL_SOURCE: OnceLock = OnceLock::new(); + let conditional = + CONDITIONAL_SOURCE.get_or_init(|| Regex::new(r"\s(?:if|unless|rescue)\s|\?|:").unwrap()); + if conditional.is_match(rhs) { + None + } else { + 
Some(rhs.to_string()) + } +} + fn normalized_local_methods(document: &Document) -> Vec { let mut detector = LocalFlow::new( document.file.clone(), @@ -270,8 +308,18 @@ fn raw_local_reads( local_names: &BTreeSet, profile: &dyn LanguageProfile, ) -> BTreeSet { + raw_local_read_list(node, local_names, profile) + .into_iter() + .collect() +} + +fn raw_local_read_list( + node: &RawNode, + local_names: &BTreeSet, + profile: &dyn LanguageProfile, +) -> Vec { if raw_nested_local_scope(node, profile) { - return BTreeSet::new(); + return Vec::new(); } let mut reads = Vec::new(); @@ -288,12 +336,14 @@ fn raw_local_reads( && !raw_declaration_name_in_tree(node, child, profile) && !raw_declaration_name(child, parent, profile) && !raw_member_name(child, parent, profile) + && !raw_call_method_name(child, parent, profile) && !raw_keyed_element_key(child, parent, profile) + && !reads.contains(&name) { reads.push(name); } }); - reads.into_iter().collect() + reads } fn raw_local_writes(node: &RawNode, profile: &dyn LanguageProfile) -> BTreeSet { @@ -908,9 +958,8 @@ fn raw_assignment_lhs_read_in_tree( { return false; } - if profile - .assignment_node_kinds() - .contains(&root.kind.as_str()) + if profile.assignment_node_kinds().contains(&root.kind.as_str()) + || (profile.language() == Language::Ruby && raw_assignment_statement(root, profile)) { if let Some(lhs) = raw_named_children(root).first() { if raw_assignment_lhs_read_target(lhs, target, profile) { @@ -934,9 +983,8 @@ fn raw_assignment_lhs_write_in_tree( { return false; } - if profile - .assignment_node_kinds() - .contains(&root.kind.as_str()) + if profile.assignment_node_kinds().contains(&root.kind.as_str()) + || (profile.language() == Language::Ruby && raw_assignment_statement(root, profile)) { if let Some(lhs) = raw_named_children(root).first() { if raw_assignment_lhs_write_target(lhs, target, profile) { @@ -1106,6 +1154,25 @@ fn raw_call_name(node: &RawNode, parent: Option<&RawNode>, profile: &dyn Languag .unwrap_or(false) } 
+fn raw_call_method_name( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + let Some(parent) = parent else { + return false; + }; + if !profile.call_node_kinds().contains(&parent.kind.as_str()) { + return false; + } + parent + .children + .iter() + .find(|child| child.field_name.as_deref() == Some("method")) + .map(|method| std::ptr::eq(method, node)) + .unwrap_or(false) +} + fn raw_keyed_element_key( node: &RawNode, parent: Option<&RawNode>, diff --git a/gems/decomplex/rust/src/decomplex/syntax/path_condition.rs b/gems/decomplex/rust/src/decomplex/syntax/path_condition.rs index 363ea98a5..ace21fc07 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/path_condition.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/path_condition.rs @@ -50,25 +50,47 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result PathConditionReport { let mut sites = documents .iter() - .flat_map(sites_from_document_facts) + .flat_map(sites_for_document) .collect::>(); - sites.extend( - documents + if sites.is_empty() { + sites = documents .iter() - .flat_map(sites_from_raw_facts) - .collect::>(), - ); - if !sites.is_empty() { - return Report::new(dedupe_sites(sites)).findings(); + .flat_map(normalized_sites_from_document) + .collect::>(); } + Report::new(dedupe_sites(sites)).findings() +} - let mut sites = Vec::new(); - for document in documents { - let mut pc = PathCondition::new(document.file.clone(), document.lines.clone()); - pc.walk(&document.normalized_root, &Vec::new(), &Vec::new()); - sites.extend(pc.sites); +pub(crate) fn fact_sites_for_document( + document: &Document, +) -> Vec { + let mut sites = sites_for_document(document); + if sites.is_empty() { + sites = normalized_sites_from_document(document); } - Report::new(sites).findings() + dedupe_sites(sites) + .into_iter() + .map(|site| crate::decomplex::syntax::PathConditionSite { + guards: site.guards, + action: site.action, + file: site.file, + function: site.defn, + 
line: site.line, + span: site.span, + }) + .collect() +} + +fn sites_for_document(document: &Document) -> Vec { + let mut sites = sites_from_document_facts(document); + sites.extend(sites_from_raw_facts(document)); + sites +} + +fn normalized_sites_from_document(document: &Document) -> Vec { + let mut pc = PathCondition::new(document.file.clone(), document.lines.clone()); + pc.walk(&document.normalized_root, &Vec::new(), &Vec::new()); + pc.sites } fn dedupe_sites(sites: Vec) -> Vec { @@ -184,9 +206,21 @@ fn raw_path_walk( if raw_branch_node(profile, node) { let condition = raw_branch_condition(node); let atoms = raw_path_condition_atoms(profile, condition); - for child in raw_branch_body_nodes(profile, node) { + let then_atoms = if raw_unless_node(node) { + raw_negate_guards(&atoms) + } else { + atoms.clone() + }; + let else_atoms = if raw_unless_node(node) { + atoms + } else { + raw_negate_guards(&atoms) + }; + for (child, branch_guards) in + raw_branch_body_nodes(profile, node, &then_atoms, &else_atoms) + { let mut next_guards = guards.to_vec(); - next_guards.extend(atoms.clone()); + next_guards.extend(branch_guards); raw_path_walk(document, profile, child, function, &next_guards, out); } return; @@ -239,52 +273,104 @@ fn raw_branch_condition(node: &RawNode) -> Option<&RawNode> { .or_else(|| raw_named_children(node).into_iter().next()) } -fn raw_branch_body_nodes<'a>(profile: &dyn LanguageProfile, node: &'a RawNode) -> Vec<&'a RawNode> { - let mut bodies = ["consequence", "body", "alternative"] - .into_iter() - .filter_map(|field| raw_child_by_field(node, field)) - .collect::>(); +fn raw_branch_body_nodes<'a>( + profile: &dyn LanguageProfile, + node: &'a RawNode, + then_guards: &[String], + else_guards: &[String], +) -> Vec<(&'a RawNode, Vec)> { + let mut bodies = Vec::new(); + if let Some(body) = raw_child_by_field(node, "consequence") + .or_else(|| raw_child_by_field(node, "body")) + { + bodies.push((body, then_guards.to_vec())); + } + if let Some(body) = 
raw_child_by_field(node, "alternative") { + bodies.push((body, else_guards.to_vec())); + } if bodies.is_empty() { - bodies = raw_named_children(node).into_iter().skip(1).collect(); + bodies = raw_named_children(node) + .into_iter() + .skip(1) + .enumerate() + .map(|(index, body)| { + let guards = if index == 0 { + then_guards.to_vec() + } else { + else_guards.to_vec() + }; + (body, guards) + }) + .collect(); } bodies .into_iter() - .flat_map(|body| { - if raw_simple_action_wrapper(profile, body) { - return vec![body]; - } - let body_children = raw_named_children(body); - let children = if profile + .flat_map(|(body, branch_guards)| { + raw_flatten_branch_body(profile, body) + .into_iter() + .map(move |child| (child, branch_guards.clone())) + }) + .collect() +} + +fn raw_flatten_branch_body<'a>( + profile: &dyn LanguageProfile, + body: &'a RawNode, +) -> Vec<&'a RawNode> { + if raw_simple_action_wrapper(profile, body) { + return vec![body]; + } + let body_children = raw_named_children(body); + let children = if profile + .path_transparent_branch_body_node_kinds() + .contains(&body.kind.as_str()) + { + body_children.into_iter().skip(1).collect::>() + } else { + body_children + }; + let children = children + .into_iter() + .flat_map(|child| { + if profile .path_transparent_branch_body_node_kinds() - .contains(&body.kind.as_str()) + .contains(&child.kind.as_str()) { - body_children.into_iter().skip(1).collect::>() - } else { - body_children - }; - let children = children - .into_iter() - .flat_map(|child| { - if profile - .path_transparent_branch_body_node_kinds() - .contains(&child.kind.as_str()) - { - raw_named_children(child) - .into_iter() - .skip(1) - .collect::>() - } else { - vec![child] - } - }) - .filter(|child| !raw_comment_node(child)) - .collect::>(); - if children.is_empty() { - vec![body] + raw_named_children(child) + .into_iter() + .skip(1) + .collect::>() } else { - children + vec![child] } }) + .filter(|child| !raw_comment_node(child)) + .collect::>(); 
+ if children.is_empty() { + vec![body] + } else { + children + } +} + +fn raw_unless_node(node: &RawNode) -> bool { + node.kind.contains("unless") + || node + .children + .first() + .map(|child| child.kind == "unless" || child.text == "unless") + .unwrap_or(false) +} + +fn raw_negate_guards(guards: &[String]) -> Vec { + guards + .iter() + .map(|guard| { + guard + .strip_prefix('!') + .map(str::to_string) + .unwrap_or_else(|| format!("!{guard}")) + }) .collect() } diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index b4766c18f..7ecf20f9e 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -3,7 +3,7 @@ use super::{ false_simplicity_lexicon::{false_simplicity_lexicon, FalseSimplicityLexicon}, language_profile, LanguageProfile, }, - BranchDecision, CallSite, ComparisonUse, DecisionSite, DispatchSite, Document, FunctionDef, + BranchArm, BranchDecision, CallSite, ComparisonUse, DecisionSite, DispatchSite, Document, FunctionDef, Language, OwnerDef, PredicateAlias, SemanticEffectSite, StateDeclaration, StateRead, StateWrite, }; @@ -25,6 +25,7 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { let mut state_writes = Vec::new(); let mut decision_sites = Vec::new(); let mut branch_decisions = Vec::new(); + let mut branch_arms = Vec::new(); let mut dispatch_sites = Vec::new(); let mut predicate_aliases = Vec::new(); let mut comparison_uses = Vec::new(); @@ -32,7 +33,10 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { let mut seen_reads = HashSet::new(); let mut seen_calls = HashSet::new(); let mut seen_decisions = HashSet::new(); - let context = ContextState::new(file_owner(&parsed.file)); + let mut context = ContextState::new(file_owner(&parsed.file)); + if language == Language::Ruby { + context.immutable_readers = 
ruby_immutable_struct_readers(&parsed.source); + } collect_facts( parsed.tree.root_node(), @@ -48,6 +52,7 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { &mut state_writes, &mut decision_sites, &mut branch_decisions, + &mut branch_arms, &mut predicate_aliases, &mut comparison_uses, &mut seen_writes, @@ -108,6 +113,7 @@ pub fn parse_file(file: PathBuf, language: Language) -> Result { state_writes, decision_sites, branch_decisions, + branch_arms, dispatch_sites, semantic_effect_sites, local_complexity_scores, @@ -151,6 +157,8 @@ struct ContextState { function_line: Option, pub receiver: Option, locals: BTreeSet, + param_types: BTreeMap, + immutable_readers: BTreeMap>, controls: Vec, } @@ -163,6 +171,8 @@ impl ContextState { function_line: None, receiver: None, locals: BTreeSet::new(), + param_types: BTreeMap::new(), + immutable_readers: BTreeMap::new(), controls: Vec::new(), } } @@ -207,6 +217,7 @@ fn collect_facts( state_writes: &mut Vec, decision_sites: &mut Vec, branch_decisions: &mut Vec, + branch_arms: &mut Vec, predicate_aliases: &mut Vec, comparison_uses: &mut Vec, seen_writes: &mut HashSet, @@ -279,6 +290,14 @@ fn collect_facts( &next_context, branch_decisions, ); + record_branch_arm( + node, + source, + file, + language, + &next_context, + branch_arms, + ); record_predicate_alias( node, source, @@ -305,6 +324,7 @@ fn collect_facts( state_writes, decision_sites, branch_decisions, + branch_arms, predicate_aliases, comparison_uses, seen_writes, @@ -760,6 +780,9 @@ fn predicate_body_text(profile: &dyn LanguageProfile, source: &str) -> Option 200 { return None; } + if assignment_like_predicate_body(&text) { + return None; + } if predicate_like_body(&text) { Some(text) } else { @@ -767,6 +790,21 @@ fn predicate_body_text(profile: &dyn LanguageProfile, source: &str) -> Option bool { + text.contains("||=") + || text.contains("&&=") + || text.contains("+=") + || text.contains("-=") + || text.contains("*=") + || text.contains("/=") + || 
text.contains("%=") + || text + .chars() + .collect::>() + .windows(3) + .any(|window| matches!(window, [left, '=', right] if !matches!(left, '=' | '!' | '<' | '>') && *right != '=')) +} + fn predicate_like_body(text: &str) -> bool { let lower = text.to_ascii_lowercase(); matches!(lower.as_str(), "true" | "false") @@ -796,7 +834,7 @@ fn record_comparison_use( } let raw = profile.normalize_source_text(node_text(node, source)); out.push(ComparisonUse { - canon_source: raw.clone(), + canon_source: normalize_comparison_source(&raw), raw, file: file.to_string_lossy().to_string(), function: context.current_function(), @@ -806,14 +844,44 @@ fn record_comparison_use( }); } +fn normalize_comparison_source(source: &str) -> String { + let mut text = source.trim().to_string(); + if let Some(stripped) = text.strip_prefix('!') { + text = stripped.trim().to_string(); + } + if let Some(stripped) = text.strip_prefix("self.") { + text = stripped.to_string(); + } + if let Some(stripped) = text.strip_prefix('@') { + text = stripped.to_string(); + } + if let Some(dot_index) = text.find('.') { + let receiver = &text[..dot_index]; + let rest = &text[dot_index + 1..]; + if simple_identifier(receiver) + && (rest.contains(" == ") + || rest.contains(" != ") + || rest.contains('.')) + { + text = rest.to_string(); + } + } + normalize_text(&text) +} + +fn simple_identifier(text: &str) -> bool { + let mut chars = text.chars(); + let Some(first) = chars.next() else { + return false; + }; + (first == '_' || first.is_ascii_alphabetic()) + && chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) +} + fn comparison_node(profile: &dyn LanguageProfile, node: Node<'_>, source: &str) -> bool { if profile.comparison_node_kinds().contains(&node.kind()) { let operator = direct_operator_from_source(node, source); - return profile.comparison_operators().contains(&operator.as_str()) - || profile - .comparison_operators() - .iter() - .any(|operator| node_text(node, source).contains(operator)); + return 
profile.comparison_operators().contains(&operator.as_str()); } if !profile.call_node_kinds().contains(&node.kind()) { return false; @@ -905,6 +973,148 @@ fn record_branch_decision( }); } +fn record_branch_arm( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, +) { + let profile = language_profile(language); + if profile.generated_prelude(node, source) + || branch_decision_wrapper_for_real_branch(profile, node, source) + { + return; + } + if if_arm_node(profile, node, source) { + record_if_arms(profile, node, source, file, context, out); + return; + } + if case_node(profile, node) || profile.hidden_case(node) { + let decision_node = profile.case_source_node(node); + record_case_arms(profile, decision_node, source, file, context, out); + } +} + +fn if_arm_node(profile: &dyn LanguageProfile, node: Node<'_>, source: &str) -> bool { + if case_node(profile, node) || profile.hidden_case(node) { + return false; + } + profile.branch_node_kinds().contains(&node.kind()) + || profile.control_context(node, source).as_deref() == Some("conditional") +} + +fn record_if_arms( + profile: &dyn LanguageProfile, + node: Node<'_>, + source: &str, + file: &Path, + context: &ContextState, + out: &mut Vec, +) { + let predicate = profile.normalize_source_text(&decision_predicate(profile, node, source)); + let decision_span = span(node); + let decision_line = line(node); + let named = named_children(node); + let consequence = node + .child_by_field_name("consequence") + .or_else(|| node.child_by_field_name("body")) + .or_else(|| named.get(1).copied()); + let alternative = node + .child_by_field_name("alternative") + .or_else(|| { + named.iter() + .copied() + .find(|child| child.kind().contains("else") || child.kind().contains("alternative")) + }) + .or_else(|| { + named.get(2) + .copied() + .filter(|candidate| consequence != Some(*candidate)) + }); + + for (arm, member) in [(consequence, "then"), (alternative, "else")] { + let 
Some(arm) = arm else { + continue; + }; + out.push(BranchArm { + file: file.to_string_lossy().to_string(), + function: context.current_function(), + kind: "if".to_string(), + line: line(arm), + span: span(arm), + decision_line, + decision_span, + predicate: predicate.clone(), + member: member.to_string(), + body: profile.normalize_source_text(node_text(arm, source)), + }); + } +} + +fn record_case_arms( + profile: &dyn LanguageProfile, + node: Node<'_>, + source: &str, + file: &Path, + context: &ContextState, + out: &mut Vec, +) { + let predicate = profile.normalize_source_text(&decision_predicate(profile, node, source)); + let decision_span = span(node); + let decision_line = line(node); + for arm in case_arms(profile, node) { + let pattern = case_arm_patterns(arm, source, profile) + .into_iter() + .find(|pattern| !default_case_pattern(profile, pattern)) + .unwrap_or_default(); + if pattern.is_empty() { + continue; + } + out.push(BranchArm { + file: file.to_string_lossy().to_string(), + function: context.current_function(), + kind: "case".to_string(), + line: line(arm), + span: span(arm), + decision_line, + decision_span, + predicate: predicate.clone(), + member: pattern.clone(), + body: case_arm_body(profile, arm, source, &pattern), + }); + } +} + +fn case_arm_body( + profile: &dyn LanguageProfile, + arm: Node<'_>, + source: &str, + pattern: &str, +) -> String { + let body = named_children(arm) + .into_iter() + .filter(|child| { + !profile.case_pattern_node_kinds().contains(&child.kind()) + && !matches!(child.kind(), "then" | "else") + }) + .last() + .map(|child| node_text(child, source)) + .unwrap_or_else(|| node_text(arm, source)); + let mut text = profile.normalize_source_text(body); + for prefix in [ + format!("when {pattern} then "), + format!("when {pattern} "), + ] { + if let Some(stripped) = text.strip_prefix(&prefix) { + text = stripped.to_string(); + break; + } + } + text +} + fn branch_decision_node(profile: &dyn LanguageProfile, node: Node<'_>, 
source: &str) -> bool { profile.branch_node_kinds().contains(&node.kind()) || profile.hidden_case(node) @@ -956,6 +1166,11 @@ fn collect_branch_state_refs( } else if branch_local_ref(node, source, receiver, &field, context) { // Function-local bindings are not object state, even when a // language permits bare predicate-style method calls. + } else if profile.language() == Language::Ruby + && ruby_immutable_param_state_read(receiver, &field, context) + { + // Sorbet T::Struct readers on typed params are immutable data reads, + // not mutable object state. } else if receiver.is_empty() || receiver == "self" { out.insert(field); } else { @@ -995,6 +1210,123 @@ fn branch_local_ref( && normalize_text(node_text(node, source)) == field } +fn ruby_immutable_param_state_read(receiver: &str, field: &str, context: &ContextState) -> bool { + if receiver.is_empty() || matches!(receiver, "self" | "this") { + return false; + } + let Some(param) = receiver.split('.').next() else { + return false; + }; + let Some(type_name) = context.param_types.get(param) else { + return false; + }; + let field = field.trim_end_matches('?'); + ruby_immutable_reader(type_name, field, &context.immutable_readers) +} + +fn ruby_immutable_reader( + type_name: &str, + field: &str, + readers: &BTreeMap>, +) -> bool { + let short = type_name.split("::").last().unwrap_or(type_name); + readers + .get(type_name) + .or_else(|| readers.get(short)) + .map(|fields| fields.contains(field)) + .unwrap_or(false) +} + +fn ruby_immutable_struct_readers(source: &str) -> BTreeMap> { + let mut readers: BTreeMap> = BTreeMap::new(); + let mut class_stack = Vec::new(); + for line in source.lines() { + let stripped = line.trim(); + if let Some(name) = stripped + .strip_prefix("class ") + .and_then(|rest| rest.split_once("< T::Struct").map(|(name, _)| name.trim())) + .filter(|name| ruby_constant_path(name)) + { + class_stack.push(name.to_string()); + continue; + } + if let Some(owner) = class_stack.last() { + if let 
Some(field) = stripped + .strip_prefix("const :") + .and_then(|rest| rest.split(|ch: char| !ch.is_ascii_alphanumeric() && ch != '_').next()) + .filter(|field| !field.is_empty()) + { + readers + .entry(owner.clone()) + .or_default() + .insert(field.to_string()); + continue; + } + } + if !class_stack.is_empty() && stripped.trim_end_matches(';') == "end" { + class_stack.pop(); + } + } + readers +} + +fn ruby_sig_param_types(source: &str, function_line: usize) -> BTreeMap { + let lines = source.lines().collect::>(); + let mut sig_lines = Vec::new(); + let mut cursor = function_line.saturating_sub(2); + while let Some(line) = lines.get(cursor) { + let stripped = line.trim(); + if stripped.is_empty() { + if sig_lines.is_empty() { + break; + } + } else if sig_lines.is_empty() && !stripped.starts_with("sig") { + break; + } + sig_lines.push(*line); + if stripped.starts_with("sig") { + break; + } + if cursor == 0 || sig_lines.len() >= 8 { + break; + } + cursor -= 1; + } + sig_lines.reverse(); + let sig = sig_lines.join("\n"); + let Some(params_start) = sig.find("params(").map(|index| index + "params(".len()) else { + return BTreeMap::new(); + }; + let rest = &sig[params_start..]; + let Some(params_end) = rest.find(')') else { + return BTreeMap::new(); + }; + rest[..params_end] + .split(',') + .filter_map(|part| { + let (name, type_name) = part.split_once(':')?; + let name = name.trim(); + let type_name = type_name.trim(); + (ruby_identifier(name) && ruby_constant_path(type_name)) + .then(|| (name.to_string(), type_name.to_string())) + }) + .collect() +} + +fn ruby_identifier(value: &str) -> bool { + let mut chars = value.chars(); + matches!(chars.next(), Some(ch) if ch == '_' || ch.is_ascii_alphabetic()) + && chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) +} + +fn ruby_constant_path(value: &str) -> bool { + value.split("::").all(|part| { + let mut chars = part.chars(); + matches!(chars.next(), Some(ch) if ch.is_ascii_uppercase()) + && chars.all(|ch| ch == '_' || 
ch.is_ascii_alphanumeric()) + }) +} + fn declared_state_index(declarations: &[StateDeclaration]) -> BTreeMap> { let mut index: BTreeMap> = BTreeMap::new(); for declaration in declarations { @@ -1276,6 +1608,11 @@ fn push_function_context( context.owner = Some(owner); context.receiver = profile.function_receiver_name(node, source); context.locals = profile.function_params(node, source).into_iter().collect(); + context.param_types = if language == Language::Ruby { + ruby_sig_param_types(source, line(node)) + } else { + BTreeMap::new() + }; if let Some(receiver) = &context.receiver { context.locals.insert(receiver.clone()); } diff --git a/gems/decomplex/rust/src/decomplex/syntax_oracle.rs b/gems/decomplex/rust/src/decomplex/syntax_oracle.rs index 749309267..be07a0cbb 100644 --- a/gems/decomplex/rust/src/decomplex/syntax_oracle.rs +++ b/gems/decomplex/rust/src/decomplex/syntax_oracle.rs @@ -44,6 +44,14 @@ pub fn project_document(document: &Document) -> Value { "safe_navigation": call.safe_navigation, "block": call.block, })).collect()), + "state_declarations": sorted(document.state_declarations.iter().map(|declaration| json!({ + "field": declaration.field, + "owner": declaration.owner, + "type": declaration.r#type, + "line": declaration.line, + "span": declaration.span, + })).collect()), + "state_param_origins": Vec::::new(), "state_reads": sorted(document.state_reads.iter().map(|read| json!({ "field": read.field, "receiver": read.receiver, @@ -76,6 +84,17 @@ pub fn project_document(document: &Document) -> Value { "predicate": decision.predicate, "state_refs": decision.state_refs, })).collect()), + "branch_arms": sorted(document.branch_arms.iter().map(|arm| json!({ + "function": arm.function, + "kind": arm.kind, + "line": arm.line, + "span": arm.span, + "decision_line": arm.decision_line, + "decision_span": arm.decision_span, + "predicate": arm.predicate, + "member": arm.member, + "body": arm.body, + })).collect()), "dispatch_sites": 
sorted(document.dispatch_sites.iter().map(|site| json!({ "variant_set": site.variant_set, "arm_members": site.arm_members, @@ -98,6 +117,88 @@ pub fn project_document(document: &Document) -> Value { "line": predicate.line, "span": predicate.span, })).collect()), + "comparisons": sorted(document.comparison_uses.iter().map(|comparison| json!({ + "source": comparison.raw, + "raw": comparison.raw, + "canon_source": comparison.canon_source, + "operator": comparison_operator(&comparison.raw), + "function": comparison.function, + "line": comparison.line, + "span": comparison.span, + })).collect()), + "path_conditions": sorted(syntax::path_condition::fact_sites_for_document(document).iter().map(|site| json!({ + "guards": site.guards, + "action": site.action, + "function": site.function, + "line": site.line, + "span": site.span, + })).collect()), + "protocol_method_effects": sorted(document.protocol_method_effects.iter().map(|effect| json!({ + "owner": effect.owner, + "name": effect.name, + "line": effect.line, + "reads": effect.reads, + "writes": effect.writes, + })).collect()), + "protocol_call_paths": sorted(document.protocol_call_paths.iter().map(|path| json!({ + "owner": path.owner, + "name": path.name, + "line": path.line, + "calls": path.calls.iter().map(|call| json!({ + "mid": call.mid, + "line": call.line, + "span": call.span, + })).collect::>(), + })).collect()), + "clone_candidates": sorted(syntax::clone_candidates(document).iter().map(|candidate| json!({ + "line": candidate.line, + "span": candidate.span, + "method_name": candidate.method_name, + "node_name": candidate.node_name, + "mass": candidate.mass, + "fingerprint": candidate.fingerprint, + "child_fingerprints": candidate.child_fingerprints, + "child_masses": candidate.child_masses, + })).collect()), + "redundant_nil_guards": sorted(syntax::redundant_nil_guard::scan_documents(std::slice::from_ref(document)).iter().map(|finding| json!({ + "defn": finding.defn, + "line": finding.line, + "span": finding.span, 
+ "local": finding.local, + "guard": finding.guard, + "proof": finding.proof, + })).collect()), + "local_methods": sorted(syntax::local_flow::scan_documents(std::slice::from_ref(document)).iter().map(|method| json!({ + "id": method.id, + "owner": method.owner, + "name": method.name, + "line": method.line, + "span": method.span, + "statements": method.statements.iter().map(|statement| json!({ + "index": statement.index, + "line": statement.line, + "end_line": statement.end_line, + "span": statement.span, + "source": statement.source, + "reads": statement.reads, + "writes": statement.writes, + "dependencies": statement.dependencies, + "co_uses": statement.co_uses, + })).collect::>(), + "boundaries": method.boundaries.iter().map(|boundary| json!({ + "before_index": boundary.before_index, + "after_index": boundary.after_index, + "line": boundary.line, + "kind": boundary.kind, + "text": boundary.text, + })).collect::>(), + "local_contract_assignments": syntax::local_flow::local_contract_assignments(method), + })).collect()), + "local_complexity_scores": sorted(document.local_complexity_scores.iter().map(|(id, score)| json!({ + "id": id, + "score": score.score, + "signals": score.signals, + })).collect()), }) } @@ -114,3 +215,12 @@ fn logical_file(file: &str) -> String { } path } + +fn comparison_operator(source: &str) -> &str { + for operator in ["!==", "===", "!=", "==", ">=", "<=", ">", "<"] { + if source.contains(operator) { + return operator; + } + } + "" +} diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs index b6e64ebb9..0e201e141 100644 --- a/gems/decomplex/rust/src/main.rs +++ b/gems/decomplex/rust/src/main.rs @@ -946,9 +946,9 @@ fn run_detector_on_fact_input( let options = fixture.get("options").unwrap_or(&Value::Null); let mass = value_usize(options, "mass", 32)?; let fuzzy = value_usize(options, "fuzzy", 1)?; - Ok(json!(flay_similarity::scan_documents( - documents, mass, fuzzy - ))) + Ok(json!({ + "findings": 
flay_similarity::scan_documents(documents, mass, fuzzy), + })) } "temporal-ordering-pressure" => { Ok(json!(temporal_ordering_pressure::scan_documents(documents))) diff --git a/gems/decomplex/rust/tests/examples_oracle.rs b/gems/decomplex/rust/tests/examples_oracle.rs index 0fe540919..c9ee159d9 100644 --- a/gems/decomplex/rust/tests/examples_oracle.rs +++ b/gems/decomplex/rust/tests/examples_oracle.rs @@ -463,7 +463,7 @@ fn run_detector_on_fact_input( let options = fixture.get("options").cloned().unwrap_or_else(|| json!({})); let mass = option_usize(&options, "mass", 32)?; let fuzzy = option_usize(&options, "fuzzy", 1)?; - value(flay_similarity::scan_documents(documents, mass, fuzzy)) + value(json!({ "findings": flay_similarity::scan_documents(documents, mass, fuzzy) })) } "temporal-ordering-pressure" => { value(temporal_ordering_pressure::scan_documents(documents)) @@ -791,6 +791,7 @@ fn project_source_syntax(fixture: &Path, expected: &Value) -> Result { for key in object.keys() { let keys = match key.as_str() { "functions" => &["name", "owner", "line", "visibility", "params"][..], + "owners" => &["name", "kind", "line"][..], "calls" => &[ "receiver", "message", @@ -802,9 +803,59 @@ fn project_source_syntax(fixture: &Path, expected: &Value) -> Result { "block", "arguments", ][..], + "state_declarations" => &["field", "owner", "type", "line"][..], + "state_param_origins" => { + &["field", "receiver", "owner", "param", "function", "line"][..] + } "state_reads" => &["receiver", "field", "function", "line"][..], "state_writes" => &["receiver", "field", "function", "line"][..], + "decisions" => &["kind", "members", "function", "line", "predicate"][..], + "branch_decisions" => &["function", "line", "predicate", "state_refs"][..], + "branch_arms" => &[ + "function", + "kind", + "line", + "decision_line", + "predicate", + "member", + "body", + ][..], + "dispatch_sites" => { + &["variant_set", "arm_members", "outside", "function", "line"][..] 
+ } "semantic_effects" => &["kind", "detail", "function", "line"][..], + "predicate_bodies" => &["name", "owner", "body", "line"][..], + "comparisons" => &[ + "source", + "raw", + "canon_source", + "operator", + "function", + "line", + ][..], + "path_conditions" => &["guards", "action", "function", "line"][..], + "protocol_method_effects" => &["owner", "name", "line", "reads", "writes"][..], + "protocol_call_paths" => &["owner", "name", "line", "calls"][..], + "clone_candidates" => &[ + "method_name", + "node_name", + "line", + "mass", + "fingerprint", + "child_fingerprints", + "child_masses", + ][..], + "redundant_nil_guards" => &["defn", "line", "local", "guard", "proof"][..], + "local_methods" => &[ + "id", + "owner", + "name", + "line", + "statements", + "boundaries", + "local_contract_assignments", + ][..], + "local_complexity_scores" => &["id", "score", "signals"][..], _ => bail!("unsupported source syntax section: {key}"), }; out.insert(key.clone(), rows(field(&document, key), keys)); @@ -825,7 +876,7 @@ fn project_local_flow(output: &Value) -> Value { "reads": sorted_array(field(statement, "reads")), "writes": sorted_array(field(statement, "writes")), "dependencies": field(statement, "dependencies").clone(), - "co_uses": field(statement, "co_uses").clone(), + "co_uses": canonical_co_uses(field(statement, "co_uses")), }) }).collect::>(), "boundaries": array(field(method, "boundaries")).iter().map(|boundary| { @@ -876,6 +927,22 @@ fn project_protocols(rows_value: &Value) -> Value { ) } +fn canonical_co_uses(value: &Value) -> Value { + let mut pairs = array(value) + .iter() + .map(|pair| { + let mut items = array(pair) + .iter() + .map(|item| item.as_str().unwrap_or_default().to_string()) + .collect::>(); + items.sort(); + json!(items) + }) + .collect::>(); + pairs.sort_by_key(|item| item.to_string()); + Value::Array(pairs) +} + fn rows(value: &Value, keys: &[&str]) -> Value { Value::Array(array(value).iter().map(|row| pick(row, keys)).collect()) } diff --git 
a/gems/decomplex/test/examples_oracle_test.rb b/gems/decomplex/test/examples_oracle_test.rb index 68f9b92a5..f556cefcf 100644 --- a/gems/decomplex/test/examples_oracle_test.rb +++ b/gems/decomplex/test/examples_oracle_test.rb @@ -208,7 +208,9 @@ def project_detector_output(detector, output) { "method" => method["name"], "statements" => Array(method["statements"]).map do |statement| - pick(statement, %w[reads writes dependencies co_uses]) + row = pick(statement, %w[reads writes dependencies co_uses]) + row["co_uses"] = canonical_co_uses(row.fetch("co_uses", [])) + row end, "boundaries" => rows(method["boundaries"], %w[before_index after_index kind]) } @@ -249,6 +251,10 @@ def canonical_variants(value) end.sort end + def canonical_co_uses(value) + Array(value).map { |pair| Array(pair).map(&:to_s).sort }.sort_by { |pair| JSON.generate(pair) } + end + def canonical_state_refs(value) Array(value).map do |item| text = item.to_s diff --git a/gems/decomplex/test/local_flow_test.rb b/gems/decomplex/test/local_flow_test.rb index 5d0cfeb33..deab5135e 100644 --- a/gems/decomplex/test/local_flow_test.rb +++ b/gems/decomplex/test/local_flow_test.rb @@ -34,7 +34,7 @@ def mixed(price, tax) terminal = summary.statements.last assert_equal Set["total", "buffer"], terminal.reads - assert_includes terminal.co_uses, ["total", "buffer"] + assert_includes terminal.co_uses.map(&:sort), ["buffer", "total"] end def test_collects_top_level_and_inline_private_methods diff --git a/gems/decomplex/test/source_facts_oracle_test.rb b/gems/decomplex/test/source_facts_oracle_test.rb index fe0842936..12d902c00 100644 --- a/gems/decomplex/test/source_facts_oracle_test.rb +++ b/gems/decomplex/test/source_facts_oracle_test.rb @@ -52,15 +52,43 @@ def project_syntax(fixture_path, engine, expected) def syntax_keys(section) { "functions" => %w[name owner line visibility params], + "owners" => %w[name kind line], "calls" => %w[receiver message function line conditional control safe_navigation block 
arguments], + "state_declarations" => %w[field owner type line], + "state_param_origins" => %w[field receiver owner param function line], "state_reads" => %w[receiver field function line], "state_writes" => %w[receiver field function line], - "semantic_effects" => %w[kind detail function line] + "decisions" => %w[kind members function line predicate], + "branch_decisions" => %w[function line predicate state_refs], + "branch_arms" => %w[function kind line decision_line predicate member body], + "dispatch_sites" => %w[variant_set arm_members outside function line], + "semantic_effects" => %w[kind detail function line], + "predicate_bodies" => %w[name owner body line], + "comparisons" => %w[source raw canon_source operator function line], + "path_conditions" => %w[guards action function line], + "protocol_method_effects" => %w[owner name line reads writes], + "protocol_call_paths" => %w[owner name line calls], + "clone_candidates" => %w[method_name node_name line mass fingerprint child_fingerprints child_masses], + "redundant_nil_guards" => %w[defn line local guard proof], + "local_methods" => %w[id owner name line statements boundaries local_contract_assignments], + "local_complexity_scores" => %w[id score signals] }.fetch(section) end def syntax_rows(rows, keys) - Array(rows).map { |row| pick(row, keys) } + Array(rows).map do |row| + projected = pick(row, keys) + canonicalize_local_method_statements(projected) if projected.key?("statements") + projected + end + end + + def canonicalize_local_method_statements(row) + row["statements"] = Array(row["statements"]).map do |statement| + next statement unless statement.is_a?(Hash) + + statement.merge("co_uses" => canonical_co_uses(statement.fetch("co_uses", []))) + end end def project_local_flow(fixture_path, engine) @@ -71,7 +99,9 @@ def project_local_flow(fixture_path, engine) { "method" => method["name"], "statements" => Array(method["statements"]).map do |statement| - pick(statement, %w[reads writes dependencies 
co_uses]) + row = pick(statement, %w[reads writes dependencies co_uses]) + row["co_uses"] = canonical_co_uses(row.fetch("co_uses", [])) + row end, "boundaries" => Array(method["boundaries"]).map do |boundary| pick(boundary, %w[before_index after_index kind]) @@ -80,6 +110,11 @@ def project_local_flow(fixture_path, engine) end end + def canonical_co_uses(co_uses) + Array(co_uses).map { |pair| Array(pair).map(&:to_s).sort } + .sort_by { |pair| JSON.generate(pair) } + end + def pick(row, keys) keys.each_with_object({}) do |key, out| out[key] = row[key] if row.key?(key) diff --git a/gems/decomplex/test/syntax_oracle_test.rb b/gems/decomplex/test/syntax_oracle_test.rb index 25b39a5db..0b885cb25 100644 --- a/gems/decomplex/test/syntax_oracle_test.rb +++ b/gems/decomplex/test/syntax_oracle_test.rb @@ -39,7 +39,26 @@ def assert_syntax_facts_match_oracle(fixture_path, engine) expected = JSON.parse(File.read(oracle_path)) actual = Decomplex::SyntaxOracle.project([fixture_path], engine: engine, language: language) + actual = project_expected_shape(actual, expected) assert_equal expected, actual, "#{engine} #{fixture_path}" end + + def project_expected_shape(actual, expected) + case expected + when Hash + expected.keys.each_with_object({}) do |key, out| + out[key] = project_expected_shape(actual.fetch(key), expected.fetch(key)) + end + when Array + return actual unless expected.any? { |item| item.is_a?(Hash) } + + keys = expected.flat_map { |item| item.is_a?(Hash) ? item.keys : [] }.uniq + actual.map do |item| + item.is_a?(Hash) ? 
project_expected_shape(item.slice(*keys), expected.find { |row| row.is_a?(Hash) }) : item + end.sort_by { |item| JSON.generate(item) } + else + actual + end + end end From f412d307ab7fee526688ccb07904d1b3928bc527 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 20 Jun 2026 19:09:12 +0000 Subject: [PATCH 52/52] Fix decomplex Ruby Rust fact parity --- .../oracles/ruby-local_flow_edges.json | 254 +++++++++++++ .../oracles/ruby-semantic_effects.json | 70 +++- .../oracles/ruby-sequence_call_edges.json | 344 ++++++++++++++++++ .../oracles/ruby-slopcop_parity_edges.json | 285 +++++++++++++++ .../source-facts/ruby/local_flow_edges.rb | 23 ++ .../source-facts/ruby/semantic_effects.rb | 8 + .../source-facts/ruby/sequence_call_edges.rb | 40 ++ .../source-facts/ruby/slopcop_parity_edges.rb | 25 ++ gems/decomplex/lib/decomplex/syntax/ruby.rb | 41 ++- .../lib/decomplex/syntax/ruby_effects.rb | 24 ++ .../decomplex/detectors/false_simplicity.rs | 7 +- .../rust/src/decomplex/report_facts.rs | 44 ++- .../src/decomplex/syntax/adapters/ruby.rs | 218 +++++++---- .../rust/src/decomplex/syntax/complexity.rs | 11 + .../rust/src/decomplex/syntax/local_flow.rs | 8 +- .../src/decomplex/syntax/path_condition.rs | 72 ++-- .../decomplex/syntax/tree_sitter_adapter.rs | 79 ++-- 17 files changed, 1405 insertions(+), 148 deletions(-) create mode 100644 gems/decomplex/examples/source-facts/oracles/ruby-local_flow_edges.json create mode 100644 gems/decomplex/examples/source-facts/oracles/ruby-sequence_call_edges.json create mode 100644 gems/decomplex/examples/source-facts/oracles/ruby-slopcop_parity_edges.json create mode 100644 gems/decomplex/examples/source-facts/ruby/local_flow_edges.rb create mode 100644 gems/decomplex/examples/source-facts/ruby/sequence_call_edges.rb create mode 100644 gems/decomplex/examples/source-facts/ruby/slopcop_parity_edges.rb diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-local_flow_edges.json 
b/gems/decomplex/examples/source-facts/oracles/ruby-local_flow_edges.json new file mode 100644 index 000000000..07d396bb9 --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-local_flow_edges.json @@ -0,0 +1,254 @@ +{ + "syntax": { + "state_reads": [ + { + "receiver": "findings", + "field": "each", + "function": "build", + "line": 11 + }, + { + "receiver": "sections", + "field": "each", + "function": "build", + "line": 8 + }, + { + "receiver": "file", + "field": "empty?", + "function": "build", + "line": 13 + }, + { + "receiver": "meth", + "field": "empty?", + "function": "build", + "line": 13 + }, + { + "receiver": "audit", + "field": "findings", + "function": "build", + "line": 16 + }, + { + "receiver": "audit", + "field": "findings", + "function": "build", + "line": 20 + }, + { + "receiver": "finding", + "field": "loc", + "function": "build", + "line": 12 + }, + { + "receiver": "audit.findings", + "field": "size", + "function": "build", + "line": 16 + } + ], + "semantic_effects": [ + { + "kind": "hidden_mutation", + "detail": "<<", + "function": "build", + "line": 16 + }, + { + "kind": "hidden_io", + "detail": "File.file?", + "function": "build", + "line": 6 + } + ], + "path_conditions": [ + { + "guards": [ + "!File.file?(grammar)", + "!grammar" + ], + "action": "skip \"missing grammar\"", + "function": "build", + "line": 6 + } + ], + "local_complexity_scores": [ + { + "id": "SourceFactLocalFlowEdges#build", + "score": 14.7, + "signals": { + "boolean_ops": 8, + "branches": 3, + "early_exits": 2, + "loops": 2, + "nested": 3 + } + } + ] + }, + "local_flow": [ + { + "method": "build", + "statements": [ + { + "reads": [ + + ], + "writes": [ + "rows" + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + }, + { + "reads": [ + "grammar" + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + }, + { + "reads": [ + "audit", + "file", + "key", + "meth", + "rows", + "sections" + ], + "writes": [ + "file", + "key", + "meth" + ], + 
"dependencies": [ + [ + "key", + "file" + ], + [ + "key", + "meth" + ] + ], + "co_uses": [ + [ + "audit", + "file" + ], + [ + "audit", + "key" + ], + [ + "audit", + "meth" + ], + [ + "audit", + "rows" + ], + [ + "audit", + "sections" + ], + [ + "file", + "key" + ], + [ + "file", + "meth" + ], + [ + "file", + "rows" + ], + [ + "file", + "sections" + ], + [ + "key", + "meth" + ], + [ + "key", + "rows" + ], + [ + "key", + "sections" + ], + [ + "meth", + "rows" + ], + [ + "meth", + "sections" + ], + [ + "rows", + "sections" + ] + ] + }, + { + "reads": [ + + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + }, + { + "reads": [ + "rows" + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + { + "before_index": 1, + "after_index": 2, + "kind": "blank" + }, + { + "before_index": 2, + "after_index": 3, + "kind": "blank" + } + ] + } + ] +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-semantic_effects.json b/gems/decomplex/examples/source-facts/oracles/ruby-semantic_effects.json index 5be6fd173..c3fe62e89 100644 --- a/gems/decomplex/examples/source-facts/oracles/ruby-semantic_effects.json +++ b/gems/decomplex/examples/source-facts/oracles/ruby-semantic_effects.json @@ -11,6 +11,15 @@ "value" ] }, + { + "name": "shape_hash", + "owner": "SourceFactSemanticEffects", + "line": 16, + "visibility": "public", + "params": [ + "data" + ] + }, { "name": "perform", "owner": "SourceFactSemanticEffects", @@ -23,6 +32,19 @@ } ], "calls": [ + { + "receiver": "Hash", + "message": "new", + "function": "shape_hash", + "line": 19, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "0" + ] + }, { "receiver": "self", "message": "send", @@ -72,7 +94,35 @@ "control": "always", "safe_navigation": false, "block": false, - "arguments": [] + "arguments": [ + + ] + }, + { + "receiver": "Hash", + "message": "new", + "function": "shape_hash", + "line": 18, 
+ "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [ + + ] + }, + { + "receiver": "data", + "message": "each", + "function": "shape_hash", + "line": 20, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [ + + ] } ], "state_reads": [ @@ -82,6 +132,12 @@ "function": "perform", "line": 8 }, + { + "receiver": "data", + "field": "each", + "function": "shape_hash", + "line": 20 + }, { "receiver": "target", "field": "items", @@ -108,12 +164,24 @@ "function": "mutate", "line": 12 }, + { + "kind": "hidden_mutation", + "detail": "[]=", + "function": "shape_hash", + "line": 18 + }, { "kind": "dynamic_dispatch", "detail": "callback.call", "function": "perform", "line": 6 }, + { + "kind": "hidden_mutation", + "detail": "op-assign", + "function": "shape_hash", + "line": 20 + }, { "kind": "hidden_io", "detail": "puts", diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-sequence_call_edges.json b/gems/decomplex/examples/source-facts/oracles/ruby-sequence_call_edges.json new file mode 100644 index 000000000..723b9cfc2 --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-sequence_call_edges.json @@ -0,0 +1,344 @@ +{ + "syntax": { + "calls": [ + { + "receiver": "arms", + "message": "map", + "function": "symbol_proc_maps", + "line": 16, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "&:category" + ] + }, + { + "receiver": "C.classify_file(rsf.path, f.path)", + "message": "map", + "function": "symbol_proc_maps", + "line": 17, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "&:category" + ] + }, + { + "receiver": "C.classify_file(rsf.path, f.path, diagnostic_mids: [:report_invalid_input!])", + "message": "map", + "function": "symbol_proc_maps", + "line": 18, + "conditional": false, + "control": "always", + 
"safe_navigation": false, + "block": false, + "arguments": [ + "&:category" + ] + }, + { + "receiver": "self", + "message": "assert_empty", + "function": "assertion_single_call_argument", + "line": 10, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [ + "C", + "classify_file", + "(coverage, file, root: dir)" + ] + }, + { + "receiver": "self", + "message": "assert_nil", + "function": "assertion_single_call_argument", + "line": 11, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [ + "SlopCop::DecomplexVerdict", + "lookup", + "(v, path, \"plain\", 2)" + ] + }, + { + "receiver": "self", + "message": "with_env", + "function": "assertion_single_call_argument", + "line": 9, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [ + "\"DECOMPLEX_PARSER\"", + "\"tree_sitter\"" + ] + }, + { + "receiver": "result", + "message": "dig", + "function": "assertion_argument_calls", + "line": 5, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "\"locations\"", + "0", + "\"physicalLocation\"", + "\"artifactLocation\"", + "\"uri\"" + ] + }, + { + "receiver": "self", + "message": "assert_equal", + "function": "assertion_argument_calls", + "line": 5, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "\"uri\"", + "result.dig(\"locations\", 0, \"physicalLocation\", \"artifactLocation\", \"uri\")" + ] + }, + { + "receiver": "File", + "message": "file?", + "function": "chained_multiline", + "line": 26, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + "abs" + ] + }, + { + "receiver": "self", + "message": "overlay_arm", + "function": "chained_multiline", + "line": 34, + "conditional": true, + "control": "iterates", + 
"safe_navigation": false, + "block": false, + "arguments": [ + "arm", + "repo" + ] + }, + { + "receiver": "self", + "message": "repo_relative", + "function": "chained_multiline", + "line": 24, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [ + "file", + "repo" + ] + }, + { + "receiver": "self", + "message": "Array", + "function": "chained_multiline", + "line": 23, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "files" + ] + }, + { + "receiver": "File", + "message": "expand_path", + "function": "chained_multiline", + "line": 25, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [ + "rel", + "repo" + ] + }, + { + "receiver": "Classifier", + "message": "classify_file", + "function": "chained_multiline", + "line": 28, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [ + "resultset", + "abs", + "root: repo", + "ffi_boundary: ffi_boundary", + "diagnostic_mids: diagnostic_mids" + ] + }, + { + "receiver": "C", + "message": "classify_file", + "function": "symbol_proc_maps", + "line": 18, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "rsf.path", + "f.path", + "diagnostic_mids: [:report_invalid_input!]" + ] + }, + { + "receiver": "C", + "message": "classify_file", + "function": "symbol_proc_maps", + "line": 17, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "rsf.path", + "f.path" + ] + }, + { + "receiver": "Array(files).flat_map do |file| rel = repo_relative(file, repo) abs = File.expand_path(rel, repo) next [] unless File.file?(abs) Classifier.classify_file( resultset, abs, root: repo, ffi_boundary: ffi_boundary, diagnostic_mids: diagnostic_mids ).map { |arm| overlay_arm(arm, repo) } end", + "message": 
"compact", + "function": "chained_multiline", + "line": 23, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "arms", + "function": "chained_multiline", + "line": 38, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "f", + "message": "path", + "function": "symbol_proc_maps", + "line": 17, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "rsf", + "message": "path", + "function": "symbol_proc_maps", + "line": 17, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "f", + "message": "path", + "function": "symbol_proc_maps", + "line": 18, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "rsf", + "message": "path", + "function": "symbol_proc_maps", + "line": 18, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "arm[\"line\"]", + "message": "to_i", + "function": "chained_multiline", + "line": 36, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "Array(files)", + "message": "flat_map", + "function": "chained_multiline", + "line": 23, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + }, + { + "receiver": "Array(files).flat_map do |file| rel = repo_relative(file, repo) abs = File.expand_path(rel, repo) next [] unless File.file?(abs) Classifier.classify_file( resultset, abs, root: repo, ffi_boundary: ffi_boundary, diagnostic_mids: diagnostic_mids ).map { |arm| overlay_arm(arm, repo) } end.compact", + "message": 
"sort_by", + "function": "chained_multiline", + "line": 23, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + }, + { + "receiver": "Classifier.classify_file( resultset, abs, root: repo, ffi_boundary: ffi_boundary, diagnostic_mids: diagnostic_mids )", + "message": "map", + "function": "chained_multiline", + "line": 28, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": true, + "arguments": [] + } + ] + } +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-slopcop_parity_edges.json b/gems/decomplex/examples/source-facts/oracles/ruby-slopcop_parity_edges.json new file mode 100644 index 000000000..edbb71a4c --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-slopcop_parity_edges.json @@ -0,0 +1,285 @@ +{ + "syntax": { + "calls": [ + { + "receiver": "gaps", + "message": "first", + "function": "report", + "line": 7, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "@top" + ] + }, + { + "receiver": "self", + "message": "source_path?", + "function": "scan", + "line": 13, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + "path" + ] + }, + { + "receiver": "self", + "message": "Array", + "function": "scan", + "line": 12, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + "paths" + ] + }, + { + "receiver": "self", + "message": "Array", + "function": "scan", + "line": 13, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + "paths" + ] + }, + { + "receiver": "self", + "message": "emit", + "function": "guarded_emit", + "line": 23, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "source" + ] + }, + { + "receiver": "x[:detectors]", + 
"message": "to_a", + "function": "report", + "line": 5, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "dup", + "message": "positive?", + "function": "report", + "line": 8, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "gaps", + "message": "size", + "function": "report", + "line": 8, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "source", + "function": "guarded_emit", + "line": 20, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "evidence", + "message": "covered?", + "function": "guarded_emit", + "line": 21, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "Array(paths)", + "message": "empty?", + "function": "scan", + "line": 12, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "gaps", + "message": "reject", + "function": "report", + "line": 5, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + }, + { + "receiver": "flagged", + "message": "count", + "function": "report", + "line": 6, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + }, + { + "receiver": "gaps.first(@top)", + "message": "map", + "function": "report", + "line": 7, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + }, + { + "receiver": "Array(paths)", + "message": "select", + "function": "scan", + "line": 13, + "conditional": true, + "control": "conditional", + "safe_navigation": 
false, + "block": true, + "arguments": [] + } + ], + "state_reads": [ + { + "receiver": "self", + "field": "@top", + "function": "report", + "line": 7 + }, + { + "receiver": "flagged", + "field": "count", + "function": "report", + "line": 6 + }, + { + "receiver": "evidence", + "field": "covered?", + "function": "guarded_emit", + "line": 21 + }, + { + "receiver": "Array(paths)", + "field": "empty?", + "function": "scan", + "line": 12 + }, + { + "receiver": "gaps.first(@top)", + "field": "map", + "function": "report", + "line": 7 + }, + { + "receiver": "dup", + "field": "positive?", + "function": "report", + "line": 8 + }, + { + "receiver": "gaps", + "field": "reject", + "function": "report", + "line": 5 + }, + { + "receiver": "Array(paths)", + "field": "select", + "function": "scan", + "line": 13 + }, + { + "receiver": "gaps", + "field": "size", + "function": "report", + "line": 8 + }, + { + "receiver": "x[:detectors]", + "field": "to_a", + "function": "report", + "line": 5 + } + ], + "branch_decisions": [ + { + "function": "guarded_emit", + "line": 21, + "predicate": "evidence.covered?", + "state_refs": [ + "evidence.covered?" + ] + }, + { + "function": "scan", + "line": 12, + "predicate": "paths && !Array(paths).empty?", + "state_refs": [ + "Array(paths).empty?" 
+ ] + } + ], + "local_complexity_scores": [ + { + "id": "SourceFactSlopcopParityEdges#guarded_emit", + "score": 3.7, + "signals": { + "boolean_ops": 0, + "branches": 2, + "early_exits": 2 + } + }, + { + "id": "SourceFactSlopcopParityEdges#report", + "score": 0.0, + "signals": {} + }, + { + "id": "SourceFactSlopcopParityEdges#scan", + "score": 1.9, + "signals": { + "boolean_ops": 2, + "branches": 1 + } + } + ] + } +} diff --git a/gems/decomplex/examples/source-facts/ruby/local_flow_edges.rb b/gems/decomplex/examples/source-facts/ruby/local_flow_edges.rb new file mode 100644 index 000000000..6fb8dbeac --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/local_flow_edges.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +class SourceFactLocalFlowEdges + def build(sections, audit, grammar) + rows = [] + skip "missing grammar" unless grammar && File.file?(grammar) + + sections.each do |title, findings| + next unless findings + + findings.each do |finding| + file, meth, = parse_loc(finding.loc) + next unless file && !file.empty? && meth && !meth.empty? 
+ + key = [file, meth] + rows << "| #{key.join(":")} | #{audit.findings.size} |" + end + end + + assert_empty audit.findings + rows + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/semantic_effects.rb b/gems/decomplex/examples/source-facts/ruby/semantic_effects.rb index 6d55b05f1..315a4cc9e 100644 --- a/gems/decomplex/examples/source-facts/ruby/semantic_effects.rb +++ b/gems/decomplex/examples/source-facts/ruby/semantic_effects.rb @@ -12,4 +12,12 @@ def mutate(target, value) target[:name] = value target.items << value end + + def shape_hash(data) + schema = { "$schema" => "https://example.test/schema.json" } + buckets = Hash.new { |hash, key| hash[key] = [] } + totals = Hash.new(0) + data.each { |key, count| totals[key] += count } + [schema, buckets, totals] + end end diff --git a/gems/decomplex/examples/source-facts/ruby/sequence_call_edges.rb b/gems/decomplex/examples/source-facts/ruby/sequence_call_edges.rb new file mode 100644 index 000000000..2dd7a9226 --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/sequence_call_edges.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +class SourceFactSequenceCallEdges + def assertion_argument_calls(result) + assert_equal "uri", result.dig("locations", 0, "physicalLocation", "artifactLocation", "uri") + end + + def assertion_single_call_argument(coverage, file, dir, v, path) + with_env("DECOMPLEX_PARSER", "tree_sitter") do + assert_empty C.classify_file(coverage, file, root: dir) + assert_nil SlopCop::DecomplexVerdict.lookup(v, path, "plain", 2) + end + end + + def symbol_proc_maps(arms, rsf, f) + arms.map(&:category) + C.classify_file(rsf.path, f.path).map(&:category) + C.classify_file(rsf.path, f.path, + diagnostic_mids: [:report_invalid_input!]).map(&:category) + end + + def chained_multiline(files, resultset, repo, ffi_boundary, diagnostic_mids) + arms = Array(files).flat_map do |file| + rel = repo_relative(file, repo) + abs = File.expand_path(rel, repo) + next [] unless File.file?(abs) + 
+ Classifier.classify_file( + resultset, + abs, + root: repo, + ffi_boundary: ffi_boundary, + diagnostic_mids: diagnostic_mids + ).map { |arm| overlay_arm(arm, repo) } + end.compact.sort_by do |arm| + [arm["file"], arm["line"].to_i, arm["method"].to_s, arm["arm_category"].to_s] + end + arms + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/slopcop_parity_edges.rb b/gems/decomplex/examples/source-facts/ruby/slopcop_parity_edges.rb new file mode 100644 index 000000000..91aa19ce4 --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/slopcop_parity_edges.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +class SourceFactSlopcopParityEdges + def report(gaps) + flagged = gaps.reject { |x| x[:detectors].to_a.empty? } + dup = flagged.count { |x| x[:coarse_dup] } + gaps.first(@top).map { |x| x[:file] } + "#{gaps.size} #{dup.positive? ? @top : 0}" + end + + def scan(paths) + if paths && !Array(paths).empty? + Array(paths).select { |path| source_path?(path) } + else + [] + end + end + + def guarded_emit(source, evidence) + return unless source + return if evidence.covered? 
+ + emit(source) + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/ruby.rb b/gems/decomplex/lib/decomplex/syntax/ruby.rb index cc1cb42bf..d25c152e1 100644 --- a/gems/decomplex/lib/decomplex/syntax/ruby.rb +++ b/gems/decomplex/lib/decomplex/syntax/ruby.rb @@ -146,7 +146,7 @@ def state_read_target(node) return { receiver: target[:receiver], field: target[:message] } end - ruby_state_variable_target(node) || super + ruby_unparenthesized_member_argument_target(node) || ruby_state_variable_target(node) || super end def state_target(lhs) @@ -215,7 +215,7 @@ def path_condition_sites(document) ruby_path_walk(document, statement, function_def.name, [], out) end end - out + out.uniq { |site| [site.guards, site.action, site.file, site.function, site.line] } end def immutable_struct_readers(document) @@ -521,6 +521,7 @@ def ruby_local_read_identifier?(node, local_names) return false if ruby_local_write_identifier?(node) return false if ruby_declaration_name?(node, parent_node(node)) return false if ruby_call_message_identifier?(node) + return false if ruby_unary_assertion_argument?(node) true end @@ -530,9 +531,33 @@ def ruby_local_write_identifier?(node) parent = parent_node(node) (parent&.kind == "assignment" && parent.named_children.first == node) || + (parent&.kind == "left_assignment_list" && parent_node(parent)&.kind == "assignment") || (ruby_flat_assignment_statement?(parent) && parent.named_children.first == node) end + def ruby_unparenthesized_member_argument_target(node) + return nil unless node.kind == "argument_list" + return nil if node.text.to_s.strip.start_with?("(") + return nil unless node.children.any? { |child| !child.named? && child.text == "." } + + named = node.named_children + return nil unless named.size == 2 + return nil unless named.all? 
{ |child| %w[identifier constant].include?(child.kind) } + + { receiver: normalize_text(named.first.text), field: named.last.text } + end + + def ruby_unary_assertion_argument?(node) + parent = parent_node(node) + return false unless parent&.kind == "argument_list" + + call = parent_node(parent) + return false unless call&.kind == "call" + return false unless %w[assert_empty refute_empty assert_nil refute_nil].include?(call.named_children.first&.text) + + true + end + def ruby_flat_assignment_statement?(node) return false unless ts_node?(node) && node.kind == "body_statement" @@ -562,6 +587,7 @@ def ruby_path_walk(document, node, function, guards, out) if guards.size >= 2 && ruby_path_action_node?(node) record_ruby_path_condition(document, node, function, guards, out) + return end node.children.each { |child| ruby_path_walk(document, child, function, guards, out) } @@ -931,11 +957,22 @@ def ruby_state_variable_target(node) def ruby_state_variable_node?(node) return false unless ts_node?(node) + return false if ruby_embedded_text_node?(node) return true if %w[instance_variable global_variable].include?(node.kind) node.named_children.empty? 
&& node.text.to_s.match?(/\A[@$][A-Za-z_]\w*[!?=]?\z/) end + def ruby_embedded_text_node?(node) + current = node + while ts_node?(current) + return true if %w[string string_content heredoc_body simple_symbol symbol delimited_symbol].include?(current.kind) + + current = parent_node(current) + end + false + end + def ruby_instance_variable_node?(node) ts_node?(node) && node.kind == "instance_variable" end diff --git a/gems/decomplex/lib/decomplex/syntax/ruby_effects.rb b/gems/decomplex/lib/decomplex/syntax/ruby_effects.rb index af6e28f90..3bf9ae081 100644 --- a/gems/decomplex/lib/decomplex/syntax/ruby_effects.rb +++ b/gems/decomplex/lib/decomplex/syntax/ruby_effects.rb @@ -139,6 +139,8 @@ def ruby_semantic_effect_sites_for_node(document, node, stack) ruby_operator_assignment_effect(document, node, stack) when "binary" ruby_binary_effect(document, node, stack) + when "body_statement", "block_body" + ruby_flat_statement_effects(document, node, stack) else [] end @@ -192,6 +194,28 @@ def ruby_binary_effect(document, node, stack) [semantic_effect_site(document, node, stack, :hidden_mutation, "<<")] end + + def ruby_flat_statement_effects(document, node, stack) + operator = direct_operator(node) + case operator + when "<<" + [semantic_effect_site(document, node, stack, :hidden_mutation, "<<")] + when "=" + ruby_flat_element_assignment_effect(document, node, stack, "[]=") + when "+=", "-=", "*=", "/=", "%=", "&&=", "||=" + ruby_flat_element_assignment_effect(document, node, stack, "op-assign") + else + [] + end + end + + def ruby_flat_element_assignment_effect(document, node, stack, detail) + lhs = node.named_children.first + return [] unless lhs&.kind == "element_reference" + return [] if lhs.named_children.first&.text == "ENV" + + [semantic_effect_site(document, node, stack, :hidden_mutation, detail)] + end end end end diff --git a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs index 
7a90fa12d..a7e3904da 100644 --- a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs +++ b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs @@ -132,7 +132,12 @@ impl Report { grouped.push((rec.name.clone(), vec![rec])); } } - for (_name, recs) in grouped { + for (_name, mut recs) in grouped { + recs.sort_by(|left, right| { + left.file + .cmp(&right.file) + .then_with(|| left.line.cmp(&right.line)) + }); if recs.first().is_some_and(|rec| rec.core) { continue; } diff --git a/gems/decomplex/rust/src/decomplex/report_facts.rs b/gems/decomplex/rust/src/decomplex/report_facts.rs index 88003a1d0..146242d01 100644 --- a/gems/decomplex/rust/src/decomplex/report_facts.rs +++ b/gems/decomplex/rust/src/decomplex/report_facts.rs @@ -65,6 +65,7 @@ pub struct SourceFile { struct SharedFacts { local_summaries: Vec, + local_complexity_scores: BTreeMap<(String, String), syntax::LocalComplexityScore>, semantic_aliases: semantic_alias::SemanticAliasReport, } @@ -72,9 +73,13 @@ impl SharedFacts { fn new(documents: &[Document]) -> Self { thread::scope(|scope| { let local_summaries = scope.spawn(|| local_flow::scan_documents(documents)); + let local_complexity_scores = scope.spawn(|| local_complexity_scores(documents)); let semantic_aliases = scope.spawn(|| semantic_alias::scan_documents(documents)); Self { local_summaries: local_summaries.join().expect("local-flow facts worker"), + local_complexity_scores: local_complexity_scores + .join() + .expect("local-complexity facts worker"), semantic_aliases: semantic_aliases .join() .expect("semantic-alias facts worker"), @@ -118,10 +123,15 @@ pub fn facts_for_source_files(files: &[SourceFile], options: &Options) -> Result let detectors = collect_detector_facts(&groups, &shared, options)?; + let mut reported_files = files + .iter() + .map(|file| file.path.to_string_lossy().to_string()) + .collect::>(); + reported_files.sort(); + Ok(json!({ "format": FORMAT, - "files": files.iter().map(|file| 
file.path.to_string_lossy().to_string()).collect::>(), - "languages": language_counts(files), + "files": reported_files, "detectors": detectors, })) } @@ -253,8 +263,9 @@ fn collect_detector_facts( }) }); spawn_detector!("locality_drag", { - json_value(locality_drag::scan_summaries( + json_value(locality_drag::scan_summaries_with_scores( shared.local_summaries.clone(), + shared.local_complexity_scores.clone(), )) }); spawn_detector!("function_lcom", { @@ -422,8 +433,9 @@ fn collect_detector_facts_sequential( ); detectors.insert( "locality_drag".to_string(), - json_value(locality_drag::scan_summaries( + json_value(locality_drag::scan_summaries_with_scores( shared.local_summaries.clone(), + shared.local_complexity_scores.clone(), ))?, ); detectors.insert( @@ -441,6 +453,20 @@ fn collect_detector_facts_sequential( Ok(detectors) } +fn local_complexity_scores( + documents: &[Document], +) -> BTreeMap<(String, String), syntax::LocalComplexityScore> { + documents + .iter() + .flat_map(|document| { + document + .local_complexity_scores + .iter() + .map(|(id, score)| ((document.file.clone(), id.clone()), score.clone())) + }) + .collect() +} + fn merge_object_reports( groups: &BTreeMap>, fields: &[&str], @@ -565,16 +591,6 @@ fn re_derivation_location(site: &state_mesh::ReDerivationInfo) -> String { format!("{}:{}:{}", site.file, site.defn, site.line) } -fn language_counts(files: &[SourceFile]) -> BTreeMap { - let mut counts = BTreeMap::new(); - for file in files { - *counts - .entry(file.language.as_str().to_string()) - .or_insert(0) += 1; - } - counts -} - fn retain_git_tracked_files(files: &mut Vec) -> Result<()> { let tracked = git_tracked_paths_for_files(files)?; files.retain(|file| tracked.contains(&normalize_path(&file.path))); diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs index b901136d7..27d274faa 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs +++ 
b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs @@ -169,6 +169,10 @@ impl LanguageProfile for RubyProfile { &["element_assignment", "element_reference"] } + fn expression_list_node_kinds(&self) -> &[&str] { + &["left_assignment_list"] + } + fn assignment_operator_tokens(&self) -> &[&str] { &["=", "+=", "-=", "*=", "/=", "%=", "&&=", "||="] } @@ -247,17 +251,14 @@ impl LanguageProfile for RubyProfile { } fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { - if ruby_embedded_text_node(node) { - return None; - } - if node.kind() == "call" && ruby_command_argument_call(node, source) { + if node.kind() == "call" && ruby_single_command_argument_call(node, source) { return None; } let mut target = match node.kind() { "call" => { ruby_proc_call_target(node, source).or_else(|| ruby_call_target(node, source)) } - "body_statement" => ruby_bare_body_call_target(node, source), + "body_statement" | "block_body" => ruby_bare_body_call_target(node, source), "identifier" => ruby_visibility_identifier_call_target(node, source) .or_else(|| ruby_bare_call_target(node, source)), _ => None, @@ -278,6 +279,9 @@ impl LanguageProfile for RubyProfile { { return None; } + if ruby_chained_element_predicate_target(&target) { + return None; + } ruby_valid_call_target(&target).then_some(target) } @@ -436,6 +440,9 @@ impl LanguageProfile for RubyProfile { } let target = ruby_state_variable_target(node, source) .or_else(|| self.default_state_read_target(node, source))?; + if ruby_chained_element_predicate_read_target(&target) { + return None; + } Some(target) } @@ -540,7 +547,6 @@ fn inline_def_name(node: Node<'_>, source: &str) -> Option { fn ruby_call_target<'tree>(node: Node<'tree>, source: &str) -> Option> { let receiver = node.child_by_field_name("receiver"); let method = node.child_by_field_name("method"); - let arguments = ruby_argument_texts(node, source); let message = method .map(|method| node_text(method, source).to_string()) .or_else(|| 
first_named_text(node, source, &["identifier", "constant"])) @@ -548,6 +554,11 @@ fn ruby_call_target<'tree>(node: Node<'tree>, source: &str) -> Option, source: &str) -> Vec { if let Some(arguments) = ruby_inline_def_argument_texts(args, source) { return arguments; } + if let Some(arguments) = ruby_single_command_call_argument_texts(args, source) { + return arguments; + } let values = named_children(args) .into_iter() .map(|child| ruby_argument_text(child, args, source)) @@ -818,24 +832,71 @@ fn ruby_argument_texts(node: Node<'_>, source: &str) -> Vec { .collect() } -fn ruby_argument_text(node: Node<'_>, args: Node<'_>, source: &str) -> String { - if node.kind() == "string" && !node_text(args, source).trim_start().starts_with('(') { - if let Some(content) = named_children(node) +fn ruby_argument_text(node: Node<'_>, _args: Node<'_>, source: &str) -> String { + normalize_text(node_text(node, source)) +} + +fn ruby_single_command_call_argument_texts(args: Node<'_>, source: &str) -> Option> { + if node_text(args, source).trim_start().starts_with('(') { + return None; + } + let children = named_children(args); + if children.len() != 1 || children[0].kind() != "call" { + return None; + } + let values = named_children(children[0]) + .into_iter() + .map(|part| normalize_text(node_text(part, source))) + .filter(|part| !part.is_empty()) + .collect::>(); + (!values.is_empty()).then_some(values) +} + +fn ruby_require_argument_texts(node: Node<'_>, source: &str) -> Vec { + let args = node.child_by_field_name("arguments").or_else(|| { + named_children(node) .into_iter() - .find(|child| child.kind() == "string_content") - { - return normalize_text(node_text(content, source)); + .find(|child| child.kind() == "argument_list") + }); + let Some(args) = args else { + return Vec::new(); + }; + let children = named_children(args); + if children.len() == 1 { + let child = children[0]; + if child.kind() == "string" && !node_text(args, source).trim_start().starts_with('(') { + return 
vec![ruby_unquoted_string_text(child, source)]; } - let text = normalize_text(node_text(node, source)); - if text.len() >= 2 - && ((text.starts_with('"') && text.ends_with('"')) - || (text.starts_with('\'') && text.ends_with('\''))) - { - return text[1..text.len() - 1].to_string(); + if child.kind() == "call" && !node_text(args, source).trim_start().starts_with('(') { + return named_children(child) + .into_iter() + .map(|part| normalize_text(node_text(part, source))) + .filter(|part| !part.is_empty()) + .collect(); } - return text; } - normalize_text(node_text(node, source)) + children + .into_iter() + .map(|child| normalize_text(node_text(child, source))) + .filter(|part| !part.is_empty()) + .collect() +} + +fn ruby_unquoted_string_text(node: Node<'_>, source: &str) -> String { + if let Some(content) = named_children(node) + .into_iter() + .find(|child| child.kind() == "string_content") + { + return normalize_text(node_text(content, source)); + } + let text = normalize_text(node_text(node, source)); + if text.len() >= 2 + && ((text.starts_with('"') && text.ends_with('"')) + || (text.starts_with('\'') && text.ends_with('\''))) + { + return text[1..text.len() - 1].to_string(); + } + text } fn ruby_inline_def_argument_texts(args: Node<'_>, source: &str) -> Option> { @@ -1351,14 +1412,7 @@ fn ruby_protocol_collect_state_access( } for child in &node.children { - ruby_protocol_collect_state_access( - child, - Some(node), - local_names, - reads, - writes, - false, - ); + ruby_protocol_collect_state_access(child, Some(node), local_names, reads, writes, false); } } @@ -1395,10 +1449,7 @@ fn ruby_protocol_record_write( } } -fn ruby_protocol_state_target( - node: &RawNode, - local_names: &BTreeSet, -) -> Option { +fn ruby_protocol_state_target(node: &RawNode, local_names: &BTreeSet) -> Option { match node.kind.as_str() { "instance_variable" => Some(normalize_protocol_state(&node.text)), "element_reference" => raw_named_children(node) @@ -1542,10 +1593,7 @@ fn 
ruby_protocol_case_paths( } else { branch_paths }; - ruby_protocol_combine_path_lists( - &subject_paths, - &alternatives, - ) + ruby_protocol_combine_path_lists(&subject_paths, &alternatives) } fn ruby_protocol_body_paths( @@ -1579,7 +1627,9 @@ fn ruby_protocol_child_nodes(node: &RawNode) -> Vec<&RawNode> { .into_iter() .filter(|child| matches!(child.kind.as_str(), "argument_list" | "block" | "do_block")) .collect(), - "assignment" | "operator_assignment" => raw_named_children(node).into_iter().skip(1).collect(), + "assignment" | "operator_assignment" => { + raw_named_children(node).into_iter().skip(1).collect() + } _ => raw_named_children(node) .into_iter() .filter(|child| child.kind != "comment") @@ -1587,13 +1637,11 @@ fn ruby_protocol_child_nodes(node: &RawNode) -> Vec<&RawNode> { } } -fn ruby_protocol_internal_call( - node: &RawNode, - local_names: &BTreeSet, -) -> Option { +fn ruby_protocol_internal_call(node: &RawNode, local_names: &BTreeSet) -> Option { let target = if node.kind == "call" { ruby_raw_call_target(node) - } else if node.kind == "identifier" && ruby_protocol_bare_internal_identifier(node, local_names) { + } else if node.kind == "identifier" && ruby_protocol_bare_internal_identifier(node, local_names) + { Some(RubyRawCallTarget { receiver: "self".to_string(), message: node.text.clone(), @@ -1605,7 +1653,9 @@ fn ruby_protocol_internal_call( if target.receiver != "self" { return None; } - if local_names.contains(&target.message) || RUBY_PROTOCOL_IGNORED_MIDS.contains(&target.message.as_str()) { + if local_names.contains(&target.message) + || RUBY_PROTOCOL_IGNORED_MIDS.contains(&target.message.as_str()) + { return None; } Some(target.message) @@ -1656,10 +1706,7 @@ fn ruby_protocol_mutating_mid(mid: &str) -> bool { && (RUBY_PROTOCOL_MUTATING_MIDS.contains(&mid) || mid.ends_with('!')) } -fn ruby_protocol_bare_internal_identifier( - node: &RawNode, - local_names: &BTreeSet, -) -> bool { +fn ruby_protocol_bare_internal_identifier(node: &RawNode, 
local_names: &BTreeSet) -> bool { ruby_simple_call_text(&node.text) && !local_names.contains(&node.text) && !RUBY_PROTOCOL_IGNORED_MIDS.contains(&node.text.as_str()) @@ -1692,9 +1739,11 @@ fn ruby_raw_call_target(node: &RawNode) -> Option { } fn ruby_raw_argument_texts(node: &RawNode) -> Vec { - let Some(args) = raw_child_by_field(node, "arguments") - .or_else(|| raw_named_children(node).into_iter().find(|child| child.kind == "argument_list")) - else { + let Some(args) = raw_child_by_field(node, "arguments").or_else(|| { + raw_named_children(node) + .into_iter() + .find(|child| child.kind == "argument_list") + }) else { return Vec::new(); }; let values = raw_named_children(args) @@ -1728,7 +1777,10 @@ fn ruby_raw_function_body_statements(node: &RawNode) -> Vec<&RawNode> { if named.is_empty() && body.text.trim().is_empty() { return Vec::new(); } - if ruby_raw_if_node(body) || ruby_raw_case_node(body) || ruby_raw_flat_assignment_statement(body) { + if ruby_raw_if_node(body) + || ruby_raw_case_node(body) + || ruby_raw_flat_assignment_statement(body) + { return vec![body]; } if named.is_empty() || ruby_raw_heredoc_body(&named) { @@ -1767,7 +1819,12 @@ fn ruby_raw_heredoc_body(named: &[&RawNode]) -> bool { fn ruby_raw_flat_assignment_statement(node: &RawNode) -> bool { node.kind == "body_statement" - && node.children.iter().filter(|child| !child.named && child.text == "=").count() == 1 + && node + .children + .iter() + .filter(|child| !child.named && child.text == "=") + .count() + == 1 && raw_named_children(node).len() >= 2 } @@ -1775,14 +1832,18 @@ fn ruby_raw_if_node(node: &RawNode) -> bool { matches!( node.kind.as_str(), "if" | "unless" | "if_modifier" | "unless_modifier" - ) || (matches!(node.kind.as_str(), "expression_statement" | "block" | "body_statement") - && matches!(raw_first_child_kind(node).as_deref(), Some("if" | "unless"))) + ) || (matches!( + node.kind.as_str(), + "expression_statement" | "block" | "body_statement" + ) && 
matches!(raw_first_child_kind(node).as_deref(), Some("if" | "unless"))) } fn ruby_raw_case_node(node: &RawNode) -> bool { node.kind == "case" - || (matches!(node.kind.as_str(), "body_statement" | "block_body" | "argument_list") - && raw_first_child_kind(node).as_deref() == Some("case")) + || (matches!( + node.kind.as_str(), + "body_statement" | "block_body" | "argument_list" + ) && raw_first_child_kind(node).as_deref() == Some("case")) } fn ruby_raw_path_condition(node: &RawNode) -> Option<&RawNode> { @@ -2006,6 +2067,21 @@ fn ruby_command_argument_call(node: Node<'_>, source: &str) -> bool { !node_text(parent, source).trim_start().starts_with('(') } +fn ruby_single_command_argument_call(node: Node<'_>, source: &str) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "argument_list" || node_text(parent, source).trim_start().starts_with('(') { + return false; + } + let children = named_children(parent); + children.len() == 1 && children[0] == node +} + +fn ruby_require_message(message: &str) -> bool { + matches!(message, "require" | "require_relative") +} + fn ruby_embedded_text_node(node: Node<'_>) -> bool { let mut current = Some(node); while let Some(node) = current { @@ -2068,11 +2144,7 @@ fn ruby_narrow_no_arg_call_span( } fn ruby_valid_call_target(target: &CallTarget<'_>) -> bool { - if invalid_call_text(&target.receiver) - || invalid_call_text(&target.message) - || (target.receiver.split_whitespace().count() > 1 - && !ruby_literal_receiver_text(&target.receiver)) - { + if invalid_call_text(&target.message) { return false; } if matches!(target.message.as_str(), "[]" | "[]=") { @@ -2083,25 +2155,35 @@ fn ruby_valid_call_target(target: &CallTarget<'_>) -> bool { .is_match(target.message.as_str()) } -fn ruby_literal_receiver_text(text: &str) -> bool { - let value = text.trim(); - (value.starts_with("%w[") || value.starts_with("%i[")) - && value.ends_with(']') - && !value.contains('\n') -} - fn invalid_call_text(text: 
&str) -> bool { text.chars() .any(|ch| matches!(ch, '"' | '\'' | '\n' | '\r')) } fn ruby_state_variable_target(node: Node<'_>, source: &str) -> Option { + if ruby_embedded_text_node(node) { + return None; + } matches!(node.kind(), "instance_variable" | "global_variable").then(|| Target { receiver: "self".to_string(), field: node_text(node, source).to_string(), }) } +fn ruby_chained_element_predicate_target(target: &CallTarget<'_>) -> bool { + ruby_chained_element_predicate(&target.receiver, &target.message) +} + +fn ruby_chained_element_predicate_read_target(target: &Target) -> bool { + ruby_chained_element_predicate(&target.receiver, &target.field) +} + +fn ruby_chained_element_predicate(receiver: &str, message: &str) -> bool { + message.ends_with('?') + && receiver.contains('.') + && (receiver.contains("[:") || receiver.contains("[\"") || receiver.contains("['")) +} + fn ruby_sorbet_signature_payload_node(node: Node<'_>, source: &str) -> bool { let mut current = Some(node); while let Some(candidate) = current { diff --git a/gems/decomplex/rust/src/decomplex/syntax/complexity.rs b/gems/decomplex/rust/src/decomplex/syntax/complexity.rs index e70cf7de6..c83f4aad1 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/complexity.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/complexity.rs @@ -97,6 +97,8 @@ impl LocalComplexityScorer { compensated_sum(node.children.iter().map(|child| { if return_fallback_boolean_wrapper(node, child) { 0.0 + } else if duplicate_ruby_early_exit_token(node, child) { + 0.0 } else if transparent_single_line_suite_statement(node, child) { self.score_children(child, nesting, signals) } else { @@ -272,6 +274,15 @@ fn early_exit(node: &RawNode) -> bool { ) } +fn duplicate_ruby_early_exit_token(parent: &RawNode, child: &RawNode) -> bool { + matches!( + parent.kind.as_str(), + "return" | "break" | "next" | "redo" | "retry" + ) && !child.named + && child.text == parent.kind + && parent.text.trim() == parent.kind +} + fn 
transparent_single_line_suite_statement(parent: &RawNode, child: &RawNode) -> bool { parent.kind == "block" && parent.children.len() == 1 diff --git a/gems/decomplex/rust/src/decomplex/syntax/local_flow.rs b/gems/decomplex/rust/src/decomplex/syntax/local_flow.rs index f52e44616..1466e7aef 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/local_flow.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/local_flow.rs @@ -958,7 +958,9 @@ fn raw_assignment_lhs_read_in_tree( { return false; } - if profile.assignment_node_kinds().contains(&root.kind.as_str()) + if profile + .assignment_node_kinds() + .contains(&root.kind.as_str()) || (profile.language() == Language::Ruby && raw_assignment_statement(root, profile)) { if let Some(lhs) = raw_named_children(root).first() { @@ -983,7 +985,9 @@ fn raw_assignment_lhs_write_in_tree( { return false; } - if profile.assignment_node_kinds().contains(&root.kind.as_str()) + if profile + .assignment_node_kinds() + .contains(&root.kind.as_str()) || (profile.language() == Language::Ruby && raw_assignment_statement(root, profile)) { if let Some(lhs) = raw_named_children(root).first() { diff --git a/gems/decomplex/rust/src/decomplex/syntax/path_condition.rs b/gems/decomplex/rust/src/decomplex/syntax/path_condition.rs index ace21fc07..14f694a31 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/path_condition.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/path_condition.rs @@ -48,17 +48,22 @@ pub fn scan_files(files: &[PathBuf], language: Language) -> Result PathConditionReport { - let mut sites = documents + let sites = documents .iter() - .flat_map(sites_for_document) + .flat_map(|document| { + fact_sites_for_document(document) + .into_iter() + .map(|site| Site { + guards: site.guards, + action: site.action, + file: site.file, + defn: site.function, + line: site.line, + span: site.span, + }) + }) .collect::>(); - if sites.is_empty() { - sites = documents - .iter() - .flat_map(normalized_sites_from_document) - .collect::>(); - } - 
Report::new(dedupe_sites(sites)).findings() + Report::new(sites).findings() } pub(crate) fn fact_sites_for_document( @@ -216,8 +221,7 @@ fn raw_path_walk( } else { raw_negate_guards(&atoms) }; - for (child, branch_guards) in - raw_branch_body_nodes(profile, node, &then_atoms, &else_atoms) + for (child, branch_guards) in raw_branch_body_nodes(profile, node, &then_atoms, &else_atoms) { let mut next_guards = guards.to_vec(); next_guards.extend(branch_guards); @@ -267,6 +271,9 @@ fn raw_path_condition_atoms( } fn raw_branch_condition(node: &RawNode) -> Option<&RawNode> { + if raw_modifier_branch(node) { + return raw_named_children(node).into_iter().last(); + } raw_child_by_field(node, "condition") .or_else(|| raw_child_by_field(node, "value")) .or_else(|| raw_child_by_field(node, "subject")) @@ -280,8 +287,8 @@ fn raw_branch_body_nodes<'a>( else_guards: &[String], ) -> Vec<(&'a RawNode, Vec)> { let mut bodies = Vec::new(); - if let Some(body) = raw_child_by_field(node, "consequence") - .or_else(|| raw_child_by_field(node, "body")) + if let Some(body) = + raw_child_by_field(node, "consequence").or_else(|| raw_child_by_field(node, "body")) { bodies.push((body, then_guards.to_vec())); } @@ -289,19 +296,28 @@ fn raw_branch_body_nodes<'a>( bodies.push((body, else_guards.to_vec())); } if bodies.is_empty() { - bodies = raw_named_children(node) - .into_iter() - .skip(1) - .enumerate() - .map(|(index, body)| { - let guards = if index == 0 { - then_guards.to_vec() - } else { - else_guards.to_vec() - }; - (body, guards) - }) - .collect(); + let named = raw_named_children(node); + bodies = if raw_modifier_branch(node) { + named + .into_iter() + .next() + .map(|body| vec![(body, then_guards.to_vec())]) + .unwrap_or_default() + } else { + named + .into_iter() + .skip(1) + .enumerate() + .map(|(index, body)| { + let guards = if index == 0 { + then_guards.to_vec() + } else { + else_guards.to_vec() + }; + (body, guards) + }) + .collect() + }; } bodies .into_iter() @@ -313,6 +329,10 @@ 
fn raw_branch_body_nodes<'a>( .collect() } +fn raw_modifier_branch(node: &RawNode) -> bool { + matches!(node.kind.as_str(), "if_modifier" | "unless_modifier") +} + fn raw_flatten_branch_body<'a>( profile: &dyn LanguageProfile, body: &'a RawNode, diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs index 7ecf20f9e..a90716f54 100644 --- a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -3,9 +3,9 @@ use super::{ false_simplicity_lexicon::{false_simplicity_lexicon, FalseSimplicityLexicon}, language_profile, LanguageProfile, }, - BranchArm, BranchDecision, CallSite, ComparisonUse, DecisionSite, DispatchSite, Document, FunctionDef, - Language, OwnerDef, PredicateAlias, SemanticEffectSite, StateDeclaration, StateRead, - StateWrite, + BranchArm, BranchDecision, CallSite, ComparisonUse, DecisionSite, DispatchSite, Document, + FunctionDef, Language, OwnerDef, PredicateAlias, SemanticEffectSite, StateDeclaration, + StateRead, StateWrite, }; use crate::decomplex::ast::{line, node_text, normalize_text, normalize_tree, span, RawNode}; use crate::decomplex::syntax::complexity::local_complexity_scores; @@ -290,14 +290,7 @@ fn collect_facts( &next_context, branch_decisions, ); - record_branch_arm( - node, - source, - file, - language, - &next_context, - branch_arms, - ); + record_branch_arm(node, source, file, language, &next_context, branch_arms); record_predicate_alias( node, source, @@ -859,9 +852,7 @@ fn normalize_comparison_source(source: &str) -> String { let receiver = &text[..dot_index]; let rest = &text[dot_index + 1..]; if simple_identifier(receiver) - && (rest.contains(" == ") - || rest.contains(" != ") - || rest.contains('.')) + && (rest.contains(" == ") || rest.contains(" != ") || rest.contains('.')) { text = rest.to_string(); } @@ -1021,18 +1012,19 @@ fn record_if_arms( 
.child_by_field_name("consequence") .or_else(|| node.child_by_field_name("body")) .or_else(|| named.get(1).copied()); - let alternative = node - .child_by_field_name("alternative") - .or_else(|| { - named.iter() - .copied() - .find(|child| child.kind().contains("else") || child.kind().contains("alternative")) - }) - .or_else(|| { - named.get(2) - .copied() - .filter(|candidate| consequence != Some(*candidate)) - }); + let alternative = + node.child_by_field_name("alternative") + .or_else(|| { + named.iter().copied().find(|child| { + child.kind().contains("else") || child.kind().contains("alternative") + }) + }) + .or_else(|| { + named + .get(2) + .copied() + .filter(|candidate| consequence != Some(*candidate)) + }); for (arm, member) in [(consequence, "then"), (alternative, "else")] { let Some(arm) = arm else { @@ -1103,10 +1095,7 @@ fn case_arm_body( .map(|child| node_text(child, source)) .unwrap_or_else(|| node_text(arm, source)); let mut text = profile.normalize_source_text(body); - for prefix in [ - format!("when {pattern} then "), - format!("when {pattern} "), - ] { + for prefix in [format!("when {pattern} then "), format!("when {pattern} ")] { if let Some(stripped) = text.strip_prefix(&prefix) { text = stripped.to_string(); break; @@ -1161,7 +1150,7 @@ fn collect_branch_state_refs( normalized_state_ref_field(&target.field) }; let receiver = target.receiver.trim_start_matches('$'); - if constant_like_state_ref(receiver, &field) { + if namespace_receiver(receiver) || constant_like_state_ref(receiver, &field) { // Constants and type namespaces are not mutable object state. 
} else if branch_local_ref(node, source, receiver, &field, context) { // Function-local bindings are not object state, even when a @@ -1253,7 +1242,10 @@ fn ruby_immutable_struct_readers(source: &str) -> BTreeMap String { } fn constant_like_state_ref(receiver: &str, field: &str) -> bool { - starts_uppercase(receiver) || (receiver.is_empty() && starts_uppercase(field)) + constant_namespace_receiver(receiver) || (receiver.is_empty() && starts_uppercase(field)) } fn starts_uppercase(value: &str) -> bool { matches!(value.chars().next(), Some(ch) if ch.is_ascii_uppercase()) } +fn constant_namespace_receiver(value: &str) -> bool { + let text = value.trim().trim_start_matches("::"); + if text.is_empty() || !starts_uppercase(text) { + return false; + } + text.split("::").all(|part| { + !part.is_empty() + && part + .chars() + .all(|ch| ch == '_' || ch == '.' || ch.is_ascii_alphanumeric()) + && part + .split('.') + .all(|segment| !segment.is_empty() && starts_uppercase(segment)) + }) +} + fn record_conjunction_decision( profile: &dyn LanguageProfile, mut node: Node<'_>, @@ -2087,7 +2095,10 @@ fn namespace_receiver(text: &str) -> bool { return true; } - matches!(receiver.chars().next(), Some(first) if first.is_ascii_uppercase()) + if !starts_uppercase(receiver) { + return false; + } + !receiver.contains('(') || receiver.contains('.') || receiver.contains("::") } pub(crate) fn first_named_text(node: Node<'_>, source: &str, kinds: &[&str]) -> Option {